In [3]:
pip install selenium




In [5]:
pip install webdriver-manager

Note: you may need to restart the kernel to use updated packages.


In [3]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from datetime import datetime, timedelta

def get_issuers(driver):
    driver.get('https://www.mse.mk/mk/stats/symbolhistory/REPL')
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    options = soup.select(".form-control option")
    issuer_codes = [option.text for option in options if option.text.isalpha()]
    return issuer_codes

def check_last_date(issuer_code, data_file="stock_data1.csv"):
    try:
        data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True
        data['Date'] = pd.to_datetime(data['Date'], dayfirst=True).dt.date 
        issuer_data = data[data['Issuer_Code'] == issuer_code]
        if not issuer_data.empty:
            last_date = issuer_data['Date'].max()
            return last_date + timedelta(days=1)  # Next date for data retrieval
    except FileNotFoundError:
        pass
    return datetime.now().date() - timedelta(days=365*10)  # If no data, go back 10 years

def save_data_to_csv(new_data, data_file="stock_data1.csv"):
    try:
        existing_data = pd.read_csv(data_file, parse_dates=['Date'])
        existing_data['Date'] = pd.to_datetime(existing_data['Date']).dt.date
        combined_data = pd.concat([existing_data, pd.DataFrame(new_data)], ignore_index=True)
    except FileNotFoundError:
        combined_data = pd.DataFrame(new_data)
    
    combined_data.drop_duplicates(subset=["Issuer_Code", "Date"], keep="last", inplace=True)
    combined_data.to_csv(data_file, index=False, date_format="%d.%m.%Y")  # Save dates without time

def fill_missing_data(driver, issuer_codes, data_file="stock_data1.csv"):
    all_data = []

    for code in issuer_codes:
        start_date = check_last_date(code, data_file)
        end_date = datetime.now().date()

        for year in range(start_date.year, end_date.year + 1):
            driver.get('https://www.mse.mk/mk/stats/symbolhistory/REPL')
            wait = WebDriverWait(driver, 5)

            try:
                date_inputFrom = wait.until(EC.presence_of_element_located((By.ID, "FromDate")))
                date_inputTo = wait.until(EC.presence_of_element_located((By.ID, "ToDate")))
                date_inputFrom.clear()
                date_inputTo.clear()
                
                if year == start_date.year:
                    date_inputFrom.send_keys(start_date.strftime("%d.%m.%Y"))
                else:
                    date_inputFrom.send_keys(f"01.01.{year}")
                
                if year == end_date.year:
                    date_inputTo.send_keys(end_date.strftime("%d.%m.%Y"))
                else:
                    date_inputTo.send_keys(f"31.12.{year}")

                select_elementCode = wait.until(EC.presence_of_element_located((By.ID, "Code")))
                select = Select(select_elementCode)
                select.select_by_value(code)
                print(f"Processing code: {code} for year {year}")

                button = driver.find_element(By.CLASS_NAME, "btn-primary-sm")
                button.click()

                table = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, "resultsTable")))
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                table_rows = soup.select("#resultsTable tbody tr")

                for row in table_rows:
                    cols = row.select("td")
                    if len(cols) >= 9:
                        data_dict = {
                            "Issuer_Code": code,
                            "Date": datetime.strptime(cols[0].text.strip(), "%d.%m.%Y").date(),
                            "Last_Price": cols[1].text.strip(),
                            "Max_Price": cols[2].text.strip(),
                            "Min_Price": cols[3].text.strip(),
                            "Average_Price": cols[4].text.strip(),
                            "Percent": cols[5].text.strip(),
                            "Quantity": cols[6].text.strip(),
                            "Traffic": cols[7].text.strip(),
                            "Sum_Traffic": cols[8].text.strip()
                        }
                        all_data.append(data_dict)

            except (TimeoutException, NoSuchElementException):
                print(f"No data for code {code} in year {year}")
                continue

    save_data_to_csv(all_data, data_file)
    print("Data saved to", data_file)

chrome_options = Options()
chrome_options.add_argument("--headless")

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

start_time = time.time()  # Start timer

try:
    issuer_codes = get_issuers(driver)
    fill_missing_data(driver, issuer_codes)
finally:
    driver.quit()

execution_time = time.time() - start_time  # End timer
print(f"Execution Time: {execution_time:.2f} seconds")
print("Data collection complete.")


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ADIN for year 2024
No data for code ADIN in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ALK for year 2024
No data for code ALK in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ALKB for year 2024
No data for code ALKB in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: AMEH for year 2024
No data for code AMEH in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: APTK for year 2024
No data for code APTK in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ATPP for year 2024
No data for code ATPP in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: AUMK for year 2024
No data for code AUMK in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: BANA for year 2024
No data for code BANA in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: BGOR for year 2024
No data for code BGOR in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: BIKF for year 2024
No data for code BIKF in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: BIM for year 2024
No data for code BIM in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: BLTU for year 2024
No data for code BLTU in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: CBNG for year 2024
No data for code CBNG in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: CDHV for year 2024
No data for code CDHV in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: CEVI for year 2024
No data for code CEVI in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: CKB for year 2024
No data for code CKB in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: CKBKO for year 2024
No data for code CKBKO in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: DEBA for year 2024
No data for code DEBA in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: DIMI for year 2024
No data for code DIMI in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: EDST for year 2024
No data for code EDST in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ELMA for year 2024
No data for code ELMA in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ELNC for year 2024
No data for code ELNC in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ENER for year 2024
No data for code ENER in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: ENSA for year 2024
No data for code ENSA in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: EUHA for year 2024
No data for code EUHA in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: EUMK for year 2024
No data for code EUMK in year 2024


  data = pd.read_csv(data_file, parse_dates=['Date'], dayfirst=True)  # Explicitly set dayfirst=True


Processing code: EVRO for year 2024
No data for code EVRO in year 2024


PermissionError: [Errno 13] Permission denied