In [None]:
from datetime import datetime, timedelta
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

# FILTER 1
def get_codes():
    """Scrape the issuer codes offered on the MSE symbol-history page.

    Downloads the ALK symbol-history page and reads the options of the
    ``<select id="Code">`` dropdown. Blank options, codes containing a digit
    and codes starting with ``E`` are left out.

    Returns:
        list[str]: The remaining codes, whitespace-stripped, in page order.
        Empty when the dropdown is not present in the response.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = "https://www.mse.mk/en/stats/symbolhistory/ALK"
    with requests.Session() as session:
        # Bound the wait so an unresponsive server cannot hang the script.
        response = session.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")
    dropdown = soup.find("select", id="Code")
    if not dropdown:
        return []

    codes = []
    for option in dropdown.find_all("option"):
        # Normalise once so every check sees the same text.
        code = option.text.strip()
        if not code:
            # A blank placeholder option would yield a ".csv" file and a
            # request for an empty symbol, so skip it.
            continue
        if any(char.isdigit() for char in code) or code.startswith("E"):
            continue
        codes.append(code)
    return codes
# FILTER 2
def get_last_update(code):
    """Return the most recent date stored in the CSV file for ``code``.

    Args:
        code: Issuer code; the file read is ``f"{code}.csv"`` relative to the
            current working directory.

    Returns:
        pandas.Timestamp | None: The latest value of the ``Date`` column, or
        ``None`` when the file is missing, is empty, or holds no dates
        (for example a header row only).
    """
    path = f"{code}.csv"
    try:
        df = pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return None

    latest = pd.to_datetime(df['Date']).max()
    # max() over zero rows (or only missing values) is NaT, not None. Report
    # it as "no data" so the caller does not do date arithmetic on NaT.
    if pd.isna(latest):
        return None
    return latest
# FILTER 3
def fill_data(dataframe):
    """Fill gaps in scraped price rows and normalise number formatting.

    The frame is modified in place; nothing is returned.

    For each known numeric column that is present, empty strings are treated
    as missing, thousands separators are removed, the values are converted to
    float, missing values are replaced by the column mean, and the result is
    written back as text formatted like ``1,234.50``.

    Missing or empty ``Date`` values are forward-filled from the row above.

    Args:
        dataframe: ``pandas.DataFrame`` with a ``Date`` column and any subset
            of the numeric columns listed below.

    Raises:
        KeyError: If there is no ``Date`` column.
        ValueError: If a numeric column holds text that is not a number.
    """
    numeric_cols = ['LastTradePrice', 'Max', 'Min', 'Avg. Price', '%chg.', 'Volume', 'Turnover in BEST', 'TotalTurnover']

    for col in numeric_cols:
        if col not in dataframe.columns:
            continue
        as_float = (
            dataframe[col]
            .replace('', np.nan)             # empty cells count as missing
            .replace({',': ''}, regex=True)  # drop thousands separators
            .astype(float)
        )
        filled = as_float.fillna(as_float.mean())
        dataframe[col] = filled.apply(lambda x: f"{x:,.2f}")

    # Scraped cells arrive as strings, so a missing date is '' rather than
    # NaN; convert it first, otherwise ffill() has nothing to fill.
    dataframe['Date'] = dataframe['Date'].replace('', np.nan).ffill()

def fetch_code(session, code, start_date, end_date):
    """Download one date range of price history for a single issuer code.

    Args:
        session: ``requests.Session`` used to issue the GET request.
        code: Issuer code placed in the URL path.
        start_date: First day of the range; sent as ``FromDate`` in
            ``mm/dd/YYYY`` form.
        end_date: Last day of the range; sent as ``ToDate`` in
            ``mm/dd/YYYY`` form.

    Returns:
        list[list[str]]: One list of stripped cell texts per table row, in
        page order. Empty when the page contains no ``<tbody>``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = (
        f"https://www.mse.mk/en/stats/symbolhistory/{code}"
        f"?FromDate={start_date.strftime('%m/%d/%Y')}"
        f"&ToDate={end_date.strftime('%m/%d/%Y')}"
    )
    # Bound the wait so one stalled request cannot block a worker forever.
    response = session.get(url, timeout=30)
    # An error page has no table and would look like "no trades", leaving a
    # silent gap in the saved history; fail loudly instead.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    tbody = soup.select_one('tbody')
    if not tbody:
        return []

    rows = []
    for row in tbody.find_all('tr'):
        cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
        # Rows without any <td> carry no data; skip them.
        if cells:
            rows.append(cells)
    return rows

def update_code(session, code):
    """Fetch the price history that is not yet saved for ``code`` and append it.

    Fetching resumes the day after the newest date already stored for the
    code, or 3650 days before now when nothing is stored. The range is
    requested in pieces that each end on 31 December of their year (or at the
    current moment, whichever comes first).

    Args:
        session: ``requests.Session`` handed through to ``fetch_code``.
        code: Issuer code to update.
    """
    current_date = datetime.now()
    last_update = get_last_update(code)

    window_start = (
        last_update + timedelta(days=1)
        if last_update
        else current_date - timedelta(days=3650)
    )

    collected = []
    while window_start <= current_date:
        # Never let a single request cross a year boundary or run past now.
        window_end = min(datetime(window_start.year, 12, 31), current_date)
        collected.extend(fetch_code(session, code, window_start, window_end))
        window_start = window_end + timedelta(days=1)

    if collected:
        save_to_csv(code, collected)

def save_to_csv(code, data):
    """Clean scraped rows and append them to the CSV file for ``code``.

    The rows are loaded into a DataFrame with the fixed column layout below,
    passed through ``fill_data``, and appended to ``f"{code}.csv"``. The
    header row is written only when the file does not exist yet or is empty.

    Args:
        code: Issuer code; determines the output file name.
        data: List of rows, each a list of cell values in column order.
    """
    columns = ['Date', 'LastTradePrice', 'Max', 'Min', 'Avg. Price', '%chg.', 'Volume', 'Turnover in BEST', 'TotalTurnover']
    df = pd.DataFrame(data, columns=columns)

    fill_data(df)

    file_path = f"{code}.csv"
    # A zero-byte file still needs its header: get_last_update treats such a
    # file as "no data", so without this the rows would be appended headerless
    # and the next read would mistake the first data row for column names.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    df.to_csv(file_path, mode='a', header=needs_header, index=False)

if __name__ == "__main__":
    # Update every listed code, running up to five updates at a time and
    # reporting each one as it finishes.
    codes = get_codes()
    with requests.Session() as session, ThreadPoolExecutor(max_workers=5) as executor:
        # Map each submitted job back to its code for reporting.
        futures = {}
        for code in codes:
            futures[executor.submit(update_code, session, code)] = code

        for future in as_completed(futures):
            code = futures[future]
            try:
                # result() re-raises whatever the worker raised.
                future.result()
                print(f"{code} update complete.")
            except Exception as e:
                print(f"Error updating {code}: {e}")


In [15]:
# Load ALK.csv into a DataFrame and print it.
print(pd.read_csv('ALK.csv'))

            Date LastTradePrice        Max        Min Avg. Price  %chg.  \
0     12/30/2014       5,100.00   5,100.00   5,070.00   5,087.79   0.75   
1     12/29/2014       5,050.00   5,050.00   5,049.00   5,049.86   0.00   
2     12/26/2014       5,050.00   5,050.00   5,050.00   5,050.00   0.42   
3     12/25/2014       5,030.00   5,030.00   5,020.00   5,029.05   0.58   
4     12/24/2014       5,000.00   5,000.00   5,000.00   5,000.00  -0.49   
...          ...            ...        ...        ...        ...    ...   
2444    1/9/2024      18,450.00  18,450.00  18,400.00  18,415.77   0.09   
2445    1/5/2024      18,450.00  18,450.00  18,301.00  18,399.39   0.00   
2446    1/4/2024      18,399.00  18,400.00  18,399.00  18,399.08   0.44   
2447    1/3/2024      18,300.00  18,400.00  18,290.00  18,318.52   0.19   
2448    1/2/2024      18,151.00  18,300.00  18,151.00  18,283.19   0.71   

      Volume Turnover in BEST TotalTurnover  
0      68.00       345,970.00    345,970.00  
1     8