In [2]:
from datetime import datetime, timedelta
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

# FILTER 1
def get_codes(timeout=30):
    """Scrape the issuer codes offered by the MSE symbol-history page.

    Downloads the ALK history page, reads its ``<select id="Code">``
    dropdown and keeps the option texts that contain no digit and, once
    stripped of surrounding whitespace, do not start with ``'E'``.

    Args:
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A list of stripped option texts, or an empty list when the page
        has no ``Code`` dropdown.

    Raises:
        requests.RequestException: If the connection fails or times out.
    """
    url = "https://www.mse.mk/en/stats/symbolhistory/ALK"
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)

    soup = BeautifulSoup(response.content, "html.parser")
    dropdown = soup.find("select", id="Code")
    if dropdown is None:
        return []

    codes = []
    for option in dropdown.find_all("option"):
        text = option.text.strip()
        # Skip entries containing digits and those starting with 'E'.
        if any(char.isdigit() for char in text) or text.startswith('E'):
            continue
        codes.append(text)
    return codes
# FILTER 2
def get_last_update(code):
    """Return the most recent date already stored for *code*.

    Reads ``{code}.csv`` and takes the maximum of its ``Date`` column.

    Args:
        code: Issuer code; also the CSV file name without the extension.

    Returns:
        The latest ``Date`` value as a pandas ``Timestamp``, or ``None``
        when the file is missing, completely empty, or contains no
        parseable date (for example a header-only file).
    """
    path = f"{code}.csv"
    try:
        df = pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return None

    latest = pd.to_datetime(df['Date']).max()
    # max() of an empty column is NaT, which is truthy and breaks date
    # arithmetic in callers; report "no data yet" as None instead.
    if pd.isna(latest):
        return None
    return latest

def fetch_code(session, code, start_date, end_date, timeout=30):
    """Download the history table for *code* between two dates.

    Args:
        session: Object with a ``requests.Session``-style ``get`` method.
        code: Issuer code inserted into the symbol-history URL.
        start_date: First day of the range (formatted as ``MM/DD/YYYY``).
        end_date: Last day of the range (formatted as ``MM/DD/YYYY``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of rows, each a list of the stripped text of the row's
        ``<td>`` cells. Empty when the page contains no ``<tbody>``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = (
        f"https://www.mse.mk/en/stats/symbolhistory/{code}"
        f"?FromDate={start_date.strftime('%m/%d/%Y')}"
        f"&ToDate={end_date.strftime('%m/%d/%Y')}"
    )
    response = session.get(url, timeout=timeout)
    # Fail loudly on an HTTP error: an error page has no <tbody>, so it
    # would otherwise look like "no trades in this range".
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    tbody = soup.select_one('tbody')
    if tbody is None:
        return []

    rows = []
    for row in tbody.find_all('tr'):
        cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
        if cells:  # rows without <td> cells carry no data
            rows.append(cells)
    return rows

def update_code(session, code, history_days=3650):
    """Fetch the missing history for *code* and append it to its CSV.

    Starts the day after the last stored date, or ``history_days`` before
    now when nothing usable is stored, and requests the data one calendar
    year at a time up to the current moment.

    Args:
        session: HTTP session passed through to ``fetch_code``.
        code: Issuer code to update.
        history_days: How far back to go when no previous data exists.
    """
    current_date = datetime.now()
    last_update = get_last_update(code)

    # NaT is truthy, so test for it explicitly; NaT + timedelta stays NaT
    # and the loop below would then never run.
    if last_update is not None and not pd.isna(last_update):
        start_date = last_update + timedelta(days=1)
    else:
        start_date = current_date - timedelta(days=history_days)

    all_data = []
    while start_date <= current_date:
        # Each request covers at most the rest of the current calendar year.
        end_date = min(datetime(start_date.year, 12, 31), current_date)
        all_data.extend(fetch_code(session, code, start_date, end_date))
        start_date = end_date + timedelta(days=1)

    if all_data:
        save_to_csv(code, all_data)

def save_to_csv(code, data):
    """Append *data* rows to ``{code}.csv``, writing a header when needed.

    Args:
        code: Issuer code; also the CSV file name without the extension.
        data: Iterable of rows, each holding the nine column values in the
            order listed in ``columns`` below.
    """
    columns = ['Date', 'LastTradePrice', 'Max', 'Min', 'Avg. Price', '%chg.', 'Volume', 'Turnover in BEST', 'TotalTurnover']
    df = pd.DataFrame(data, columns=columns)

    file_path = f"{code}.csv"
    # A zero-byte file also needs the header; otherwise a later read_csv
    # would take the first data record as the column names.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    df.to_csv(file_path, mode='a', header=needs_header, index=False)

if __name__ == "__main__":
    # Script entry point: discover the codes, then update each one on a
    # small thread pool, reporting per-code success or failure as the
    # workers finish.
    codes = get_codes()
    with requests.Session() as http, ThreadPoolExecutor(max_workers=5) as pool:
        pending = {}
        for symbol in codes:
            pending[pool.submit(update_code, http, symbol)] = symbol

        for finished in as_completed(pending):
            symbol = pending[finished]
            try:
                # result() re-raises whatever the worker raised.
                finished.result()
                print(f"{symbol} update complete.")
            except Exception as exc:
                print(f"Error updating {symbol}: {exc}")


ADIN update complete.
ALK update complete.
AMEH update complete.
AMBR update complete.
ALKB update complete.
APTK update complete.
AUMK update complete.
BANA update complete.
BIM update complete.
BLTU update complete.
ATPP update complete.
CDHV update complete.
BIKF update complete.
CKB update complete.
CBNG update complete.
CKBKO update complete.
DEBA update complete.
CEVI update complete.
BGOR update complete.
FAKM update complete.
FKTL update complete.
FROT update complete.
DIMI update complete.
FUBT update complete.
GDKM update complete.
FERS update complete.
GECK update complete.
GECT update complete.
GRDN update complete.
GRSN update complete.
GIMS update complete.
GRNT update complete.
GALE update complete.
GRZD update complete.
GTRG update complete.
GTC update complete.
INB update complete.
INHO update complete.
INOV update complete.
INPR update complete.
INTP update complete.
IJUG update complete.
JAKO update complete.
KARO update complete.
JUSK update complete.
KKST update co