In [None]:
# Install chromium, its driver, and selenium.
!apt update
!apt install chromium-chromedriver
!pip install selenium

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,105 kB]
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Ign:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://r2u.stat.illinois.edu/ubuntu jammy Release [5,713 B]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy Release.gpg [793 B]
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:13 http://security.ubuntu.com/ubuntu

In [1]:
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Wall-clock start, used at the end of the script to report the total run time.
start_time = time.time()

# Load one symbol-history page and read the issuer codes from its
# <select id="Code"> dropdown.
url = "https://www.mse.mk/mk/stats/symbolhistory/MPT/?FromDate=01.01.2023"
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, timeout=60)
raw_html = response.text
soup = BeautifulSoup(raw_html, "html.parser")
select = soup.find("select", {"id": "Code"})

if select:
    # Keep only codes without digits; options lacking a usable `value`
    # attribute are skipped instead of raising KeyError.
    candidate_codes = (option.get('value') for option in select.find_all('option'))
    option_values = [
        code for code in candidate_codes
        if code and not re.search(r'\d', code)
    ]
else:
    # Always define the name so later code iterates over nothing instead of
    # failing with NameError.
    option_values = []
    print("No <select> element found with id 'Code'")

# Column-oriented accumulator shared by the download workers; every list must
# stay the same length so the dict can be turned into a DataFrame.
data = {
    "Issuer": [],
    "Date": [],
    "Open": [],
    "High": [],
    "Low": [],
    "Close": [],
    "Change": [],
    "Volume": [],
    "Turnover": [],
    "Market Cap": []
}

# Serialises merges into the module-level `data` dict. Without it two worker
# threads can interleave their per-column `extend` calls, leaving e.g. the
# "Issuer" and "Date" lists in different row orders.
_DATA_LOCK = threading.Lock()


def fetch_data_for_issuer_year(issuer, from_date, to_date):
    """Download one issuer's history for a date range and merge it into ``data``.

    Args:
        issuer: Issuer code, inserted into the request URL path.
        from_date: Range start as a ``DD.MM.YYYY`` string (``FromDate`` query value).
        to_date: Range end as a ``DD.MM.YYYY`` string (``ToDate`` query value).

    Returns:
        None. Rows are appended to the module-level ``data`` dict.

    Raises:
        requests.RequestException: On connection failure or when the server
            does not respond within the timeout.
    """
    base_url = "https://www.mse.mk/mk/stats/symbolhistory/"
    url = f"{base_url}{issuer}/?FromDate={from_date}&ToDate={to_date}"
    # A timeout keeps one stalled request from blocking a worker forever.
    response = requests.get(url, timeout=60)
    soup = BeautifulSoup(response.text, "html.parser")
    tabela = soup.find("tbody")

    if not tabela:
        # No table body in the response: nothing to merge.
        return

    # Column names in the same order as the nine <td> cells of a table row.
    value_columns = (
        "Date", "Open", "High", "Low", "Close",
        "Change", "Volume", "Turnover", "Market Cap",
    )
    temp_data = {"Issuer": []}
    temp_data.update({column: [] for column in value_columns})

    for row in tabela.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) != 9:
            # Only rows with exactly nine cells are treated as data rows.
            continue
        temp_data["Issuer"].append(issuer)
        for column, cell in zip(value_columns, cells):
            temp_data[column].append(cell.get_text(strip=True))

    # Merge all columns as one atomic step so rows stay aligned across lists.
    with _DATA_LOCK:
        for key in data.keys():
            data[key].extend(temp_data[key])

def fetch_all_data_parallel(first_year=2014, last_year=2024,
                            last_year_to_date="01.11.2024", max_workers=10):
    """Download yearly history for every issuer code using a thread pool.

    One task is submitted per (issuer, year) pair, each calling
    ``fetch_data_for_issuer_year`` for the issuer codes in ``option_values``.
    The defaults reproduce the previously hard-coded behaviour.

    Args:
        first_year: First calendar year to download (inclusive).
        last_year: Last calendar year to download (inclusive).
        last_year_to_date: ``DD.MM.YYYY`` end date used for ``last_year``
            instead of 31 December; pass ``None`` to fetch the full year.
        max_workers: Size of the thread pool.

    Raises:
        Exception: Re-raises the first exception raised by any download task.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for option in option_values:
            for year in range(first_year, last_year + 1):
                from_date = f"01.01.{year}"
                if year == last_year and last_year_to_date:
                    # The final year is cut off at an explicit date.
                    to_date = last_year_to_date
                else:
                    to_date = f"31.12.{year}"
                futures.append(
                    executor.submit(fetch_data_for_issuer_year, option, from_date, to_date)
                )

        # result() surfaces exceptions raised inside the worker threads.
        for future in as_completed(futures):
            future.result()

def save_data_to_csv():
    """Write the collected rows in ``data`` to ``stock_data.csv``.

    Dates are parsed as ``DD.MM.YYYY`` and rendered back in the same format,
    so a malformed date raises before anything is written. The price-like
    columns are passed through ``format_mk_price``.
    """
    table = pd.DataFrame(data)

    # Parse, then render again: validates every date string.
    date_text = pd.to_datetime(table['Date'], format='%d.%m.%Y').dt.strftime('%d.%m.%Y')

    for name in ("Open", "High", "Low", "Close", "Change", "Turnover", "Market Cap"):
        table[name] = table[name].apply(format_mk_price)

    table['Date'] = date_text
    table.to_csv('stock_data.csv', index=False, encoding='utf-8')
    print("Податоците се успешно запишани во 'stock_data.csv' со македонски формат на цените.")

def format_mk_price(value):
    """Render a whole-number price string with two decimals in Macedonian format.

    Macedonian notation uses ``.`` as the thousands separator and ``,`` as the
    decimal separator (for example ``1.234,00``).

    Args:
        value: Cell text. Strings that already contain a comma are returned
            unchanged. Strings that are not a whole number once the dots are
            removed are returned unchanged. Non-string values (``None``, NaN,
            numbers) are returned unchanged instead of raising ``TypeError``.

    Returns:
        The formatted string, or ``value`` itself when no formatting applies.
    """
    if not isinstance(value, str):
        # Missing or non-text cells cannot be searched or parsed; pass through.
        return value
    if ',' in value:
        # Already has a decimal part in the target notation.
        return value
    try:
        # Dots are thousands separators here, so drop them before parsing.
        number = int(value.replace('.', ''))
    except ValueError:
        return value
    # Format as 1,234.00 and then swap the two separator characters.
    return f"{number:,.2f}".translate(str.maketrans(",.", ".,"))

if __name__ == "__main__":
    # Stage 1: full historical download, written to stock_data.csv.
    fetch_all_data_parallel()
    save_data_to_csv()

    # Stage 2: top up every issuer from its last stored date up to yesterday.
    # Read every column as text: type inference would otherwise reinterpret
    # dot-grouped numbers (e.g. "1.200") as floats and rewrite them as 1.2.
    df = pd.read_csv('stock_data.csv', encoding='utf-8', dtype=str, keep_default_na=False)
    df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
    latest_dates = df.groupby('Issuer')['Date'].max().reset_index()
    # Truncate to midnight so the comparison with stored (midnight) dates is a
    # pure date comparison; otherwise an issuer already current through
    # yesterday would still be re-fetched.
    yesterday = (datetime.now() - timedelta(days=1)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    yesterday_str = yesterday.strftime('%d.%m.%Y')

    column_names = [
        "Issuer", "Date", "Open", "High", "Low", "Close",
        "Change", "Volume", "Turnover", "Market Cap",
    ]

    def fetch_data_for_date_range(issuer, from_date, to_date):
        """Download one issuer's rows for a date range.

        Args:
            issuer: Issuer code, inserted into the request URL path.
            from_date: Range start as a ``DD.MM.YYYY`` string.
            to_date: Range end as a ``DD.MM.YYYY`` string.

        Returns:
            A list of rows, each ``[issuer, <nine cell texts>]`` in the order
            of ``column_names``. Returning rows (instead of appending to shared
            lists from worker threads) keeps the columns aligned.
        """
        base_url = "https://www.mse.mk/mk/stats/symbolhistory/"
        url = f"{base_url}{issuer}/?FromDate={from_date}&ToDate={to_date}"
        # A timeout keeps one stalled request from blocking a worker forever.
        response = requests.get(url, timeout=60)
        soup = BeautifulSoup(response.text, "html.parser")
        tabela = soup.find("tbody")

        fetched_rows = []
        if tabela:
            for table_row in tabela.find_all("tr"):
                cells = table_row.find_all("td")
                # Only rows with exactly nine cells are treated as data rows.
                if len(cells) == 9:
                    fetched_rows.append(
                        [issuer] + [cell.get_text(strip=True) for cell in cells]
                    )
        return fetched_rows

    new_rows = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        for _, row in latest_dates.iterrows():
            issuer = row['Issuer']
            last_date = row['Date']
            if last_date < yesterday:
                # Start the day after the last stored date; that date is
                # already in the file and must not be downloaded again.
                # NOTE(review): assumes FromDate is inclusive on the server -- confirm.
                from_date_str = (last_date + timedelta(days=1)).strftime('%d.%m.%Y')
                to_date_str = yesterday_str
                futures.append(executor.submit(fetch_data_for_date_range, issuer, from_date_str, to_date_str))

        # Rows are merged here, in the main thread only; result() also
        # surfaces any exception raised inside a worker.
        for future in as_completed(futures):
            new_rows.extend(future.result())

    if new_rows:
        new_data_df = pd.DataFrame(new_rows, columns=column_names)
        new_data_df['Date'] = pd.to_datetime(new_data_df['Date'], format='%d.%m.%Y')
        # Apply the same price formatting as the first pass so the file is uniform.
        for column in ["Open", "High", "Low", "Close", "Change", "Turnover", "Market Cap"]:
            new_data_df[column] = new_data_df[column].apply(format_mk_price)
        df = pd.concat([df, new_data_df], ignore_index=True)

    df['Date'] = df['Date'].dt.strftime('%d.%m.%Y')
    df.to_csv('stock_data.csv', index=False, encoding='utf-8')

    print("Податоците се успешно ажурирани со нови вредности до вчерашниот ден.")

    end_time = time.time()
    execution_time = end_time - start_time

    print(f"Време на извршување: {execution_time} секунди")

Податоците се успешно запишани во 'stock_data.csv' со македонски формат на цените.
Податоците се успешно ажурирани со нови вредности до вчерашниот ден.
Време на извршување: 559.5185868740082 секунди


# New Section