In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

In [3]:
def fetch_data_for_company(company, mainUrl, fromDates, toDates, timeout=30):
    """Download the history table of one company and return it as a DataFrame.

    One HTTP GET is issued per index ``i`` of ``fromDates``; the request covers
    the window ``10.11.<fromDates[i]>`` to ``09.11.<toDates[i]>``. Every row
    found under ``#resultsTable > tbody`` in the response becomes one record.

    Args:
        company: Code appended to ``mainUrl``; also stored in the
            "Company Name" column of every record.
        mainUrl: Base URL the company code and query string are appended to.
        fromDates: Sequence of year strings used for ``FromDate``.
        toDates: Sequence of year strings used for ``ToDate``; indexed in
            parallel with ``fromDates``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        pandas.DataFrame with the columns "Company Name", "Date", "Price",
        "Maximum", "Minimum", "Average Price", "Procent of turnover",
        "Quantity", "BEST MKD" and "Total turnover". Cell values are the raw
        text of the table cells. The columns are present even when no rows
        were scraped.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        IndexError: If ``toDates`` is shorter than ``fromDates``.
    """
    # Output column for the 1st..9th <td> of each table row, in page order.
    cell_columns = [
        "Date",
        "Price",
        "Maximum",
        "Minimum",
        "Average Price",
        "Procent of turnover",
        "Quantity",
        "BEST MKD",
        "Total turnover",
    ]
    all_columns = ["Company Name"] + cell_columns

    records = []
    for index, from_year in enumerate(fromDates):
        to_year = toDates[index]
        url = f"{mainUrl}{company}?FromDate=10.11.{from_year}&ToDate=09.11.{to_year}"

        # Without a timeout a stalled connection would block this worker forever.
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors instead of silently parsing an error page as "no rows".
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        for row in soup.select("#resultsTable > tbody > tr"):
            cells = [
                row.select_one(f"td:nth-child({position})")
                for position in range(1, len(cell_columns) + 1)
            ]
            # Rows that do not carry all nine cells cannot be mapped onto the
            # schema; skip them rather than crash on ``None.text``.
            if any(cell is None for cell in cells):
                continue

            record = {"Company Name": company}
            for column, cell in zip(cell_columns, cells):
                record[column] = cell.text
            records.append(record)

    # Passing ``columns`` keeps the schema stable even for an empty result.
    return pd.DataFrame(records, columns=all_columns)

In [5]:
def scrape(mainUrl, max_workers=20, timeout=30):
    """Scrape the history tables of every listed company concurrently.

    The company codes are read from the ``#Code`` dropdown of the page at
    ``mainUrl + "ALK"``. Codes containing a digit are excluded, as are empty
    option texts. Each remaining code is passed to ``fetch_data_for_company``
    on a thread pool, together with ten consecutive one-year windows
    (2014 -> 2015 through 2023 -> 2024).

    Args:
        mainUrl: Base URL of the symbol-history pages.
        max_workers: Size of the thread pool used for the per-company fetches.
        timeout: Seconds to wait for the dropdown page request.

    Returns:
        List of the per-company results, in dropdown order. Companies whose
        fetch raised an exception are reported on stdout and omitted.

    Raises:
        requests.RequestException: If the dropdown page cannot be retrieved.
    """
    fromDates = ['2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']
    toDates = ['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']

    # Any company page carries the full dropdown; "ALK" is used as the entry point.
    pomUrl = mainUrl + "ALK"
    response = requests.get(pomUrl, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    companies = []
    for option in soup.select("#Code > option"):
        code = option.text.strip()
        # Skip placeholder options and codes that contain digits.
        if code and not any(char.isdigit() for char in code):
            companies.append(code)

    frames_by_position = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(fetch_data_for_company, company, mainUrl, fromDates, toDates): (position, company)
            for position, company in enumerate(companies)
        }
        # as_completed reports failures promptly; the position index lets the
        # results be put back into a deterministic order afterwards.
        for future in as_completed(futures):
            position, company = futures[future]
            try:
                frames_by_position[position] = future.result()
            except Exception as e:
                print(f"Error fetching data for {company}: {e}")

    return [frames_by_position[position] for position in sorted(frames_by_position)]

In [7]:
# Time the full scrape with a monotonic clock: the run takes many minutes, and
# time.time() can jump if the system clock is adjusted in the meantime.
start_time = time.perf_counter()
data = scrape("https://www.mse.mk/mk/stats/symbolhistory/")
end_time = time.perf_counter()

In [15]:
# Report the elapsed seconds between the two timestamps taken around the scrape.
print("Time taken: " + str(end_time - start_time))

Time taken: 1255.6618976593018
