In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import os
from threading import Thread


def fetch_issuers():
    # The URL that we use to fetch the issuers
    url = 'https://www.mse.mk/en/stats/symbolhistory/KMB'

    # request
    response = requests.get(url)

    # In case if the requests fails
    while response.status_code != 200:
        response = requests.get(url)

    # Parse the HTML that we got
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the dropdown menu for issuers
    issuers_dropdown = soup.find('select', {'id': 'Code'})
    issuers = []

    # Add the issuer code to the issuers list
    for option in issuers_dropdown.find_all('option'):
        code = option.get('value')
        if code and not any(char.isdigit() for char in code):
            issuers.append(code)

    # return the issuers
    return issuers

############################################################################################################

def table(data):
    df = pd.DataFrame(data)
    df.columns = ["Date", "Last trade price", "Max", "Min", "Avg.Price", "%chg.", "Volume",
                  "Turnover in BEST in denars", "Total turnover in denars"]
    return df

###############################################################################################################

def parse_cells(row):
    translation_table = str.maketrans({',': '.', '.': ','})

    cells = row.find_all('td')

    if len(cells) < 9:
        return None

    date = cells[0].text
    last_trade_price = cells[1].text.translate(translation_table)
    max = cells[2].text.translate(translation_table)
    min = cells[3].text.translate(translation_table)
    avg_price = cells[4].text.translate(translation_table)
    chg = cells[5].text.translate(translation_table)
    volume = cells[6].text.translate(translation_table)
    turnover_in_best = cells[7].text.translate(translation_table)
    total_turnover = cells[8].text.translate(translation_table)

    result = [date, last_trade_price, max, min, avg_price, chg, volume, turnover_in_best, total_turnover]
    return result

############################################################################################################################

def parse_soup(bs):
    # table = bs.find('table', {'id': 'resultsTable'})
    #
    # rows = []
    # if table is not None:
    #     tbody = table.find('tbody')  # Get the <tbody> element
    #     if tbody:
    #         rows = tbody.find_all('tr')  # Get all the rows in the body
    # else:
    #     rows = None
    #
    # data = []
    # for row in rows:
    #
    #     parsed_row = parse_cells.parse_cells(row)
    #     data.append(parsed_row)
    #
    # return data
    table = bs.find_all('tbody')
    if len(table) == 0:
        return None
    table = table[0]
    table_rows = table.find_all('tr')
    ret_table = []

    for row in table_rows:
        ret_table.append(parse_cells(row))

    return ret_table


####################################################################################################################################

def range_in_days(date, days):
    date_from = datetime.strptime(date, '%m/%d/%Y') - timedelta(days=days)
    return date_from.strftime('%m/%d/%Y').__str__()


########################################################################################################################

def fetch_range(companies, code, start_date, end_date):
    base_url = 'https://www.mse.mk/en/stats/symbolhistory/'

    date_from = datetime.strptime(start_date, '%m/%d/%Y')
    date_to = datetime.strptime(end_date, '%m/%d/%Y')

    days = (date_to - date_from).days
    years = days // 365
    daysleft = days % 365

    start_date = range_in_days(end_date, 365)

    if years == 0:
        start_date = range_in_days(end_date, daysleft)

    stock_data = []

    for i in range(1, years + 2):
        if i == (years + 1):
            start_date = datetime.strptime(end_date, '%m/%d/%Y') - timedelta(days=daysleft)
            start_date = date_from.strftime('%m/%d/%Y').__str__()

        url = base_url + code

        form_data = {
            'FromDate': start_date,
            'ToDate': end_date,
        }

        response = requests.post(url, data=form_data,timeout=(25, 60))

        while response.status_code != 200:
            response = requests.post(url, data=form_data,timeout=(25, 60))

        soup = BeautifulSoup(response.text, 'html.parser')

        parsed = parse_soup(soup)

        if parsed is not None:
            stock_data += parsed

        end_date = start_date
        start_date = range_in_days(end_date, 365)

    #df = pd.DataFrame(stock_data)
    #print(df)
    res = table(stock_data)
    res.to_excel(f'./database/{code}.xlsx')
    companies[code] = res


#########################################################################################################################################

def fetch_data(companies, issuers):
    dataframes = dict()
    threadpool = []
    today = str(datetime.today().strftime('%m/%d/%Y'))

    for code in issuers:

        if f'{code}.xlsx' not in os.listdir('./database'):
            search_from = range_in_days(today, 365 * 10)
        else:
            curr_df = pd.read_excel(f'./database/{code}.xlsx')
            search_from = str((datetime.strptime(curr_df.Date[0], '%m/%d/%Y') + timedelta(days=1)).strftime('%m/%d/%Y'))
            yesterday = (datetime.today() - timedelta(days=1)).strftime('%m/%d/%Y')

            if today == search_from or yesterday == search_from:
                continue

            dataframes[code] = curr_df.drop(columns=['Unnamed: 0'])

        # Pass to 3rd Filter
        # fetch_range.fetch_range(companies, code, search_from, today)
        thread = Thread(target=fetch_range, args=(companies,code, search_from, today))
        thread.start()
        threadpool.append(thread)

    for thread in threadpool:
        thread.join()

    for code in issuers:
        if code in companies:
            df = pd.concat(
                [companies[code], dataframes[code] if code in dataframes.keys() else pd.DataFrame()],
                axis=0)
            df = df.reset_index().drop(columns=['index'])
            df.to_excel(f'./database/{code}.xlsx')


############################################################################################################################################


if __name__ == '__main__':
    issuers = fetch_issuers()
    # issuers = ["KMB"]
    companies = dict()
    fetch_data(companies, issuers)




Exception in thread Thread-31 (fetch_range):
Traceback (most recent call last):
  File "d:\Faculty\5th Semester\Software Design and Architecture\HomeWorks\.venv\Lib\site-packages\urllib3\connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
  File "d:\Faculty\5th Semester\Software Design and Architecture\HomeWorks\.venv\Lib\site-packages\urllib3\connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
  File "C:\Users\Ardit\AppData\Local\Programs\Python\Python313\Lib\http\client.py", line 1428, in getresponse
    response.begin()
    ~~~~~~~~~~~~~~^^
  File "C:\Users\Ardit\AppData\Local\Programs\Python\Python313\Lib\http\client.py", line 331, in begin
    version, status, reason = self._read_status()
                              ~~~~~~~~~~~~~~~~~^^
  File "C:\Users\Ardit\AppData\Local\Programs\Python\Python313\Lib\http\client.py", line 292, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
             