<a href="https://colab.research.google.com/github/bekjarska221153/Dizajn_i_arhitektura_na_softver_domasna/blob/main/Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install selenium
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import csv
def collect_issuers():
    """Scrape issuer codes from the listing page and save them to ``issuers.csv``.

    Requests ``BASE_URL`` with ``HEADERS`` (both are module-level names that
    this function does not define itself), reads the ``value`` attribute of
    every ``<option>`` inside the ``<select id="Code">`` element, and keeps the
    non-empty values that contain no digit. The codes are then written to
    ``issuers.csv`` in the current working directory, one per row under an
    ``Issuer`` header.

    Returns:
        list[str]: The retained issuer codes, in the order they appear on the
        page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``<select id="Code">`` element.
    """
    # Imported locally because `re` is not among this file's top-level imports.
    import re

    # A timeout keeps a stalled connection from blocking the run forever.
    response = requests.get(BASE_URL, headers=HEADERS, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    dropdown = soup.find("select", {"id": "Code"})
    if dropdown is None:
        raise ValueError('No <select id="Code"> element found in the response')

    option_values = (option.get("value") for option in dropdown.find_all("option"))
    # Values containing a digit are dropped; only purely non-numeric codes are kept.
    issuers = [
        code for code in option_values
        if code and not re.search(r'\d', code)
    ]

    # Written as UTF-8 so the file matches the encoding load_issuers() reads with.
    with open("issuers.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Issuer"])
        writer.writerows([code] for code in issuers)

    print("Issuer codes saved to issuers.csv")
    return issuers


In [None]:

def load_issuers(filename='issuers.csv'):
    """Load issuer codes from the first column of a CSV file.

    The first row is treated as a header and skipped. Blank lines and rows
    whose first cell is empty (after stripping whitespace) are ignored, so the
    result never contains an empty code.

    Args:
        filename: Path of the CSV file to read. Defaults to ``'issuers.csv'``.

    Returns:
        list[str]: The stripped issuer codes in file order; an empty list when
        the file is empty or holds only a header.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(filename, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # The default of None avoids StopIteration when the file is empty.
        next(reader, None)
        # csv.reader yields [] for blank lines, hence the `row` check first.
        return [row[0].strip() for row in reader if row and row[0].strip()]

def collect_issuer_data(issuer_code):
    """Download the trading-history rows of one issuer for roughly ten years.

    The period from 3650 days ago until now is split into windows of at most
    365 days. For each window a POST request is sent to the history page with
    the issuer code and the window bounds (formatted ``dd.mm.yyyy``), and every
    body row of the ``resultsTable`` table that has at least nine cells is
    turned into a dict. Windows whose response has no such table are reported
    on stdout and skipped.

    NOTE(review): the URL path always ends in ``kmb``; the issuer is selected
    only through the ``Code`` form field. Confirm this is what the site expects.

    Args:
        issuer_code: Issuer code sent as the ``Code`` form field and stored in
            the ``Issuer`` key of every returned record.

    Returns:
        list[dict[str, str]]: One dict per table row, with the keys ``Issuer``,
        ``Date``, ``Last Transaction Price``, ``Max``, ``Min``,
        ``Average Price``, ``% Change``, ``Quantity``,
        ``Trading Volume (Denars)`` and ``Total Volume (Denars)``. All values
        are the stripped cell text; no numeric or date conversion is done.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Labels for the first nine <td> cells of a row, in table order.
    column_names = (
        'Date',
        'Last Transaction Price',
        'Max',
        'Min',
        'Average Price',
        '% Change',
        'Quantity',
        'Trading Volume (Denars)',
        'Total Volume (Denars)',
    )
    # Seconds to wait on the server; without it one stalled request would
    # block the whole multi-issuer run indefinitely.
    request_timeout = 30

    # Define the date range for data collection
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365 * 10)  # Start date 10 years ago

    # Base URL and headers
    base_url = "https://www.mse.mk/mk/stats/symbolhistory/kmb"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
    }

    # List to store data for the issuer
    issuer_data = []

    while start_date < end_date:
        # Set the range to a maximum of 1 year
        range_end_date = min(start_date + timedelta(days=365), end_date)
        start_date_str = start_date.strftime('%d.%m.%Y')
        range_end_date_str = range_end_date.strftime('%d.%m.%Y')

        # Prepare payload for the post request
        payload = {
            'Code': issuer_code,
            'FromDate': start_date_str,
            'ToDate': range_end_date_str,
            'action': 'Прикажи'
        }

        # Advance to the next window up front so that both the "no table" path
        # and the normal path continue from the same place.
        start_date = range_end_date + timedelta(days=1)

        # Send POST request to retrieve the data
        response = requests.post(
            base_url, headers=headers, data=payload, timeout=request_timeout
        )
        response.raise_for_status()

        # Parse the response HTML content and locate the results table
        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find("table", {"id": "resultsTable"})
        if table is None:
            print(f"No data available for {issuer_code} from {start_date_str} to {range_end_date_str}")
            continue

        # Process each row in the table (skipping the header row)
        for row in table.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) < len(column_names):
                continue
            record = {'Issuer': issuer_code}
            # zip() stops after the nine named columns; extra cells are ignored.
            record.update(
                (name, cell.text.strip())
                for name, cell in zip(column_names, cells)
            )
            issuer_data.append(record)

    return issuer_data

def main():
    """Collect the history of every issuer in ``issuers.csv`` into one CSV file.

    Reads the issuer codes with ``load_issuers``, fetches each issuer's rows
    with ``collect_issuer_data`` (printing a progress line per issuer), and
    writes the combined rows to ``all_issuers_data_last_10_years.csv`` without
    an index column, encoded as UTF-8.
    """
    records = []
    for code in load_issuers('issuers.csv'):
        print(f"Collecting data for issuer: {code}")
        records += collect_issuer_data(code)

    # Save all collected data to a CSV file
    frame = pd.DataFrame(records)
    frame.to_csv("all_issuers_data_last_10_years.csv", index=False, encoding='utf-8')
    print("Data collection complete. Saved to all_issuers_data_last_10_years.csv.")

# Entry point: run main() only when this module is executed directly
# (``__name__`` is "__main__"), not when it is imported by other code.
if __name__ == "__main__":
    main()


Collecting data for issuer: ADIN
Collecting data for issuer: ALK
Collecting data for issuer: ALKB
Collecting data for issuer: AMEH
Collecting data for issuer: APTK
Collecting data for issuer: ATPP
Collecting data for issuer: AUMK
Collecting data for issuer: BANA
Collecting data for issuer: BGOR
Collecting data for issuer: BIKF
No data available for BIKF from 19.11.2022 to 19.11.2023
Collecting data for issuer: BIM
Collecting data for issuer: BLTU
Collecting data for issuer: CBNG
No data available for CBNG from 14.11.2016 to 14.11.2017
No data available for CBNG from 15.11.2017 to 15.11.2018
Collecting data for issuer: CDHV
No data available for CDHV from 17.11.2020 to 17.11.2021
Collecting data for issuer: CEVI
Collecting data for issuer: CKB
Collecting data for issuer: CKBKO
No data available for CKBKO from 13.11.2014 to 13.11.2015
No data available for CKBKO from 14.11.2015 to 13.11.2016
No data available for CKBKO from 14.11.2016 to 14.11.2017
No data available for CKBKO from 15.11.