In [1]:
import csv
import json
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import requests
import os
import time


class StockData:
    """A single trading-day record for one symbol, as scraped from the history table.

    The constructor stores every argument unchanged on the instance under the
    same name; no parsing or validation is performed here.
    """

    def __init__(self, seller, date, last_transaction, max_value, min_value, average, change, volume, best_sales, all_sales):
        # One tuple assignment keeps the attribute order identical to the
        # parameter order (targets are bound left to right).
        (
            self.seller,
            self.date,
            self.last_transaction,
            self.max_value,
            self.min_value,
            self.average,
            self.change,
            self.volume,
            self.best_sales,
            self.all_sales,
        ) = (
            seller,
            date,
            last_transaction,
            max_value,
            min_value,
            average,
            change,
            volume,
            best_sales,
            all_sales,
        )

    def to_dict(self):
        """Return the record as a plain dict keyed by attribute name.

        Key order matches the constructor's parameter order, which is what
        determines the column order when the dict is written to CSV.
        """
        field_names = (
            "seller",
            "date",
            "last_transaction",
            "max_value",
            "min_value",
            "average",
            "change",
            "volume",
            "best_sales",
            "all_sales",
        )
        return {name: getattr(self, name) for name in field_names}


def save_to_csv(file_path, data, seller):
    """Append row dicts to a CSV file, writing a header when the file has none.

    Args:
        file_path: Path of the CSV file; it is created if it does not exist.
        data: List of dicts. The keys of the first dict define the columns.
        seller: Symbol name, used only in the confirmation message.

    Returns:
        None. When ``data`` is empty nothing is written and no file is created.
    """
    if not data:
        # Without rows there are no column names to derive (data[0] would
        # raise IndexError), so there is nothing meaningful to write.
        return

    # A zero-byte file (for example one left behind by an interrupted run)
    # exists but has no header yet, so checking existence alone is not enough.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0

    with open(file_path, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=list(data[0].keys()))
        if needs_header:
            writer.writeheader()
        writer.writerows(data)
    print(f"Data for {seller} saved to '{file_path}'.")


def save_to_json(file_path, data, seller):
    """Append records to a JSON array stored in ``file_path``.

    The whole file is read, extended with ``data`` and rewritten with
    4-space indentation and non-ASCII characters left unescaped.

    Args:
        file_path: Path of the JSON file; it is created if it does not exist.
        data: Iterable of JSON-serialisable records to append.
        seller: Symbol name, used only in the confirmation message.

    Raises:
        json.JSONDecodeError: If the existing file has non-blank content that
            is not valid JSON.
    """
    existing_data = []
    if os.path.exists(file_path):
        with open(file_path, mode="r", encoding="utf-8") as file:
            raw = file.read()
        # An empty or whitespace-only file (e.g. left by an interrupted write)
        # is treated as "no records yet" instead of failing in the JSON parser.
        if raw.strip():
            existing_data = json.loads(raw)

    existing_data.extend(data)

    with open(file_path, mode="w", encoding="utf-8") as file:
        json.dump(existing_data, file, indent=4, ensure_ascii=False)
    print(f"Data for {seller} saved to '{file_path}'.")


def read_from_csv(file_path):
    """Load all rows of a CSV file as a list of dicts keyed by the header row.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, or an empty list when the file
        does not exist.
    """
    if not os.path.exists(file_path):
        return []
    # newline="" mirrors how the file is written and lets the csv module do
    # its own newline handling, so line breaks inside quoted fields are
    # returned unchanged.
    with open(file_path, mode="r", newline="", encoding="utf-8") as file:
        return list(csv.DictReader(file))


def scrape_page(seller, from_date, to_date, timeout=30):
    """Fetch and parse the price-history table for one symbol and date range.

    Args:
        seller: Symbol code inserted into the URL path.
        from_date: Start of the range, already formatted as a string for the
            ``FromDate`` query parameter.
        to_date: End of the range, already formatted as a string for the
            ``ToDate`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        A list of dicts (``StockData.to_dict()``), one per accepted table row.
        Rows with fewer than 9 cells, an empty third cell, or a first cell
        that is not a ``dd.mm.YYYY`` date are skipped.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    url = f"https://www.mse.mk/mk/stats/symbolhistory/{seller}/?FromDate={from_date}&ToDate={to_date}"
    print(f"Fetching data from URL: {url}")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.select("tbody tr")
    if not rows:
        print(f"No data found for {seller} from {from_date} to {to_date}.")

    data = []
    for row in rows:
        cells = [cell.text.strip() for cell in row.select("td")]
        # Only rows with all nine columns and a non-empty third cell are kept.
        if len(cells) < 9 or not cells[2]:
            continue
        try:
            # Round-trip through strptime to validate the date text; a
            # malformed value raises ValueError and the row is skipped.
            date = datetime.strptime(cells[0], "%d.%m.%Y").strftime("%d.%m.%Y")
            record = StockData(
                seller=seller,
                date=date,
                last_transaction=f"{cells[1]} ден.",
                max_value=f"{cells[2]} ден.",
                min_value=f"{cells[3]} ден.",
                average=f"{cells[4]} ден.",
                change=cells[5],
                volume=cells[6],
                best_sales=f"{cells[7]} ден.",
                all_sales=f"{cells[8]} ден.",
            )
            data.append(record.to_dict())
        except ValueError:
            print(f"Skipping invalid row: {cells}")
        except Exception as e:
            # Best effort: one bad row must not abort the rest of the page.
            print(f"Unexpected error processing row {cells}: {e}")
    return data


def update_seller_data(file_path_csv, file_path_json, seller):
    """Scrape any missing history for ``seller`` and append it to the CSV/JSON files.

    The start date is the day after the latest date already stored for this
    seller in the CSV, or 3650 days before today when the seller has no rows.
    The range up to today is fetched in windows of at most 365 days, with a
    2-second pause after each request.

    Args:
        file_path_csv: CSV file that holds previously saved rows and receives
            the new ones.
        file_path_json: JSON file that receives the new rows.
        seller: Symbol code to update.
    """
    today = datetime.now().date()
    existing_data = read_from_csv(file_path_csv)

    # The CSV is shared by all sellers, so only this seller's rows may decide
    # where to resume. Using every row would make each seller start from the
    # latest date of whichever seller was saved before it.
    seller_dates = [
        row["date"]
        for row in existing_data
        if row.get("seller") == seller and row.get("date")
    ]
    try:
        last_date = max(
            (datetime.strptime(value, "%d.%m.%Y").date() for value in seller_dates),
            default=None,
        )
    except ValueError:
        print("Error parsing existing dates in CSV.")
        last_date = None

    from_date = (last_date + timedelta(days=1)) if last_date else today - timedelta(days=365 * 10)
    new_data = []
    while from_date <= today:
        to_date = min(from_date + timedelta(days=365), today)
        print(f"Scraping {seller} from {from_date} to {to_date}...")
        try:
            scraped_data = scrape_page(seller, from_date.strftime("%Y-%m-%d"), to_date.strftime("%Y-%m-%d"))
            new_data.extend(scraped_data)
        except Exception as e:
            # Best effort: a failed window is reported and the next one is tried.
            print(f"Error scraping data for {seller} from {from_date} to {to_date}: {e}")
        time.sleep(2)  # pause between consecutive requests
        from_date = to_date + timedelta(days=1)

    if new_data:
        save_to_csv(file_path_csv, new_data, seller)
        save_to_json(file_path_json, new_data, seller)
    else:
        print(f"No new data found for {seller}.")


def get_sellers(timeout=30):
    """Return the symbol codes listed in the history page's ``<option>`` elements.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        A list of stripped option texts, excluding empty ones and any that
        contain a digit.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    url = "https://www.mse.mk/mk/stats/symbolhistory/ADIN"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    sellers = []
    for option in soup.select("option"):
        name = option.text.strip()  # strip once and reuse
        # NOTE(review): names containing digits are dropped; presumably these
        # are non-share instruments such as bonds - confirm against the site.
        if name and not any(ch.isdigit() for ch in name):
            sellers.append(name)
    return sellers


# Script entry point: discover all symbols, then update each one in turn.
if __name__ == "__main__":
    # Every seller's rows go into the same CSV file and the same JSON file.
    seller_file_csv = "stock_data.csv"
    seller_file_json = "stock.json"
    try:
        sellers = get_sellers()
        print("Sellers found:", sellers)
        for seller in sellers:
            update_seller_data(seller_file_csv, seller_file_json, seller)
    except Exception as e:
        # Top-level boundary: any exception that reaches here ends the run,
        # so sellers after the failing one are not processed.
        print(f"An error occurred: {e}")


Sellers found: ['ADIN', 'ALK', 'ALKB', 'AMBR', 'AMEH', 'APTK', 'ATPP', 'AUMK', 'BANA', 'BGOR', 'BIKF', 'BIM', 'BLTU', 'CBNG', 'CDHV', 'CEVI', 'CKB', 'CKBKO', 'DEBA', 'DIMI', 'EDST', 'ELMA', 'ELNC', 'ENER', 'ENSA', 'EUHA', 'EUMK', 'EVRO', 'FAKM', 'FERS', 'FKTL', 'FROT', 'FUBT', 'GALE', 'GDKM', 'GECK', 'GECT', 'GIMS', 'GRDN', 'GRNT', 'GRSN', 'GRZD', 'GTC', 'GTRG', 'IJUG', 'INB', 'INHO', 'INOV', 'INPR', 'INTP', 'JAKO', 'JULI', 'JUSK', 'KARO', 'KDFO', 'KJUBI', 'KKFI', 'KKST', 'KLST', 'KMB', 'KMPR', 'KOMU', 'KONF', 'KONZ', 'KORZ', 'KPSS', 'KULT', 'KVAS', 'LAJO', 'LHND', 'LOTO', 'LOZP', 'MAGP', 'MAKP', 'MAKS', 'MB', 'MERM', 'MKSD', 'MLKR', 'MODA', 'MPOL', 'MPT', 'MPTE', 'MTUR', 'MZHE', 'MZPU', 'NEME', 'NOSK', 'OBPP', 'OILK', 'OKTA', 'OMOS', 'OPFO', 'OPTK', 'ORAN', 'OSPO', 'OTEK', 'PELK', 'PGGV', 'PKB', 'POPK', 'PPIV', 'PROD', 'PROT', 'PTRS', 'RADE', 'REPL', 'RIMI', 'RINS', 'RZEK', 'RZIT', 'RZIZ', 'RZLE', 'RZLV', 'RZTK', 'RZUG', 'RZUS', 'SBT', 'SDOM', 'SIL', 'SKON', 'SKP', 'SLAV', 'SNBT', 'SN