In [1]:
from pathlib import Path
import importlib
import logging
import pickle

import pandas as pd
import yaml

from base import BaseRecord

# Surface INFO-level progress messages from the scraper in the cell output.
logging.basicConfig(level=logging.INFO)

# Reset the on-disk store: db.bin is overwritten with a pickled empty dict
# every time this cell runs.
with open('db.bin', 'wb') as fp:
    fp.write(pickle.dumps({}))

In [2]:
# The config file name doubles as the platform name: the stem of
# configs/<name>.yaml is imported as the module that provides Scraper/Record.
path = Path('configs/runway.yaml')
platform_name = path.stem
# Read the YAML as UTF-8 explicitly so parsing does not depend on the
# machine's locale encoding. An empty file parses to None; treat it as
# "no configs" rather than failing later.
with open(path, mode='r', encoding='utf-8') as fp:
    platform_config = yaml.safe_load(fp) or []
platform = importlib.import_module(platform_name)

# TEMP: only scrape the last configured entry. Slicing (instead of indexing)
# keeps an empty config list from raising IndexError.
platform_config = platform_config[-1:]

dfs = []
for kwargs in platform_config:
    try:
        dfs.append(platform.Scraper(**kwargs).scrape())
    except Exception as error:
        # Best effort: one failing entry must not abort the remaining ones.
        # logging.exception keeps the traceback that logging.error dropped.
        logging.exception(error)
        logging.info(f'Skipping for config: {kwargs}.')

if dfs:
    new_inventory = pd.concat(dfs, axis=0, ignore_index=True)
else:
    # pd.concat raises "No objects to concatenate" on an empty list; fall back
    # to an empty frame so the failure mode is explicit in the log instead.
    logging.warning('No config was scraped successfully; new_inventory is empty.')
    new_inventory = pd.DataFrame()

INFO:root:Scraping at 2024-11-24 21:27:45.056102+00:00 for Item ID: 0024403005.
INFO:root:Fetching HTML from https://runway-webstore.com/ap/item/i/m/0024403005.
INFO:root:Extracting item information from HTML.
INFO:root:Parsing item information to DataFrame.


In [3]:
class DbConnector:
    """Minimal pickle-backed table store.

    The whole database is a single pickled ``dict`` mapping table names to
    ``pandas.DataFrame`` objects. It is loaded into memory on construction and
    rewritten in full on every save.

    Args:
        path: Location of the pickle file. Defaults to ``'db.bin'`` relative to
            the current working directory.

    Raises:
        FileNotFoundError: If ``path`` does not exist. The file has to be
            initialised beforehand (e.g. with a pickled empty ``dict``).

    Warning:
        ``pickle.load`` can execute arbitrary code while deserialising, so only
        point this class at files you created yourself.
    """

    def __init__(self, path: 'str | Path' = 'db.bin') -> None:
        # The path must be set before _connect() reads it.
        self.path = Path(path)
        self.db = self._connect()

    def _connect(self) -> dict[str, pd.DataFrame]:
        """Load and return the table mapping stored at ``self.path``."""
        # SECURITY: unpickling untrusted data is unsafe; see the class docstring.
        with open(self.path, mode='rb') as fp:
            return pickle.load(fp)

    def _save(self) -> None:
        """Persist ``self.db`` to ``self.path`` and reload it from disk."""
        # Dump to a sibling temp file and swap it in afterwards, so an
        # interrupted dump cannot leave a truncated database behind.
        tmp_path = self.path.with_name(self.path.name + '.tmp')
        try:
            with open(tmp_path, mode='wb') as fp:
                pickle.dump(self.db, fp)
            tmp_path.replace(self.path)
        finally:
            # No-op after a successful replace; removes the partial file otherwise.
            tmp_path.unlink(missing_ok=True)
        # Reload so the in-memory state is exactly what was persisted.
        self.db = self._connect()

    def table_exists(self, table_name: str) -> bool:
        """Return ``True`` if a table named ``table_name`` is in the store."""
        return table_name in self.db

    # The record_type annotation is quoted so it is not evaluated when the
    # class is defined.
    def create_table(self, table_name: str, record_type: 'type[BaseRecord]') -> None:
        """Create an empty, typed table and persist the store.

        Args:
            table_name: Key under which the table is stored.
            record_type: Record class whose ``get_df_dtypes()`` is expected to
                return a mapping of column name to dtype; its keys become the
                columns and the mapping is passed to ``DataFrame.astype``.

        Note:
            An existing table with the same name is replaced without warning;
            call ``table_exists`` first to avoid that.
        """
        columns_dtypes = record_type.get_df_dtypes()
        empty_table = pd.DataFrame(columns=list(columns_dtypes))
        self.db[table_name] = empty_table.astype(columns_dtypes)
        self._save()

In [4]:
# Open the pickle-backed store; with no arguments DbConnector loads db.bin
# from the current working directory.
db_connector = DbConnector()

In [5]:
# Inspect the freshly loaded store: the mapping of table name -> DataFrame.
db_connector.db

{}

In [6]:
# Create the platform's table only when it is missing: create_table would
# otherwise replace the existing table with an empty one. The column schema
# comes from platform.Record.get_df_dtypes().
if not db_connector.table_exists(platform_name):
    db_connector.create_table(platform_name, platform.Record)

In [9]:
# Inspect the store again after the table-creation step above.
db_connector.db

{'runway': Empty DataFrame
 Columns: [item_id, name, brand, price_original, price_current, price_currency, color, size, is_available, unit_left, asof, url]
 Index: []}