#### Notes:

The main purpose of this notebook is to restrict the databases (around 2 GB) to a manageable size,
<br> so that a proper demo database is generated here.


In [None]:
import pandas as pd
from pathlib import Path
import shutil
import sys
import sqlite3

sys.path.insert(0, r"E:/Thesis/crypto_project")

from factor_model.model_update.database_generators import (
    FACTOR_MODEL_ESTIMATES,
    RAW_DATA_DB,
    RETURN_DB,
    SPECIFIC_RISK,
)

In [8]:
# Directory holding the full-size databases; the cells below only read from it
# (it is the source side of the file copy).
DATABASE_LOCATION = Path(r"E:\Thesis\database")
# Directory that receives the database copies; the copied tables are then
# restricted in place to the demo symbols.
DEMO_DATABASE_LOCATION = Path(r"E:\Thesis\demo_database")
# CSV read without a header row; its first column supplies the symbols to keep.
SYMBOL_LIST_LOC = Path(r"E:\Thesis\demo_database\bita_10_20241027.csv")

In [None]:
# Seed the demo directory with verbatim copies of every database file, so that
# the restriction step works on the copies and not on the full-size originals.
databases = [RAW_DATA_DB, RETURN_DB, FACTOR_MODEL_ESTIMATES, SPECIFIC_RISK]
databases.append("default.sqlite3")
for database in databases:
    source_file = DATABASE_LOCATION / database
    target_file = DEMO_DATABASE_LOCATION / database
    # copyfile transfers the contents only and overwrites an existing target.
    shutil.copyfile(source_file, target_file)

In [None]:
# Symbols to keep: the first column of the header-less symbol CSV.
symbol_frame = pd.read_csv(SYMBOL_LIST_LOC, header=None)
symbols = list(symbol_frame[0])

# Tables to restrict, each keyed to the database file that stores it.
table_to_db_map = dict(
    raw_price_data=RAW_DATA_DB,
    returns=RETURN_DB,
    exposures=FACTOR_MODEL_ESTIMATES,
    specific_returns=FACTOR_MODEL_ESTIMATES,
)

In [36]:
# Restrict every mapped table, inside the copied demo databases, to the rows
# whose "symbol" value appears in `symbols`, then compact the database file.
for table, db_name in table_to_db_map.items():
    # sqlite3's connection context manager only commits or rolls back; it never
    # closes the connection. Close explicitly so no handle (and, on Windows, no
    # file lock) outlives the iteration.
    conn = sqlite3.connect(DEMO_DATABASE_LOCATION / db_name)
    try:
        # NOTE(review): the whole table is loaded into memory before filtering;
        # consider filtering on the SQL side if the source tables are very large.
        df = pd.read_sql(f"select * from {table}", conn)
        print(f">>> table name: {table}")
        print(f"original length: {len(df)}")
        df = df[df["symbol"].isin(symbols)]
        print(f"updated length: {len(df)}")
        # if_exists="replace" drops the existing table before writing, so a
        # separate DROP TABLE statement is not needed.
        df.to_sql(table, conn, if_exists="replace", index=False)
        # VACUUM cannot run inside an open transaction; make sure none is pending.
        conn.commit()
        conn.execute("VACUUM")
    finally:
        conn.close()

>>> table name: raw_price_data
original length: 22834
updated length: 22834
>>> table name: returns
original length: 22814
updated length: 22814
>>> table name: exposures
original length: 20587
updated length: 20587
>>> table name: specific_returns
original length: 20587
updated length: 20587
