In [42]:
import sys
import os
import pandas as pd
import sqlite3
from scipy import stats
from statsmodels.stats.diagnostic import het_white
from tqdm import tqdm


In [6]:
# Directory holding the SQLite database files read and written by this notebook.
DATABASE_LOCATION = r"E:/Thesis/database"
# Root of the project checkout; it must be importable for the
# `factor_model` import that follows.
PROJECT_ROOT = r"E:/Thesis/crypto_project"
# Only register the path once, so re-running this cell does not keep
# stacking duplicate entries at the front of sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
from factor_model.model_update.database_generators import (
    RAW_DATA_DB,
    RETURN_DB,
    SPECIFIC_RISK,
    FACTOR_MODEL_ESTIMATES,
    FIX_SET_OF_HALF_LIFES,
)

In [47]:

# Read the `exposures`, `r_squares` and `specific_returns` tables in full.
# A single connection serves all three reads. Note that using
# `with sqlite3.connect(...)` only commits/rolls back on exit and does NOT
# close the connection, so the handle is closed explicitly in `finally`.
conn = sqlite3.connect(os.path.join(DATABASE_LOCATION, FACTOR_MODEL_ESTIMATES))
try:
    all_exposures = pd.read_sql("SELECT * FROM exposures", conn)
    all_rsquares = pd.read_sql("SELECT * FROM r_squares", conn)
    all_residuals = pd.read_sql("SELECT * FROM specific_returns", conn)
finally:
    conn.close()


In [48]:
# One entry per row of the r_squares table, in table order; the diagnostics
# computed per date are later attached back to that table positionally.
all_dates = all_rsquares["date"].tolist()


In [None]:
# Accumulators for the per-date p-values: Jarque-Bera and White's LM test.
jb_container, white_container = [], []

In [45]:
# Exposure columns handed to White's test as explanatory variables.
# NOTE(review): het_white requires one constant column in exog; presumably
# `market` plays that role here - confirm.
FACTOR_COLUMNS = ["market", "new_coin", "momentum", "volume", "reversal", "size"]

# Rebuild the result lists from scratch so that re-running this cell cannot
# append a second batch of p-values and break the one-value-per-date layout
# expected when they are attached to the r_squares table.
jb_container = []
white_container = []

# Split both tables by date once, instead of scanning every row of both
# tables with a boolean mask on each iteration.
core_exposures_by_date = all_exposures[all_exposures["core_universe"] > 0].groupby("date")
residuals_by_date = all_residuals.groupby("date")

for date in tqdm(all_dates):
    # get_group raises KeyError if a date has no core-universe exposures or
    # no residuals at all.
    core_exposure = core_exposures_by_date.get_group(date).sort_values(by="symbol")
    residuals_on_date = residuals_by_date.get_group(date)
    core_residuals = residuals_on_date[
        residuals_on_date["symbol"].isin(core_exposure["symbol"])
    ].sort_values(by="symbol")

    # Residuals and exposures are paired row by row below, so both frames
    # must list exactly the same symbols in the same order.
    if core_residuals["symbol"].tolist() != core_exposure["symbol"].tolist():
        raise ValueError(
            f"Residual and exposure symbols do not line up on {date}: "
            f"{len(core_residuals)} residual rows vs {len(core_exposure)} exposure rows"
        )

    # Jarque-Bera test on the cross-section of specific returns.
    res = stats.jarque_bera(core_residuals["specific_return"])
    jb_container.append(float(res.pvalue))

    # White's heteroskedasticity test; element 1 of the result is the
    # p-value of the Lagrange-multiplier statistic.
    white_test = het_white(core_residuals["specific_return"], core_exposure[FACTOR_COLUMNS])
    white_container.append(float(white_test[1]))


100%|██████████| 2363/2363 [44:33<00:00,  1.13s/it]


In [56]:
# Attach the per-date p-values to a copy of the r_squares table. The lists
# are matched to rows by position, so they must hold one value per row.
all_rsquares_update = all_rsquares.assign(
    white_lm_pvalue=white_container,
    jb_pvalue=jb_container,
)


In [59]:
# Overwrite the `r_squares` table with the version that carries the two new
# p-value columns. `with conn:` wraps the write in a transaction (commit on
# success, rollback on error) but does not close the connection, so the
# handle is closed explicitly afterwards.
conn = sqlite3.connect(os.path.join(DATABASE_LOCATION, FACTOR_MODEL_ESTIMATES))
try:
    with conn:
        all_rsquares_update.to_sql("r_squares", conn, if_exists="replace", index=False)
finally:
    conn.close()