In [42]:
import sys
# Put two project checkouts at the front of the module search path; presumably
# this is what makes the `factor_model` package imported below resolvable —
# TODO confirm which of the two locations is the one actually in use.
# NOTE(review): these are absolute, machine-specific paths, so the notebook
# only runs unchanged on a machine that has the same drive layout.
sys.path.insert(0, r"K:/Thesis/codes/crypto_project")
sys.path.insert(0, r"E:/Thesis/crypto_project")
import os
import sqlite3
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from factor_model.model_update.database_generators.generate_databases import refresh_database

# Directory holding the SQLite databases that are read and written below
# (joined with the database file names imported in the next statement).
DATABASE_LOCATION = r"E:/Thesis/database"
from factor_model.model_update.database_generators import (
    FACTOR_MODEL_ESTIMATES,
    FIX_SET_OF_HALF_LIFES,
    SPECIFIC_RISK
)


In [28]:
from factor_model.risk_calculations.specific_risk import (
    generate_raw_portfolio_specific_risk
)
from factor_model.risk_calculations.core_universe_portfolio import generate_market_portfolio

In [29]:
# Load both inputs from the factor-model estimates database over a single
# connection. `with sqlite3.connect(...)` only wraps a transaction (commit /
# rollback) and leaves the connection open, so the handle is closed explicitly
# in `finally` to avoid keeping the database file open for the rest of the
# kernel session.
estimates_db_path = os.path.join(DATABASE_LOCATION, FACTOR_MODEL_ESTIMATES)
conn = sqlite3.connect(estimates_db_path)
try:
    # Every row of the specific_returns table.
    full_specific_returns = pd.read_sql_query("SELECT * FROM specific_returns", conn)

    # Exposure rows restricted to names flagged as part of the core universe.
    estim_universe = pd.read_sql_query(
        "SELECT ticker, transformed_market_cap, date, core_universe "
        "FROM exposures where core_universe > 0",
        conn,
    )
finally:
    conn.close()

In [30]:
# Keep only the specific returns of tickers that show up with a positive
# core_universe flag on at least one date of the exposure data.
core_universe_rows = estim_universe["core_universe"] > 0
core_tickers = estim_universe.loc[core_universe_rows, "ticker"].unique()
in_core_universe = full_specific_returns["ticker"].isin(core_tickers)
full_specific_returns = full_specific_returns.loc[in_core_universe].copy()

In [31]:
# Reference parameter set for the risk calculations.
risk_calculation_parameters = {
    "correlation_half_life": 730,  # days
    "variance_half_life": 365,  # days
    "specific_risk_half_life": 365,
    "date": "2023-03-04",
    "minimum_history_spec_ret": 730
}

# Distinct dates in order of first appearance in the specific-return data,
# with the first one skipped.
# NOTE(review): presumably the first date is skipped because an EWM standard
# deviation is undefined for a single observation — confirm.
distinct_dates = full_specific_returns["date"].unique()
dates = list(distinct_dates[1:])

In [32]:
def multi_process_calc(temp_half_life, specific_returns=None):
    """Compute the exponentially weighted specific risk of every ticker.

    For each ticker, the ``specific_return`` values are ordered by ``date`` and
    passed through ``Series.ewm(halflife=temp_half_life).std()``.

    Parameters
    ----------
    temp_half_life : float
        Half-life handed to ``ewm``. Without a ``times`` argument pandas
        interprets it in number of observations (rows) per ticker.
    specific_returns : pandas.DataFrame, optional
        Frame with ``ticker``, ``date`` and ``specific_return`` columns.
        Defaults to the notebook-level ``full_specific_returns``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``specific_risk`` and ``symbol`` (the ticker), one
        row per input row with a non-null ticker, ordered by ticker and then
        date, on a fresh ``RangeIndex``.
    """
    if specific_returns is None:
        specific_returns = full_specific_returns

    # An EWM statistic depends on row order, and the source table is read
    # without ORDER BY, so chronological order is enforced here. Two stable
    # single-key sorts give (ticker, date) order while keeping the original
    # order of exact ties. Rows without a ticker are dropped, which matches
    # what grouping by ticker would do.
    # NOTE(review): assumes `date` values sort chronologically (e.g. ISO
    # strings or datetimes) — confirm against the database schema.
    ordered = (
        specific_returns.dropna(subset=["ticker"])
        .sort_values("date", kind="mergesort")
        .sort_values("ticker", kind="mergesort")
        .reset_index(drop=True)
    )

    # One EWM standard deviation series per ticker, aligned row-for-row with
    # `ordered`; sort=False because the frame is already in ticker order.
    specific_risk = ordered.groupby("ticker", sort=False)["specific_return"].transform(
        lambda returns: returns.ewm(halflife=temp_half_life).std()
    )

    return pd.DataFrame(
        {
            "date": ordered["date"],
            "specific_risk": specific_risk,
            "symbol": ordered["ticker"],
        }
    )

In [33]:
# One specific-risk frame per candidate half-life, keyed by the half-life and
# indexed by (date, symbol).
all_core_spec_risk = dict()

for temp_half_life in tqdm(FIX_SET_OF_HALF_LIFES):
    risk_for_half_life = multi_process_calc(temp_half_life)
    all_core_spec_risk[temp_half_life] = risk_for_half_life.set_index(["date", "symbol"])

  0%|          | 0/38 [00:00<?, ?it/s]

In [34]:
# for key in tqdm(all_core_spec_risk.keys()):
#     all_core_spec_risk[key].to_csv(os.path.join(DATABASE_LOCATION, f"core_spec_risk_{key}.csv"))

In [35]:
# all_core_spec_risk = dict()
# for key in FIX_SET_OF_HALF_LIFES:
#     all_core_spec_risk[key] = pd.read_csv(os.path.join(DATABASE_LOCATION, f"core_spec_risk_{key}.csv"))
#     all_core_spec_risk[key].set_index(["date", "symbol"],inplace=True)

In [36]:
# Split the exposures into one frame per date for constant-time lookup.
estim_universe_dict = {date: df for date, df in estim_universe.groupby("date")}

# Do the same for every half-life's specific-risk frame. The values of
# `all_core_spec_risk` are replaced in place (DataFrame -> {date: DataFrame}),
# so entries that were already split are skipped; without the guard a second
# run of this cell would call `.groupby` on a dict and fail.
for key, risk_frame in all_core_spec_risk.items():
    if isinstance(risk_frame, pd.DataFrame):
        all_core_spec_risk[key] = {date: df for date, df in risk_frame.groupby("date")}

In [37]:
# One single-row frame per (date, half_life) pair; they are combined afterwards.
all_combined_specific_risk_estimates = list()

for date in tqdm(dates):
    # 1. gather exposures
    exposure = estim_universe_dict[date]
    # 2. identify universe_info (depends on the date only, so it is built once
    #    per date rather than once per half-life)
    market_portfolio = generate_market_portfolio(exposure)

    for half_life in FIX_SET_OF_HALF_LIFES:
        # 3. grab relevant spec risks as a {symbol: specific_risk} mapping.
        #    The chain works on derived copies, so the per-date frames cached
        #    in `all_core_spec_risk` are left untouched and this cell gives
        #    the same result when it is run again. Rows with missing values
        #    are dropped before the mapping is built.
        risk_by_symbol = (
            all_core_spec_risk[half_life][date]
            .reset_index()
            .set_index("symbol")
            .dropna()["specific_risk"]
            .to_dict()
        )

        all_combined_specific_risk_estimates.append(
            pd.DataFrame(
                {
                    "date": [date],
                    "half_life": [half_life],
                    "specific_risk": generate_raw_portfolio_specific_risk(
                        risk_by_symbol, market_portfolio
                    ),
                }
            )
        )

  0%|          | 0/2314 [00:00<?, ?it/s]

In [38]:
# Stack the single-row (date, half_life, specific_risk) frames into one table.
# Each piece carries its own default index and `ignore_index` is not set, so
# the index of the result repeats the same label instead of being unique.
combined_spec_risk = pd.concat(all_combined_specific_risk_estimates)

In [46]:
# Key the stored table by the last date that was processed. It is taken from
# `dates` explicitly rather than from the `date` loop variable left over by an
# earlier cell, which holds the same value after a complete run but a stale
# one if that loop was interrupted or the cells were run out of order.
version_date = str(dates[-1])

# Write the combined estimates to the "core_specific_risk" table of the
# specific-risk database.
# NOTE(review): `delete_database=True` together with `update_mode="append"`
# looks like it rebuilds the database on every run — confirm against
# `refresh_database`.
refresh_database(
    symbol_level_data={version_date: combined_spec_risk},
    database_location=DATABASE_LOCATION,
    database_name=SPECIFIC_RISK,
    database_table_name="core_specific_risk",
    delete_database=True,
    key_field_name="version_date",
    update_mode="append",
)