In [49]:
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 29 21:39:37 2023

@author: Bebesi Laci
"""

import sys
import os
import re
import pandas as pd
import numpy as np
import yfinance as yf
import logging
import sqlite3
from tqdm import tqdm
import statsmodels.api as sm

sys.path.insert(0, r"K:/Thesis/codes/crypto_project")
from factor_model.offline_elements.loaders.price_data_loaders import (
    generate_ytickets,
    generate_price_data_map
)
from factor_model.offline_elements.loaders.market_cap_loader import (
    generate_market_cap_data, generate_market_cap_only
)
from factor_model.offline_elements.loaders.crypto_coin_loaders import (
    get_ticker_list
)

from factor_model.offline_elements.return_generators.total_return_calculation import generate_return_data
from datetime import date, timedelta
from factor_model.offline_elements.database_generators.generate_databases import refresh_raw_price_database
from factor_model.offline_elements.estimation_universe.estimation_core_universe import generate_estimation_basis
from factor_model.offline_elements.styles.return_based import generate_x_month_price_change
from factor_model.offline_elements.factor_return_estimation.factor_return_estimation import create_factor_return_data
# from analysis.tickers import ticker_list
# from analysis.utility import convert_str_numbs_to_float
# from analysis.styles.return_based import generate_x_month_momentum
# from analysis.market_cap import generate_market_cap_data
# from analysis.ticker_basics import generate_ytickets, generate_price_data_map
# from analysis.returns import generate_return_data
# from analysis.estimation_basis import generate_estimation_basis
# from datetime import date, timedelta
# import matplotlib.pyplot as plt
# import datetime
# import statsmodels.api as sm
# from analysis.factor_return_dataset import create_factor_return_data, create_expo_from_daily_data
# from tqdm import tqdm
# import matplotlib.pyplot as plt

# Directory passed to refresh_raw_price_database and joined into the SQLite path
# when the raw price table is read back.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
DATABASE_LOCATION = r"K:/Thesis/database"
# Presumably the file name of the raw price SQLite database inside
# DATABASE_LOCATION - TODO confirm it matches what refresh_raw_price_database writes.
RAW_DATA_DB = "raw_price_data.db"

In [43]:

# Run configuration consumed by the cells below.
parameters = dict(
    HORIZON="20y",  # forwarded to generate_price_data_map
    ESTIMATION_HORIZON=int(6.5 * 365),  # number of trailing rows kept as estimation dates
    WEIGHT_FUNCTION=np.sqrt,  # applied to the market_cap column to build weights
    PRESENT_IN_MARKET=3 * 365,  # trade days, 3 years approx
    ESTIMATION_DAY=date.today(),
    MARKET_CAP_COVERAGE=0.82,
    NEW_COIN_INCLUSION=365,  # after X days
    REGRESSORS_SET1=["market", "size", "momentum", "reversal"],  # exog columns of the WLS fit
    REGRESSORS_SET2=["new_coin"],
    MONTH_LENGTH=30,  # forwarded as month_len to generate_x_month_price_change
)
# Derived entry: the estimation day moved back by PRESENT_IN_MARKET days.
parameters.update(
    CORE_UNIV_START=parameters["ESTIMATION_DAY"] - timedelta(days=parameters["PRESENT_IN_MARKET"])
)


In [51]:
# Load the coin tickers; the result is fed to generate_ytickets in the next cell.
tickers = get_ticker_list()


In [34]:
yfinance_tickers = generate_ytickets(tickers)
price_data_map = generate_price_data_map(yfinance_tickers, parameters["HORIZON"])

# Prune tickers without a usable price history: a return needs at least two
# observations, so anything shorter is removed from both containers.
drop_keys = [symbol for symbol, history in price_data_map.items() if len(history) < 2]
for key in drop_keys:
    del yfinance_tickers[key]
    del price_data_map[key]


In [35]:
# Market-cap table for the surviving tickers; later cells read its "market_cap"
# column and add "transformed_market_cap" to it.
market_cap_df = generate_market_cap_only(yfinance_tickers)


In [52]:
# Build the per-ticker return data from the price data.
return_data_map = generate_return_data(price_data_map)
# Estimation dates = the "date" values of the last ESTIMATION_HORIZON rows of BTC-USD.
# NOTE(review): a later cell reassigns estimation_dates from "ETC-USD"; under
# Restart & Run All that later assignment wins - confirm which ticker is intended.
estimation_dates=list(return_data_map["BTC-USD"].tail(parameters["ESTIMATION_HORIZON"])["date"])

  price_data_map[key]["Close"].head(-1)
  price_data_map[key]["Close"].head(-1)


In [37]:
# Estimation dates = the "date" values of the last ESTIMATION_HORIZON rows of ETC-USD.
# NOTE(review): this overrides the BTC-USD based estimation_dates assigned in the
# previous cell; keep only one definition - confirm which reference ticker is intended.
estimation_dates=list(return_data_map["ETC-USD"].tail(parameters["ESTIMATION_HORIZON"])["date"])

In [38]:
# Weight generation and estimation universe.
# The weight column is WEIGHT_FUNCTION applied to the raw market cap
# (np.sqrt in the parameters cell).
market_cap_df["transformed_market_cap"] = parameters["WEIGHT_FUNCTION"](market_cap_df["market_cap"])
# Largest transformed market cap first; sorts market_cap_df in place.
market_cap_df.sort_values(by = "transformed_market_cap", ascending=False, inplace=True)

# 0. market cap date: the estimation day is used as the market-cap reference date
market_cap_date = parameters["ESTIMATION_DAY"]
# Returns two objects; estimation_basis is consumed by create_factor_return_data below.
# NOTE(review): univ_first_appearence is presumably each coin's first date in the
# universe - verify against generate_estimation_basis.
estimation_basis, univ_first_appearence = generate_estimation_basis(return_data_map, market_cap_df, market_cap_date, parameters)

#### Style generation

In [45]:
# Style inputs: price change over a 6-month window (momentum) and a 1-month
# window (reversal), both using the configured month length.
momentum_move_map = generate_x_month_price_change(
    price_data_map, x_len=6, month_len=parameters["MONTH_LENGTH"]
)
reversal_map = generate_x_month_price_change(
    price_data_map, x_len=1, month_len=parameters["MONTH_LENGTH"]
)

# Bundle every daily series under the name the factor-return builder receives.
daily_data_maps = {
    "reversal": reversal_map,
    "momentum": momentum_move_map,
    "return": return_data_map,
}


  ) / np.matrix(price_data_map[key]["Close"].head(-x_len * month_len))
  ) / np.matrix(price_data_map[key]["Close"].head(-x_len * month_len))


In [53]:
# Daily cross-sectional WLS: for every estimation date, regress returns on the
# REGRESSORS_SET1 exposures, weighted by transformed market cap, and collect the
# coefficients and t-statistics as one-row frames tagged with the date.
tstats_all = list()
coefficients_all = list()
idx=0  # not used in this cell; kept in case another cell reads it
# The loop variable is deliberately NOT called `date`: that name is the
# `datetime.date` class imported at the top, and rebinding it here would break
# `date.today()` when the parameters cell is re-run.
for estimation_date in tqdm(estimation_dates):
    # step 0 : assemble estimation data
    factor_return_data = create_factor_return_data(
        estimation_basis, parameters, estimation_date, daily_data_maps
    )

    # step 1 estimation
    try:
        mod_wls = sm.WLS(
            endog=factor_return_data["return"],
            exog=factor_return_data[parameters["REGRESSORS_SET1"]],
            weights=factor_return_data["transformed_market_cap"],
        ).fit()
        # save results
        coefficient_date = mod_wls.params.to_frame().T
        coefficient_date["date"] = estimation_date
        coefficients_all.append(coefficient_date)
        tstat_date = mod_wls.tvalues.to_frame().T
        tstat_date["date"] = estimation_date
        tstats_all.append(tstat_date)
    except Exception as exc:
        # Best effort: skip dates whose regression fails, but report why.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print(f"{estimation_date}: WLS estimation failed ({exc!r})")


TypeError: 'module' object is not callable

In [66]:
# Hand the in-memory price data to the database refresher together with the
# database directory.
# NOTE(review): presumably (re)writes the raw price table read back in the next
# cell - confirm the target file name inside refresh_raw_price_database.
refresh_raw_price_database(price_data_map, DATABASE_LOCATION)

  method=method,


In [58]:
# Read the raw price table back from the SQLite file.
# Fix: use the RAW_DATA_DB constant; the original joined the literal string
# "RAW_DATA_DB", which points at a different (normally non-existent) file that
# sqlite3.connect would silently create as an empty database.
# NOTE(review): confirm RAW_DATA_DB is the file refresh_raw_price_database writes.
raw_db_path = os.path.join(DATABASE_LOCATION, RAW_DATA_DB)
conn = sqlite3.connect(raw_db_path)
try:
    df = pd.read_sql_query("SELECT * FROM raw_price_data", conn)
finally:
    # `with sqlite3.connect(...)` only manages the transaction; it does not
    # close the connection, so close it explicitly.
    conn.close()
