In [94]:
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
import pandas as pd
import matplotlib.pyplot as plt
from processor.processor import Processor as p
from statistics import variance
from tqdm import tqdm
from modeler.modeler import Modeler as m
from datetime import datetime, timedelta

In [3]:
market = Market()
sec = SEC()

In [3]:
# market.connect()
# prices = market.retrieve("alpha_prices")
# market.disconnect()

In [21]:
fed_fund = pd.read_csv("./csv_files/FED/FEDFUNDS.csv")
spy = pd.read_csv("./csv_files/FED/SPY.csv")
gdp = pd.read_csv("./csv_files/FED/USGDP.csv")
brent = pd.read_csv("./csv_files/FED/POILBREUSDM.csv")
wti = pd.read_csv("./csv_files/FED/POILWTIUSDM.csv")
cpi = pd.read_csv("./csv_files/FED/CPIAUCSL.csv")
unemployment = pd.read_csv("./csv_files/FED/UNRATE.csv")

In [22]:
datasets = {"spy":spy,
           "fed_fund":fed_fund,
           "gdp":gdp,
            "brent":brent,
            "wti":wti, 
            "cpi":cpi, 
            "unemployment":unemployment}

In [23]:
cleaned_sets = {}
for dataset in tqdm(datasets.keys()):
    try:
        cleaned = p.column_date_processing(datasets[dataset])
        cleaned["year"] = [x.year for x in cleaned["date"]]
        cleaned["quarter"] = [x.quarter for x in cleaned["date"]]
        cleaned_quarterly = cleaned.groupby(["year","quarter"]).mean().reset_index()
        cleaned_sets[dataset] = cleaned_quarterly
    except Exception as e:
        print(str(e))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 162.75it/s]


In [69]:
base = cleaned_sets["spy"][["year","quarter","adjclose"]]
base.rename(columns={"adjclose":"spy"},inplace=True)

In [70]:
for dataset in cleaned_sets:
    if dataset == "spy":
        continue
    else:
        ds = cleaned_sets[dataset]
        base = base.merge(ds.drop("date",axis=1,errors="ignore"),on=["year","quarter"],how="left")

In [71]:
data = base.copy()

In [72]:
data.columns

Index(['year', 'quarter', 'spy', 'fedfunds', 'gdp', 'poilbreusdm',
       'poilwtiusdm', 'cpiaucsl', 'unrate'],
      dtype='object')

In [73]:
base["y"] = base["spy"].shift(-4)

In [74]:
base["date"] = [datetime(int(row[1]["year"]),int(row[1]["quarter"]-1) * 3 + 1,1) for row in base.iterrows()]

In [None]:
factors = [x for x in base.columns if x not in ["year","quarter","y","date"]]
factors

In [99]:
sims = []
for year in tqdm(range(2000,2022)):
    try:
        training_set = base[(base["year"]<year) & (base["year"]>=year-7)].copy().reset_index()
        prediction_set = base[base["year"]==year].copy()
        spy_model = m.regression({"X":training_set[factors],"y":training_set[["y"]]})
        sim = prediction_set
        for row in spy_model.iterrows():
            model = row[1]["model"]
            api = row[1]["api"]
            score = row[1]["score"]
            predictions = model.predict(sim[factors])
            sim[f"{api}_prediction"] = predictions
            sim[f"{api}_score"] = score
            sims.append(sim)
    except Exception as e:
        print(year,str(e))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:48<00:00,  2.21s/it]


In [100]:
final_sim = pd.concat(sims)
final_sim["year"] = final_sim["year"] + 1

In [101]:
final_sim

Unnamed: 0,year,quarter,spy,fedfunds,gdp,poilbreusdm,poilwtiusdm,cpiaucsl,unrate,y,date,skl_prediction,skl_score,xgb_prediction,xgb_score,cat_prediction,cat_score
28,2000,1,94.493197,5.676667,10002.179,26.773333,28.790000,170.100000,4.033333,85.651283,2000-01-01,98.718175,0.980373,91.535400,0.986531,88.351059,0.974627
29,2000,2,96.478110,6.273333,10247.720,26.540000,28.806667,171.433333,3.933333,83.283940,2000-04-01,101.132695,0.980373,91.535400,0.986531,85.849160,0.974627
30,2000,3,98.682245,6.520000,10318.165,30.340000,31.723333,173.000000,4.000000,78.110608,2000-07-01,102.434394,0.980373,91.535400,0.986531,85.849160,0.974627
31,2000,4,91.535036,6.473333,10435.744,29.576667,31.940000,174.233333,3.900000,75.811079,2000-10-01,103.778588,0.980373,91.535400,0.986531,85.849160,0.974627
28,2000,1,94.493197,5.676667,10002.179,26.773333,28.790000,170.100000,4.033333,85.651283,2000-01-01,98.718175,0.980373,91.535400,0.986531,88.351059,0.974627
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,2021,4,456.310534,0.080000,24002.815,82.208182,80.213929,278.413333,4.233333,,2021-10-01,640.528008,0.949884,462.967712,0.706976,313.953543,0.906888
112,2021,1,379.576657,0.080000,22038.226,60.567295,57.790333,263.524667,6.200000,444.053418,2021-01-01,476.788880,0.949884,425.731873,0.706976,350.856722,0.906888
113,2021,2,412.175900,0.070000,22740.959,68.626869,66.102648,268.760333,5.900000,427.519001,2021-04-01,533.722286,0.949884,447.953583,0.706976,339.427620,0.906888
114,2021,3,437.021955,0.090000,23202.344,73.001970,70.589848,273.163333,5.100000,,2021-07-01,588.213406,0.949884,460.842529,0.706976,334.110335,0.906888


In [104]:
strat_db = ADatabase("strategy_econ")
strat_db.connect()
strat_db.store("sim",final_sim)
strat_db.disconnect()