In [27]:
from modeler.modeler import Modeler as m
from database.market import Market
from database.strategy import Strategy
import pandas as pd
from datetime import datetime, timedelta
from extractor.tiingo_extractor import TiingoExtractor
from tqdm import tqdm
from time import sleep
import pickle

In [16]:
market = Market()
strategy = Strategy()
start = 2021
end = 2022
weekly_sets = []
number_of_training_weeks = 14

In [17]:
market.connect()
sp5 = market.retrieve("sp500")
market.disconnect()
tickers = sp5["Symbol"]

In [18]:
strategy.connect()
prices = strategy.retrieve("prices")
strategy.disconnect()

In [19]:
prices["date"] = pd.to_datetime(prices["date"])
prices["year"] = [x.year for x in prices["date"]]
prices["quarter"] = [x.quarter for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]

In [20]:
for ticker in tqdm(sp5["Symbol"].unique()):
    ticker_data = prices[prices["ticker"]==ticker]
    weekly = ticker_data.groupby(["year","quarter","week"]).mean().reset_index()
    for i in range(number_of_training_weeks):
        weekly[i] = weekly["adjClose"].shift(1)
    weekly["y"] = weekly["adjClose"].shift(-1)
    weekly.dropna(inplace=True)
    weekly["ticker"] = ticker
    weekly_sets.append(weekly)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [00:23<00:00, 21.63it/s]


In [21]:
data = pd.concat(weekly_sets)
for i in range(number_of_training_weeks):
    data.rename(columns={i:str(i)},inplace=True)

In [22]:
data

Unnamed: 0,year,quarter,week,close,high,low,open,volume,adjClose,adjHigh,...,6,7,8,9,10,11,12,13,y,ticker
1,2018.0,4.0,45.0,202.5850,204.135000,200.21500,202.5000,2481664.50,184.651736,186.064527,...,174.861749,174.861749,174.861749,174.861749,174.861749,174.861749,174.861749,174.861749,183.640907,MMM
2,2018.0,4.0,46.0,201.4760,203.518000,197.75800,199.9720,2702883.20,183.640907,185.502145,...,184.651736,184.651736,184.651736,184.651736,184.651736,184.651736,184.651736,184.651736,183.272457,MMM
3,2018.0,4.0,47.0,200.3950,203.012475,200.00250,202.2125,2182586.25,183.272457,185.665043,...,183.640907,183.640907,183.640907,183.640907,183.640907,183.640907,183.640907,183.640907,186.731497,MMM
4,2018.0,4.0,48.0,203.4740,204.252000,200.03200,201.5820,2265245.20,186.731497,187.445481,...,183.272457,183.272457,183.272457,183.272457,183.272457,183.272457,183.272457,183.272457,186.268968,MMM
5,2018.0,4.0,49.0,202.9700,206.987500,201.08500,205.3750,2609600.00,186.268968,189.955895,...,186.731497,186.731497,186.731497,186.731497,186.731497,186.731497,186.731497,186.731497,182.409051,MMM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,2021.0,3.0,26.0,120.5150,121.515000,119.85005,120.7050,495934.50,120.515000,121.515000,...,119.466667,119.466667,119.466667,119.466667,119.466667,119.466667,119.466667,119.466667,120.342500,XYL
147,2021.0,3.0,27.0,120.3425,120.867500,119.23000,120.2575,1026721.75,120.342500,120.867500,...,120.515000,120.515000,120.515000,120.515000,120.515000,120.515000,120.515000,120.515000,120.558000,XYL
148,2021.0,3.0,28.0,120.5580,121.768000,119.61400,120.8000,777269.00,120.558000,121.768000,...,120.342500,120.342500,120.342500,120.342500,120.342500,120.342500,120.342500,120.342500,121.290000,XYL
149,2021.0,3.0,29.0,121.2900,121.810000,119.87500,120.7080,719161.80,121.290000,121.810000,...,120.558000,120.558000,120.558000,120.558000,120.558000,120.558000,120.558000,120.558000,124.374000,XYL


In [23]:
strategy.connect()
categories = strategy.retrieve("application_stock_categories")
strategy.disconnect()

In [24]:
categories

Unnamed: 0,_id,year,quarter,ticker,prediction,score
0,610a9fce7c9f3d1cb2b7e3a0,2021,1,A,,0.727642
1,610a9fce7c9f3d1cb2b7e3a1,2021,2,A,,0.727642
2,610a9fce7c9f3d1cb2b7e3a2,2021,3,A,,0.727642
3,610a9fce7c9f3d1cb2b7e3a3,2021,1,AAL,100Industrials,0.727642
4,610a9fce7c9f3d1cb2b7e3a4,2021,2,AAL,100Industrials,0.727642
...,...,...,...,...,...,...
1506,610a9fce7c9f3d1cb2b7e982,2021,2,OTIS,,0.727642
1507,610a9fce7c9f3d1cb2b7e983,2021,3,OTIS,,0.727642
1508,610a9fce7c9f3d1cb2b7e984,2021,2,VTRS,100Consumer Discretionary,0.727642
1509,610a9fce7c9f3d1cb2b7e985,2021,2,VTRS,100Consumer Discretionary,0.727642


In [31]:
training_year = 1
weekly_gap = 1
for year in range(start,end):
    for quarter in range(3,4):
        quarterly_categories = categories[(categories["year"]==year) & (categories["quarter"]==quarter)]
        for category in quarterly_categories["prediction"].unique():
            try:
                category_tickers = quarterly_categories[quarterly_categories["prediction"]==category]["ticker"].unique()
                model_data = data[(data["ticker"].isin(category_tickers))]
                first_index = model_data[(model_data["year"] == year - training_year) & (model_data["quarter"]==quarter)].index.values.tolist()[0]
                last_index = model_data[(model_data["year"] == year) & (model_data["quarter"]==quarter)].index.values.tolist()[0]
                training_data = model_data.iloc[first_index:last_index]
                prediction_data = model_data[(model_data["year"] == year) & (model_data["quarter"]==quarter)]
                X = training_data[[str(x) for x in range(number_of_training_weeks)]]
                y = training_data["y"]
                xgb_models = m.xgb_regression({"X":X,"y":y})
                sk_models = m.sk_regression({"X":X,"y":y})
                sk_models.append(xgb_models)
                models = pd.DataFrame(sk_models)
                model = models.sort_values("score",ascending=False).iloc[0]
                model["model"] = pickle.dumps(model["model"])
                model["category"] = category
                model["year"] = year
                model["quarter"] = quarter
                strategy.connect()
                strategy.store("models",pd.DataFrame([model]))
                strategy.disconnect()
            except Exception as e:
                print(year,quarter,category,str(e))
            

2021 1 None list index out of range
2021 2 None list index out of range
2021 3 None list index out of range
