In [1]:
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
start_date = datetime(2016,1,1)
end_date = datetime(2021,1,1)

In [3]:
market = Market()
speculation_db = ADatabase("strategy_speculation")

In [4]:
market.connect()
prices = market.retrieve("prices")
sp5 = market.retrieve("sp500")
market.disconnect()

In [5]:
prices = p.column_date_processing(prices)

In [6]:
prices["year"] = [x.year for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]
prices["quarter"] = [x.quarter for x in prices["date"]]

In [7]:
data = []
for ticker in tqdm(sp5["Symbol"]):
    ticker_data = prices[prices["ticker"]==ticker]
    ticker_data.sort_values("date",ascending=True)
    ticker_data = ticker_data.groupby(["year","week"]).mean().reset_index()
    for i in range(14):
        ticker_data[str(i)] = ticker_data["adjclose"].shift(i)
    ticker_data["d1"] = ticker_data["adjclose"].pct_change(periods=7)
    ticker_data["d2"] = ticker_data["d1"].pct_change(periods=1)
    ticker_data["d3"] = ticker_data["d2"].pct_change(periods=1)
    ticker_data.dropna(inplace=True)
    ticker_data["ticker"] = ticker
    data.append(ticker_data)
    

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [01:50<00:00,  4.59it/s]


In [8]:
final_data = pd.concat(data)

In [9]:
final_data.head()

Unnamed: 0,year,week,close,high,low,open,volume,adjclose,adjhigh,adjlow,...,8,9,10,11,12,13,d1,d2,d3,ticker
13,1994,14,99.702,100.228,98.75,99.026,390940.0,11.634235,11.695614,11.523146,...,12.162794,12.295876,12.662951,12.810846,12.686328,12.359292,-0.065907,0.337379,1.867036,MMM
14,1994,15,50.252,51.002,50.002,50.528,636460.0,11.727821,11.902856,11.669476,...,12.455112,12.162794,12.295876,12.662951,12.810846,12.686328,-0.057694,-0.124612,-1.369352,MMM
15,1994,16,48.0,48.728,47.502,48.428,647380.0,11.202249,11.372149,11.086025,...,12.445873,12.455112,12.162794,12.295876,12.662951,12.810846,-0.075376,0.30648,-3.459481,MMM
16,1994,17,48.625,49.19,48.005,48.815,950700.0,11.348111,11.479971,11.203415,...,12.115465,12.445873,12.455112,12.162794,12.295876,12.662951,-0.069841,-0.073435,-1.239608,MMM
17,1994,18,49.602,49.804,49.054,49.452,642860.0,11.576124,11.623266,11.448231,...,12.200182,12.115465,12.445873,12.455112,12.162794,12.295876,-0.033589,-0.519061,6.068273,MMM


In [12]:
start_year = 2022
end_year = 2023
speculation_db.connect()
for year in range(start_year,end_year):
    for ticker in tqdm(sp5["Symbol"]):
        try:
            ticker_data = final_data[final_data["ticker"]==ticker]
            factors = [str(i) for i in range(14)]
            training_data = ticker_data[(ticker_data["year"]<year) & (ticker_data["year"]>=year-5)].reset_index(drop=True)
            prediction_data = ticker_data[ticker_data["year"]==year]
            if training_data.index.size > 100:
                refined_data = {"X":training_data[factors],"y":training_data[["adjclose"]].rename(columns={"adjclose":"y"})}
                models = m.regression(refined_data)
                models["model"] = [pickle.dumps(x) for x in models["model"]]
                models["ticker"] = ticker
                models["year"] = year
                speculation_db.store("models",models)
            else:
                continue
        except Exception as e:
            print(year,ticker,str(e))
speculation_db.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [44:15<00:00,  5.26s/it]Custom logger is already specified. Specify more than one logger at same time is not thread safe.


In [18]:
start_year = 2022
end_year = 2023
speculation_db.connect()
models = speculation_db.retrieve("models")
models["model"] = [pickle.loads(x) for x in models["model"]]
for year in range(start_year,end_year):
    for ticker in tqdm(sp5["Symbol"]):
        try:
            ticker_data = final_data[final_data["ticker"]==ticker]
            factors = [str(i) for i in range(14)]
            prediction_data = ticker_data[ticker_data["year"]==year]
            ticker_model_data = models[(models["ticker"]==ticker) & (models["year"]==year)]
            for row in ticker_model_data.iterrows():
                model = row[1]["model"]
                api = row[1]["api"]
                score = row[1]["score"]
                prediction_data[f"{api}_prediction"] = model.predict(prediction_data[factors])
                prediction_data[f"{api}_score"] = score
            included_columns = ["year","week","ticker"]
            included_columns.extend([x for x in prediction_data.columns if "score" in x or "prediction" in x])
            speculation_db.store("predictions",prediction_data[included_columns].tail(1))
        except Exception as e:
            print(year,ticker,str(e))
speculation_db.disconnect()

 13%|██████████████████████████████▍                                                                                                                                                                                                   | 68/505 [00:03<00:23, 18.30it/s]

strategy_speculation predictions documents must be a non-empty list


 16%|█████████████████████████████████████▏                                                                                                                                                                                            | 83/505 [00:04<00:22, 18.98it/s]

strategy_speculation predictions documents must be a non-empty list


 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉   | 498/505 [00:28<00:00, 17.77it/s]

2022 WLTW Found array with 0 sample(s) (shape=(0, 14)) while a minimum of 1 is required.


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [00:28<00:00, 17.51it/s]

strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list



