In [1]:
from database.market import Market
from database.adatabase import ADatabase
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle

In [2]:
# Handle used below to connect and retrieve the "sp500", "prices" and
# "financial_set" datasets.
market = Market()

In [3]:
# Handle for the "strategy_financial" database; the aggregated simulation
# results are stored there under the name "sim" at the end of the notebook.
financial_db = ADatabase("strategy_financial")

In [4]:
# Pull the three source datasets in a single session. The try/finally makes
# sure the connection is released even if one of the retrieve calls raises.
market.connect()
try:
    sp5 = market.retrieve("sp500")
    prices = market.retrieve("prices")
    financials = market.retrieve("financial_set")
finally:
    market.disconnect()

In [5]:
# Run the project's date-processing helper on prices, then derive the calendar
# quarter and year of every row. The code relies on the helper leaving a
# "date" column whose values expose .quarter and .year.
prices = p.column_date_processing(prices)
price_dates = prices["date"]
prices["quarter"] = list(map(lambda stamp: stamp.quarter, price_dates))
prices["year"] = list(map(lambda stamp: stamp.year, price_dates))

In [6]:
# Apply the same project date-processing helper that was used for prices.
# NOTE(review): the later merge needs "year", "quarter" and "ticker" columns on
# financials — confirm whether this helper or the stored table provides them.
financials = p.column_date_processing(financials)

In [7]:
# Coerce every close value to a Python float, element by element, so the
# quarterly mean below operates on numeric data.
prices["close"] = list(map(float, prices["close"]))

In [8]:
# Label = mean close per (year, quarter, ticker).
label_keys = ["year","quarter","ticker"]
labels = prices[label_keys + ["close"]].groupby(label_keys).mean().reset_index()
# Shift the label year back by one so that, once merged with financials on
# "year", each financial row is paired with the average close of the same
# quarter in the following calendar year.
labels["year"] = labels["year"].sub(1)

In [16]:
# Inner-join the financial factors with the shifted price labels; rows that
# lack a matching (year, quarter, ticker) on either side are dropped.
data = pd.merge(financials, labels, how="inner", on=["year","quarter","ticker"])

In [17]:
# Every column that is neither a merge key nor the target is a model feature.
non_factor_columns = {"ticker","year","quarter","close"}
factors = [column for column in data.columns if column not in non_factor_columns]

In [18]:
# Display the selected feature columns for a visual check.
factors

['assets',
 'liabilitiesandstockholdersequity',
 'incometaxexpensebenefit',
 'retainedearningsaccumulateddeficit',
 'accumulatedothercomprehensiveincomelossnetoftax',
 'earningspersharebasic',
 'earningspersharediluted',
 'propertyplantandequipmentnet',
 'cashandcashequivalentsatcarryingvalue',
 'entitycommonstocksharesoutstanding',
 'weightedaveragenumberofdilutedsharesoutstanding',
 'weightedaveragenumberofsharesoutstandingbasic',
 'stockholdersequity']

In [21]:
# Display the merged financials/labels frame.
# NOTE(review): the recorded output shows identical consecutive rows (e.g. MMM,
# 2009 Q3), so duplicate keys appear to survive the merge — confirm whether
# that is intended before training on it.
data

Unnamed: 0,ticker,quarter,year,assets,liabilitiesandstockholdersequity,incometaxexpensebenefit,retainedearningsaccumulateddeficit,accumulatedothercomprehensiveincomelossnetoftax,earningspersharebasic,earningspersharediluted,propertyplantandequipmentnet,cashandcashequivalentsatcarryingvalue,entitycommonstocksharesoutstanding,weightedaveragenumberofdilutedsharesoutstanding,weightedaveragenumberofsharesoutstandingbasic,stockholdersequity,close
0,MMM,3,2009,2.594400e+10,2.594400e+10,5.767500e+08,2.246700e+10,-3.664500e+09,1.7700,1.752500,6.892500e+09,1.972000e+09,698320662.0,706250000.0,699600000.0,1.034150e+10,83.806719
1,MMM,3,2009,2.594400e+10,2.594400e+10,5.767500e+08,2.246700e+10,-3.664500e+09,1.7700,1.752500,6.892500e+09,1.972000e+09,698320662.0,706250000.0,699600000.0,1.034150e+10,83.806719
2,MMM,1,2010,2.652150e+10,2.652150e+10,1.646667e+09,2.299000e+10,-3.700000e+09,5.0700,5.003333,6.943000e+09,2.058000e+09,711733377.0,715300000.0,706000000.0,1.132200e+10,90.021935
3,MMM,1,2010,2.652150e+10,2.652150e+10,1.646667e+09,2.299000e+10,-3.700000e+09,5.0700,5.003333,6.943000e+09,2.058000e+09,711733377.0,715300000.0,706000000.0,1.132200e+10,90.021935
4,MMM,2,2010,2.763600e+10,2.763600e+10,3.385000e+08,2.399200e+10,-3.750500e+09,1.0300,1.015000,6.929500e+09,2.342250e+09,713068068.0,709700000.0,702650000.0,1.316500e+10,93.476825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14153,ZBH,2,2020,2.507580e+10,2.507580e+10,2.535000e+07,1.014675e+10,-2.485000e+08,-0.6300,-0.630000,2.071450e+09,1.525750e+09,206802113.0,206150000.0,205450000.0,1.213335e+10,166.843333
14154,ZBH,3,2020,2.414315e+10,2.414315e+10,1.002500e+07,1.001865e+10,-2.538500e+08,-0.4875,-0.492500,2.066950e+09,6.656500e+08,207049828.0,206400000.0,205700000.0,1.201475e+10,149.860000
14155,ZBH,4,2020,2.433210e+10,2.433210e+10,-1.075000e+08,1.011510e+10,-2.644500e+08,1.2350,1.220000,2.059950e+09,7.926000e+08,207278375.0,207150000.0,206000000.0,1.213015e+10,135.263333
14156,ZTS,3,2020,1.230700e+10,1.230700e+10,1.307500e+08,4.737500e+09,-7.910000e+08,1.1675,1.160000,1.975500e+09,2.643500e+09,475144704.0,480425000.0,477175000.0,2.845000e+09,203.099231


In [26]:
# Walk-forward simulation: for every simulated year, fit the regressors on the
# previous year's rows of `data` and predict the simulated year's rows.
start_year = 2013
end_year = 2014  # exclusive bound of range(), so only 2013 is simulated here
performance = []
sim = []
# Nothing inside the loop reads from or writes to financial_db; the connection
# is kept so the cell's side effects stay the same, and the try/finally
# guarantees it is released even if the loop is interrupted.
financial_db.connect()
try:
    for year in tqdm(range(start_year, end_year)):
        try:
            training_data = data[(data["year"] >= year - 1) & (data["year"] < year)].reset_index(drop=True)
            # Skip years with too few training rows to fit on.
            if training_data.index.size > 100:
                # Explicit copy: the prediction columns are added to this frame
                # below, and writing into a filtered slice of `data` triggers
                # SettingWithCopyWarning / ambiguous view semantics.
                prediction_data = data[data["year"] == year].copy()
                refined_data = {
                    "X": training_data[factors],
                    "y": training_data[["close"]].rename(columns={"close": "y"}),
                }
                # As used below, each row of `models` carries an "api" name, a
                # fitted "model" exposing .predict, and a "score".
                models = m.regression(refined_data)
                models["year"] = year
                for _, model_row in models.iterrows():
                    api = model_row["api"]
                    prediction = model_row["model"].predict(prediction_data[factors])
                    prediction_data[f"{api}_prediction"] = prediction
                    prediction_data[f"{api}_score"] = model_row["score"]
                included_columns = ["year", "quarter", "ticker"]
                included_columns.extend(
                    [x for x in prediction_data.columns if "score" in x or "prediction" in x]
                )
                # The labels were shifted back one year when `data` was built,
                # so adding 1 restores the calendar year of the predicted close.
                prediction_data["year"] = prediction_data["year"] + 1
                sim.append(prediction_data[included_columns])
        except Exception as e:
            # The original handler also printed `sector`, which is not defined
            # in this notebook and would raise NameError, masking the real error.
            print(year, str(e))
finally:
    financial_db.disconnect()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.45s/it]


In [27]:
# Inspect the per-year prediction frames collected by the simulation loop.
sim

[       year  quarter ticker  skl_prediction    skl_score  xgb_prediction  \
 20     2014        1    MMM      140.670546 -2023.805655      114.055893   
 21     2014        2    MMM      109.761757 -2023.805655      108.926788   
 22     2014        2    MMM      138.779082 -2023.805655      103.992805   
 23     2014        3    MMM      116.860819 -2023.805655      124.469376   
 24     2014        4    MMM      123.180710 -2023.805655      124.818802   
 ...     ...      ...    ...             ...          ...             ...   
 14075  2014        4   ZBRA       75.136486 -2023.805655       55.990398   
 14124  2014        1    ZBH       83.781446 -2023.805655       82.212364   
 14125  2014        2    ZBH       79.994793 -2023.805655       76.864998   
 14126  2014        3    ZBH       80.900916 -2023.805655       85.340950   
 14127  2014        4    ZBH       81.630899 -2023.805655       85.438782   
 
        xgb_score  cat_prediction  cat_score  
 20      0.853428      100.

In [23]:
# Stack the per-year prediction frames, then average the numeric columns so
# there is exactly one row per (year, quarter, ticker).
stacked_predictions = pd.concat(sim)
simulation_keys = ["year","quarter","ticker"]
simulation = stacked_predictions.groupby(simulation_keys).mean().reset_index()

In [25]:
# Persist the aggregated simulation under the name "sim". The try/finally
# releases the connection even if the store call raises.
financial_db.connect()
try:
    financial_db.store("sim",simulation)
finally:
    financial_db.disconnect()