In [2]:
from sys import path
from os.path import dirname as dir

In [3]:
# Make the parent directory of sys.path[0] importable so the project packages
# (AgentLayer, FinancialDataLayer) can be imported from this notebook.
# NOTE: `dir` here is os.path.dirname (aliased in the import cell above), not
# the builtin dir().
parent_dir = dir(path[0])
# Guard the append so re-running this cell does not pile up duplicate entries.
if parent_dir not in path:
    path.append(parent_dir)

In [4]:
from statistics import median
from AgentLayer.ConventionalAgents.LinearRegression import LinearRegressionAgent
from AgentLayer.DataSplitter.TimeSeriesSplitter import TimeSeriesSplitter
from FinancialDataLayer.DataCollection.DataDownloader import DataDownloader
from FinancialDataLayer.DataProcessing.DefaultFeatureEngineer import DefaultFeatureEngineer
import yaml
import numpy as np
from AgentLayer.metrics import *

In [5]:
# IMPORT .yaml FILE
# Gather the user parameters from the config file one directory above the
# notebook's working directory.
with open("..//user_params.yaml", "r") as stream:
    try:
        user_params = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Show the parse error, then re-raise: swallowing it would leave
        # `user_params` undefined (or stale from an earlier run of this cell)
        # and the lookups below would fail in a much more confusing way.
        print(exc)
        raise

# safe_load returns None for an empty document; fail with a clear message
# instead of an opaque "NoneType is not subscriptable" error.
if not isinstance(user_params, dict):
    raise ValueError("user_params.yaml must contain a top-level mapping")

# Pull out the settings used by the rest of the notebook.
tickers = user_params["tickers"]
env_kwargs = user_params["env_params"]
tech_indicator_list = env_kwargs["tech_indicator_list"]

In [6]:
# FETCH DATA
# Build the downloader for the configured tickers and date range first, then
# trigger the Yahoo download as a separate step.
print("\nTest 3: Downloading from Yahoo.........")
downloader = DataDownloader(
    start_date="2009-01-01",
    end_date="2021-10-31",
    ticker_list=tickers,
)
downloaded_df = downloader.download_from_yahoo()


Test 3: Downloading from Yahoo.........
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (12924, 8)


In [7]:
# Display the frame returned by download_from_yahoo() for a quick visual check.
downloaded_df

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2008-12-31,3.070357,3.133571,3.047857,2.606278,607541200,AAPL,2
1,2008-12-31,41.590000,43.049999,41.500000,32.005901,5443100,BA,2
2,2008-12-31,43.700001,45.099998,43.700001,30.628820,6277400,CAT,2
3,2008-12-31,72.900002,74.629997,72.900002,43.314430,9964300,CVX,2
4,2009-01-02,3.067143,3.251429,3.041429,2.771174,746015200,AAPL,4
...,...,...,...,...,...,...,...,...
12919,2021-10-28,111.580002,113.269997,111.580002,109.762665,7538200,CVX,3
12920,2021-10-29,147.220001,149.940002,146.410004,149.172180,124953200,AAPL,4
12921,2021-10-29,206.839996,208.240005,205.289993,207.029999,8559500,BA,4
12922,2021-10-29,207.990005,209.750000,203.240005,202.055878,4292200,CAT,4


In [8]:
 # PREPROCESS DATA
print("\nTest 4: Feature engineer.........")
# Collect the feature-engineering switches in one place so they are easy to
# scan and tweak, then hand them to the engineer in a single call.
feature_options = dict(
    use_default=False,
    tech_indicator_list=tech_indicator_list,
    use_vix=True,
    use_turbulence=True,
    use_covar=True,
)
data_processor = DefaultFeatureEngineer(**feature_options)
# Extend the downloaded frame with the features selected above.
df_processed = data_processor.extend_data(downloaded_df)


Test 4: Feature engineer.........
Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3231, 8)
Successfully added vix
Successfully added turbulence index
Successfully added covariances


In [9]:
# Display the frame returned by extend_data() to inspect the added columns.
df_processed

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence,cov_list,return_list
0,2009-12-31,AAPL,7.611786,7.619643,7.520000,6.434924,352410800.0,3.0,0.105229,6.531585,5.633488,60.410518,155.827325,31.312031,6.105642,6.048183,21.680000,0.000000,"[[0.0004566266851897576, 0.0002589236496079325...",tic AAPL BA CAT ...
0,2009-12-31,BA,55.000000,55.220001,54.049999,42.180122,2189400.0,3.0,0.448062,43.940997,41.779002,54.118722,17.483417,4.580979,42.211023,40.690113,21.680000,0.000000,"[[0.0004566266851897576, 0.0002589236496079325...",tic AAPL BA CAT ...
0,2009-12-31,CAT,57.599998,57.959999,56.990002,40.802948,3859700.0,3.0,0.043860,42.300241,40.350236,51.872628,-74.366602,6.791854,41.526065,41.025309,21.680000,0.000000,"[[0.0004566266851897576, 0.0002589236496079325...",tic AAPL BA CAT ...
0,2009-12-31,CVX,77.720001,77.779999,76.930000,46.806019,4246600.0,3.0,0.009377,47.587705,46.552033,52.439881,-69.789517,6.763381,47.276571,46.664712,21.680000,0.000000,"[[0.0004566266851897576, 0.0002589236496079325...",tic AAPL BA CAT ...
1,2010-01-04,AAPL,7.622500,7.660714,7.585000,6.535087,493729600.0,0.0,0.119897,6.599508,5.619095,62.133726,168.777083,33.760635,6.113836,6.060275,20.040001,0.000000,"[[0.0004430596199728812, 0.0002466265105727518...",tic AAPL BA CAT ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2976,2021-10-27,CVX,113.220001,113.580002,111.650002,108.743828,9096100.0,2.0,3.178450,112.516863,98.478378,61.699285,98.878403,34.651291,102.406341,98.743036,16.980000,1.475711,"[[0.00027957799125446277, 6.779491578161685e-0...",tic AAPL BA CAT ...
2977,2021-10-28,AAPL,149.820007,153.169998,149.720001,151.930573,100077900.0,3.0,1.306374,152.340870,137.144783,58.671047,166.690516,23.469447,144.414872,146.737929,16.530001,9.694300,"[[0.00027279647837621625, 5.925447326429548e-0...",tic AAPL BA CAT ...
2977,2021-10-28,BA,206.000000,208.740005,204.600006,207.850006,8825500.0,3.0,-3.006977,231.687612,205.504387,41.943087,-146.293232,35.544181,218.354333,219.751167,16.530001,9.694300,"[[0.00027279647837621625, 5.925447326429548e-0...",tic AAPL BA CAT ...
2977,2021-10-28,CAT,197.360001,204.500000,197.050003,202.135117,4462700.0,3.0,0.741821,203.795321,184.458960,51.696330,106.185114,4.524754,193.625510,200.327880,16.530001,9.694300,"[[0.00027279647837621625, 5.925447326429548e-0...",tic AAPL BA CAT ...


In [10]:
# Carve the processed frame into a training window and a trading (test) window.
splitter = TimeSeriesSplitter()
train_start, train_end = '2009-01-01', '2020-06-30'
trade_start, trade_end = '2020-07-01', '2021-09-02'
train = splitter.get_split_data(df_processed, train_start, train_end)
trade = splitter.get_split_data(df_processed, trade_start, trade_end)

In [11]:
# Distinct tickers and distinct dates present in the trade window.
unique_tic = trade["tic"].unique()
unique_trade_date = trade["date"].unique()

In [12]:
# Convert the training split into the (x_train, y_train) pair that the model
# cell below is fitted on.
ml_data = data_processor.prepare_ml_data(train)
x_train, y_train = ml_data

In [13]:
# Create the Linear Regression agent and fit it on the training pair.
# `lr` is the agent (reused below for predict/save/load); `trained_lr` is
# whatever train_model() returns and is what gets passed to those calls.
lr = LinearRegressionAgent()
trained_lr = lr.train_model(x_train, y_train)

Model trained succesfully


In [14]:
# Predict over the trade dates with the fitted model.
# Presumably the starting account value: the displayed `portfolio` begins at
# 1000000 on the first trade date -- confirm against LinearRegressionAgent.predict.
initial_capital = 1000000
portfolio, portfolio_cumprod, meta_coefficient = lr.predict(
    trained_lr,
    initial_capital,
    df_processed,
    unique_trade_date,
    tech_indicator_list,
)

In [15]:
# Display the first value returned by lr.predict(): account value per trade date.
portfolio

Unnamed: 0,date,account_value
0,2020-07-01,1000000
1,2020-07-02,1002717.337037
2,2020-07-06,1042091.814716
3,2020-07-07,992014.183768
4,2020-07-08,998669.002421
...,...,...
291,2021-08-26,1200643.252031
292,2021-08-27,1229758.157681
293,2021-08-30,1207076.280064
294,2021-08-31,1217280.340974


In [16]:
# Display the second value returned by lr.predict().
# NOTE(review): the values look like cumulative return relative to the starting
# account value -- confirm against LinearRegressionAgent.predict.
portfolio_cumprod

0           NaN
1      0.002717
2      0.042092
3     -0.007986
4     -0.001331
         ...   
291    0.200643
292    0.229758
293    0.207076
294    0.217280
295    0.209627
Name: account_value, Length: 296, dtype: float64

In [17]:
# Display the third value returned by lr.predict(): one `weights` entry per date.
meta_coefficient

Unnamed: 0,date,weights
0,2020-07-01,tic weight predicted_y 0 AAPL 0.0 ...
1,2020-07-02,tic weight predicted_y 0 AAPL 0....
2,2020-07-06,tic weight predicted_y 0 AAPL 1....
3,2020-07-07,tic weight predicted_y 0 AAPL 0....
4,2020-07-08,tic weight predicted_y 0 AAPL 0....
...,...,...
290,2021-08-25,tic weight predicted_y 0 AAPL 1....
291,2021-08-26,tic weight predicted_y 0 AAPL 6....
292,2021-08-27,tic weight predicted_y 0 AAPL 1....
293,2021-08-30,tic weight predicted_y 0 AAPL 0....


In [18]:
# Persist the fitted model under a fixed name so it can be reloaded later.
model_name = "lr_model"
lr.save_model(trained_lr, model_name)

Model saved succesfully.


In [19]:
# Reload the model that was saved under the name "lr_model".
# NOTE(review): if load_model() unpickles the file, only load files you trust.
model_name = "lr_model"
my_model = lr.load_model(model_name)

Model loaded succesfully.
