In [1]:
from sys import path
from os.path import dirname as dir

In [2]:
# Make the parent directory of the first sys.path entry importable
# (presumably the project root that holds AgentLayer / FinancialDataLayer).
# Note: `dir` here is os.path.dirname, aliased in the import cell above.
# The membership check keeps the cell idempotent, so re-running it does not
# keep appending the same directory to sys.path.
parent_dir = dir(path[0])
if parent_dir not in path:
    path.append(parent_dir)

In [3]:
from statistics import median
from AgentLayer.ConventionalAgents.LinearRegression import LinearRegressionAgent
from AgentLayer.DataSplitter.TimeSeriesSplitter import TimeSeriesSplitter
from FinancialDataLayer.DataCollection.DataDownloader import DataDownloader
from FinancialDataLayer.DataProcessing.DefaultFeatureEngineer import DefaultFeatureEngineer
import yaml
import numpy as np
from AgentLayer.metrics import *

In [5]:
# IMPORT .yaml FILE
# Gather user parameters from user_params.yaml, located one directory above
# the current working directory.
with open("..//user_params.yaml", "r") as stream:
    try:
        user_params = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Show the parser message, then stop. Carrying on would only fail a
        # few lines later with an unrelated NameError, because user_params
        # was never bound.
        print(exc)
        raise

# Settings read by the later cells.
tickers = user_params["tickers"]
env_kwargs = user_params["env_params"]
tech_indicator_list = env_kwargs["tech_indicator_list"]

In [7]:
# FETCH DATA
# Download price history for the configured tickers over a fixed date range,
# then print the first rows as a sanity check.
print("\nTest 3: Downloading from Yahoo.........")
downloaded_df = DataDownloader(
    start_date="2009-01-01",
    end_date="2021-10-31",
    ticker_list=tickers,
).download_from_yahoo()

print("Raw Data: ", downloaded_df.head())


Test 3: Downloading from Yahoo.........
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (12924, 8)
Raw Data:          date       open       high        low      close     volume   tic  day
0 2008-12-31   3.070357   3.133571   3.047857   2.606277  607541200  AAPL    2
1 2008-12-31  41.590000  43.049999  41.500000  32.005886    5443100    BA    2
2 2008-12-31  43.700001  45.099998  43.700001  30.628834    6277400   CAT    2
3 2008-12-31  72.900002  74.629997  72.900002  43.314426    9964300   CVX    2
4 2009-01-02   3.067143   3.251429   3.041429   2.771174  746015200  AAPL    4


In [8]:
 # PREPROCESS DATA
print("\nTest 4: Feature engineer.........")
# Configure the feature engineer with the indicator list from the YAML config
# and with the VIX, turbulence and covariance options switched on.
data_processor = DefaultFeatureEngineer(
    use_default=False,
    tech_indicator_list=tech_indicator_list,
    use_vix=True,
    use_turbulence=True,
    use_covar=True,
)
# Extend the raw download with the engineered features.
df_processed = data_processor.extend_data(downloaded_df)


Test 4: Feature engineer.........
Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3231, 8)
Successfully added vix
Successfully added turbulence index
Successfully added covariances


In [9]:
# Split the processed data into a training window and a trading window.
# NOTE(review): the recorded portfolio output stops at 2021-09-01 although the
# trade window is requested through 2021-09-02, which hints that the end date
# is exclusive. If so, 2020-06-30 falls in neither split -- confirm against
# TimeSeriesSplitter.get_split_data.
splitter = TimeSeriesSplitter()
train = splitter.get_split_data(
    df_processed,
    "2009-01-01",
    "2020-06-30",
)
trade = splitter.get_split_data(
    df_processed,
    "2020-07-01",
    "2021-09-02",
)

In [10]:
# Distinct tickers and distinct dates present in the trading window.
unique_tic = trade["tic"].unique()
unique_trade_date = trade["date"].unique()

In [11]:
# Prepare the training split for the machine learning models.
# prepare_ml_data returns a pair, unpacked here as x_train / y_train
# (presumably features and targets); both are passed to train_model below.
x_train, y_train = data_processor.prepare_ml_data(train)

In [12]:
# Create the Linear Regression agent and train it on the prepared data.
# The value returned by train_model is kept as trained_lr and is later
# handed to lr.predict and lr.save_model.
lr = LinearRegressionAgent()
trained_lr = lr.train_model(x_train, y_train)

Model trained succesfully


In [13]:
# Run the trained model over the trading dates.
# predict returns three objects: the portfolio table, a cumulative series and
# the per-date weight table (printed in the following cells).
portfolio, portfolio_cumprod, meta_coefficient = lr.predict(
    trained_lr,
    1000000,  # presumably the initial account value -- TODO confirm
    df_processed,
    unique_trade_date,
    tech_indicator_list,
)

In [14]:
# Show the portfolio table returned by predict (date / account_value columns).
print("portfolio: \n", portfolio)

portfolio: 
           date   account_value
0   2020-07-01         1000000
1   2020-07-02  1002717.337037
2   2020-07-06  1042091.814716
3   2020-07-07   992014.183768
4   2020-07-08   998669.002421
..         ...             ...
291 2021-08-26  1200643.252031
292 2021-08-27  1229758.157681
293 2021-08-30  1207076.280064
294 2021-08-31  1217280.340974
295 2021-09-01  1209627.252981

[296 rows x 2 columns]


In [15]:
# Show the second object returned by predict.
# NOTE(review): judging by the recorded output this looks like the cumulative
# return relative to the starting account value, with a NaN first entry -- confirm.
print("portfolio_cumprod: \n", portfolio_cumprod)

portfolio_cumprod: 
 0           NaN
1      0.002717
2      0.042092
3     -0.007986
4     -0.001331
         ...   
291    0.200643
292    0.229758
293    0.207076
294    0.217280
295    0.209627
Name: account_value, Length: 296, dtype: float64


In [16]:
# Show the per-date weights returned by predict; each row carries a nested
# table with tic / weight / predicted_y columns, so the printed form is truncated.
print("Meta Coefficient: \n", meta_coefficient)

Meta Coefficient: 
           date                                            weights
0   2020-07-01      tic        weight  predicted_y
0  AAPL  3....
1   2020-07-02      tic        weight  predicted_y
0  AAPL  5....
2   2020-07-06      tic        weight  predicted_y
0  AAPL  2....
3   2020-07-07      tic        weight  predicted_y
0  AAPL  0....
4   2020-07-08      tic        weight  predicted_y
0  AAPL  2....
..         ...                                                ...
290 2021-08-25      tic        weight  predicted_y
0  AAPL  4....
291 2021-08-26      tic        weight  predicted_y
0  AAPL  0....
292 2021-08-27      tic        weight  predicted_y
0  AAPL  8....
293 2021-08-30      tic        weight  predicted_y
0  AAPL  2....
294 2021-08-31      tic        weight  predicted_y
0  AAPL  5....

[295 rows x 2 columns]


In [17]:
# Persist the trained model under the name "lr_model".
lr.save_model(trained_lr, "lr_model")

Model saved succesfully.


In [18]:
# Load the model back using the same name it was saved under ("lr_model").
# my_model is not used further in the cells visible here.
my_model = lr.load_model("lr_model")

Model loaded succesfully.
