## Technical Indicators List

All the indicators that https://pypi.org/project/stockstats/ supports

In [1]:
# Every stockstats indicator name this notebook may draw from.
INDICATORS_LIST_ALL = [
    "high_5_sma", "rsi", "boll", "macd", "cr", "wr", "cci",
    "tr", "atr", "dma", "pdi", "dx", "adx", "adxr",
    "trix", "tema", "vr", "mfi", "vwma",
    "chop", "ppo", "stochrsi", "supertrend", "aroon", "close_75_z", "ao",
]

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#%matplotlib inline
import mplfinance as mpf # matplot.finance to plot candlesticks
import random
import itertools # iterators for efficient looping: https://docs.python.org/3/library/itertools.html 

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader # a veces puede dar error al hacer fetch de la data porque no encuentra un stock o sus precios, o por la timezone
from finrl import config_tickers # config_tickers es una clase en la que se guardan en listas los nombres de cada stock para cada índice
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline



In [3]:
# Date windows for the experiment. The training window ends on the same day the
# test window starts; the split outputs shown later in this notebook have the
# training rows stopping the day before, so the two sets do not overlap.
TRAIN_START_DATE = "2010-01-01"
TRAIN_END_DATE = TEST_START_DATE = "2021-10-01"
TEST_END_DATE = "2023-03-01"

In [4]:
# Download daily price history for every DOW-30 ticker over the whole
# train + test period in a single pass.
downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    ticker_list=config_tickers.DOW_30_TICKER,
)
stock = downloader.fetch_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (97013, 8)


In [5]:
stock # this DF contains all the stock price history from the train start date to the test end date, for every firm in the DJ30

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2010-01-04,7.622500,7.660714,7.585000,6.470741,493729600,AAPL,0
1,2010-01-04,56.630001,57.869999,56.560001,41.200794,5277400,AMGN,0
2,2010-01-04,40.810001,41.099998,40.389999,33.090427,6894300,AXP,0
3,2010-01-04,55.720001,56.389999,54.799999,43.777550,6186700,BA,0
4,2010-01-04,57.650002,59.189999,57.509998,40.190231,7325600,CAT,0
...,...,...,...,...,...,...,...,...
97008,2023-02-28,482.670013,483.359985,473.920013,465.066833,3902100,UNH,1
97009,2023-02-28,220.000000,221.770004,219.500000,217.824524,5385400,V,1
97010,2023-02-28,38.700001,38.970001,38.549999,35.538387,16685300,VZ,1
97011,2023-02-28,35.480000,35.779999,35.320000,32.750977,8847000,WBA,1


In [6]:
stock.day.describe()

count    97013.000000
mean         2.024131
std          1.398530
min          0.000000
25%          1.000000
50%          2.000000
75%          3.000000
max          4.000000
Name: day, dtype: float64

Stock symbols

In [7]:
tickers = stock.tic.unique()

We wanted to get:

In [8]:
# Show the ticker list we asked for, as a flat array, and how many there are.
dow30_tickers = config_tickers.DOW_30_TICKER
print(np.asarray(dow30_tickers))
print("Number of firms in the index: ", len(dow30_tickers))

['AXP' 'AMGN' 'AAPL' 'BA' 'CAT' 'CSCO' 'CVX' 'GS' 'HD' 'HON' 'IBM' 'INTC'
 'JNJ' 'KO' 'JPM' 'MCD' 'MMM' 'MRK' 'MSFT' 'NKE' 'PG' 'TRV' 'UNH' 'CRM'
 'VZ' 'V' 'WBA' 'WMT' 'DIS' 'DOW']
Number of firms in the index:  30


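Nota: `DOW` no es el índice DJIA (la media), sino el ticker de Dow Inc., una de las 30 empresas del índice. Se descarga con el resto, pero desaparece tras el preprocesado (más abajo quedan 29 tickers).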

But we got (because of the unavailable timezone or price data):

In [9]:
print(tickers) # stocks we have to train the agent
print("Number of firms downloaded: ",len(tickers))

['AAPL' 'AMGN' 'AXP' 'BA' 'CAT' 'CRM' 'CSCO' 'CVX' 'DIS' 'GS' 'HD' 'HON'
 'IBM' 'INTC' 'JNJ' 'JPM' 'KO' 'MCD' 'MMM' 'MRK' 'MSFT' 'NKE' 'PG' 'TRV'
 'UNH' 'V' 'VZ' 'WBA' 'WMT' 'DOW']
Number of firms downloaded:  30


## Agent 2

with following technical indicators:

In [10]:
# Pick 4 technical indicators at random from the supported-indicators list.
# The draw uses its own seeded generator so that "Restart Kernel -> Run All"
# always selects the same indicators. The unseeded draw recorded in this
# notebook's saved outputs was ['wr', 'tema', 'tr', 'ppo'], so a re-run with
# this seed may show different indicator columns than those outputs.
# Change INDICATORS_SEED to explore a different indicator set.
INDICATORS_SEED = 2
INDICATORS = random.Random(INDICATORS_SEED).sample(INDICATORS_LIST_ALL, 4)

In [11]:
INDICATORS

['wr', 'tema', 'tr', 'ppo']

In [12]:
# Create the FeatureEngineer with the preprocessing configuration we want.
feature_engineer_config = dict(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    # Adds the `vix` column seen in the processed frame below.
    use_vix=True,
    # Turbulence measures extreme asset price fluctuation: if the index reaches
    # a pre-defined threshold, the agent halts buying and gradually sells its
    # holdings.
    use_turbulence=True,
    # TODO: find out what a user-defined feature is and whether we want one.
    user_defined_feature=False,
)
fe = FeatureEngineer(**feature_engineer_config)

processed = fe.preprocess_data(stock)

Successfully added technical indicators


[*********************100%%**********************]  1 of 1 completed


Shape of DataFrame:  (3310, 8)
Successfully added vix
Successfully added turbulence index


Comentario: hay indicadores que, pese a estar documentados como soportados en https://pypi.org/project/stockstats/, esta versión del código no reconoce. Habrá que comprobar mediante más pruebas que todos los incluidos en la lista de indicadores estén soportados por esta versión.

In [13]:
processed

Unnamed: 0,date,open,high,low,close,volume,tic,day,wr,tema,tr,ppo,vix,turbulence
0,2010-01-04,7.622500,7.660714,7.585000,6.470741,493729600,AAPL,0,-1571.666541,6.470741,1.189973,0.000000,20.040001,0.00000
1,2010-01-04,56.630001,57.869999,56.560001,41.200794,5277400,AMGN,0,-1272.460746,41.200794,16.669205,0.000000,20.040001,0.00000
2,2010-01-04,40.810001,41.099998,40.389999,33.090427,6894300,AXP,0,-1128.110057,33.090427,8.009571,0.000000,20.040001,0.00000
3,2010-01-04,55.720001,56.389999,54.799999,43.777550,6186700,BA,0,-793.235751,43.777550,12.612450,0.000000,20.040001,0.00000
4,2010-01-04,57.650002,59.189999,57.509998,40.190231,7325600,CAT,0,-1130.938325,40.190231,18.999767,0.000000,20.040001,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95985,2023-02-27,488.769989,490.940002,481.959991,472.278229,3006200,UNH,0,-89.965306,472.307670,17.674835,-0.443907,20.950001,7.16379
95986,2023-02-27,220.729996,221.440002,219.339996,218.230591,4255300,V,0,-94.869609,217.546534,4.001724,-0.111513,20.950001,7.16379
95987,2023-02-27,38.990002,39.150002,38.630001,35.602482,14210900,VZ,0,-179.728178,35.438624,3.675713,-0.842037,20.950001,7.16379
95988,2023-02-27,36.049999,36.080002,35.270000,32.621929,5580100,WBA,0,-242.369377,32.709821,3.080139,-0.497050,20.950001,7.16379


In order to give the Agent a consistent data structure to work with, we need to ensure that each stock covers the same date range. This is necessary because not all stocks have data for every trading day, due to holidays, stock-specific trading suspensions, or newly listed stocks. Therefore, we will establish a common date interval for every stock with daily granularity and fill the missing values with 0 (the more NaNs we have, the worse the performance will be). --> We can check this effect with other stock indices that have fewer missing stock prices.

Convert the stock column to list

In [14]:
# Distinct tickers that survived preprocessing, in order of first appearance.
list_ticker = processed["tic"].drop_duplicates().tolist()
print(list_ticker)
len(list_ticker)

['AAPL', 'AMGN', 'AXP', 'BA', 'CAT', 'CRM', 'CSCO', 'CVX', 'DIS', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'V', 'VZ', 'WBA', 'WMT']


29

Create a list from the minimum to the maximum date and with the same granularity

In [15]:
# Every calendar day (weekends and holidays included) between the first and
# last processed date, rendered as strings.
processed_dates = processed['date']
calendar_days = pd.date_range(start=processed_dates.min(), end=processed_dates.max())
list_date = calendar_days.astype(str).tolist()
list_date

['2010-01-04',
 '2010-01-05',
 '2010-01-06',
 '2010-01-07',
 '2010-01-08',
 '2010-01-09',
 '2010-01-10',
 '2010-01-11',
 '2010-01-12',
 '2010-01-13',
 '2010-01-14',
 '2010-01-15',
 '2010-01-16',
 '2010-01-17',
 '2010-01-18',
 '2010-01-19',
 '2010-01-20',
 '2010-01-21',
 '2010-01-22',
 '2010-01-23',
 '2010-01-24',
 '2010-01-25',
 '2010-01-26',
 '2010-01-27',
 '2010-01-28',
 '2010-01-29',
 '2010-01-30',
 '2010-01-31',
 '2010-02-01',
 '2010-02-02',
 '2010-02-03',
 '2010-02-04',
 '2010-02-05',
 '2010-02-06',
 '2010-02-07',
 '2010-02-08',
 '2010-02-09',
 '2010-02-10',
 '2010-02-11',
 '2010-02-12',
 '2010-02-13',
 '2010-02-14',
 '2010-02-15',
 '2010-02-16',
 '2010-02-17',
 '2010-02-18',
 '2010-02-19',
 '2010-02-20',
 '2010-02-21',
 '2010-02-22',
 '2010-02-23',
 '2010-02-24',
 '2010-02-25',
 '2010-02-26',
 '2010-02-27',
 '2010-02-28',
 '2010-03-01',
 '2010-03-02',
 '2010-03-03',
 '2010-03-04',
 '2010-03-05',
 '2010-03-06',
 '2010-03-07',
 '2010-03-08',
 '2010-03-09',
 '2010-03-10',
 '2010-03-

Assign every day to each stock

In [16]:
# Cartesian product of days and tickers: one (date, ticker) pair per cell of
# the full calendar grid, with dates varying slowest.
combination = [(day, ticker) for day in list_date for ticker in list_ticker]
combination

[('2010-01-04', 'AAPL'),
 ('2010-01-04', 'AMGN'),
 ('2010-01-04', 'AXP'),
 ('2010-01-04', 'BA'),
 ('2010-01-04', 'CAT'),
 ('2010-01-04', 'CRM'),
 ('2010-01-04', 'CSCO'),
 ('2010-01-04', 'CVX'),
 ('2010-01-04', 'DIS'),
 ('2010-01-04', 'GS'),
 ('2010-01-04', 'HD'),
 ('2010-01-04', 'HON'),
 ('2010-01-04', 'IBM'),
 ('2010-01-04', 'INTC'),
 ('2010-01-04', 'JNJ'),
 ('2010-01-04', 'JPM'),
 ('2010-01-04', 'KO'),
 ('2010-01-04', 'MCD'),
 ('2010-01-04', 'MMM'),
 ('2010-01-04', 'MRK'),
 ('2010-01-04', 'MSFT'),
 ('2010-01-04', 'NKE'),
 ('2010-01-04', 'PG'),
 ('2010-01-04', 'TRV'),
 ('2010-01-04', 'UNH'),
 ('2010-01-04', 'V'),
 ('2010-01-04', 'VZ'),
 ('2010-01-04', 'WBA'),
 ('2010-01-04', 'WMT'),
 ('2010-01-05', 'AAPL'),
 ('2010-01-05', 'AMGN'),
 ('2010-01-05', 'AXP'),
 ('2010-01-05', 'BA'),
 ('2010-01-05', 'CAT'),
 ('2010-01-05', 'CRM'),
 ('2010-01-05', 'CSCO'),
 ('2010-01-05', 'CVX'),
 ('2010-01-05', 'DIS'),
 ('2010-01-05', 'GS'),
 ('2010-01-05', 'HD'),
 ('2010-01-05', 'HON'),
 ('2010-01-05', 'IB

Merge with stock DF (potentially resulting in NaN values)

In [17]:
# Left-join the processed data onto the full (date, ticker) grid so that any
# pair without data shows up as a row of NaNs.
calendar_grid = pd.DataFrame(combination, columns=["date", "tic"])
processed_full = calendar_grid.merge(processed, how="left", on=["date", "tic"])

Remove non-trading days generated with the date range

In [18]:
# Keep only the dates that actually occur in the processed data.
is_trading_day = processed_full['date'].isin(processed['date'])
processed_full = processed_full.loc[is_trading_day]

In [19]:
processed_full

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
0,2010-01-04,AAPL,7.622500,7.660714,7.585000,6.470741,493729600.0,0.0,-1571.666541,6.470741,1.189973,0.000000,20.040001,0.00000
1,2010-01-04,AMGN,56.630001,57.869999,56.560001,41.200794,5277400.0,0.0,-1272.460746,41.200794,16.669205,0.000000,20.040001,0.00000
2,2010-01-04,AXP,40.810001,41.099998,40.389999,33.090427,6894300.0,0.0,-1128.110057,33.090427,8.009571,0.000000,20.040001,0.00000
3,2010-01-04,BA,55.720001,56.389999,54.799999,43.777550,6186700.0,0.0,-793.235751,43.777550,12.612450,0.000000,20.040001,0.00000
4,2010-01-04,CAT,57.650002,59.189999,57.509998,40.190231,7325600.0,0.0,-1130.938325,40.190231,18.999767,0.000000,20.040001,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139282,2023-02-27,UNH,488.769989,490.940002,481.959991,472.278229,3006200.0,0.0,-89.965306,472.307670,17.674835,-0.443907,20.950001,7.16379
139283,2023-02-27,V,220.729996,221.440002,219.339996,218.230591,4255300.0,0.0,-94.869609,217.546534,4.001724,-0.111513,20.950001,7.16379
139284,2023-02-27,VZ,38.990002,39.150002,38.630001,35.602482,14210900.0,0.0,-179.728178,35.438624,3.675713,-0.842037,20.950001,7.16379
139285,2023-02-27,WBA,36.049999,36.080002,35.270000,32.621929,5580100.0,0.0,-242.369377,32.709821,3.080139,-0.497050,20.950001,7.16379


In [20]:
len(processed_full.tic.unique())

29

In [21]:
processed_full.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95990 entries, 0 to 139286
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        95990 non-null  object 
 1   tic         95990 non-null  object 
 2   open        95990 non-null  float64
 3   high        95990 non-null  float64
 4   low         95990 non-null  float64
 5   close       95990 non-null  float64
 6   volume      95990 non-null  float64
 7   day         95990 non-null  float64
 8   wr          95990 non-null  float64
 9   tema        95990 non-null  float64
 10  tr          95990 non-null  float64
 11  ppo         95990 non-null  float64
 12  vix         95990 non-null  float64
 13  turbulence  95990 non-null  float64
dtypes: float64(12), object(2)
memory usage: 11.0+ MB


In [22]:
processed_full.describe()

Unnamed: 0,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
count,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0
mean,102.224117,103.175365,101.253646,88.243726,19953830.0,2.024471,-324.878211,88.243808,15.109345,0.313405,18.721163,34.943613
std,73.520545,74.279832,72.738958,70.60797,62042780.0,1.398649,255.970194,70.61135,11.574239,1.623517,7.269653,43.066003
min,6.870357,7.0,6.794643,5.806766,305400.0,0.0,-6500.047684,5.850036,0.2075,-27.19445,9.14,0.0
25%,47.470001,47.91,47.02,38.935165,4003909.0,1.0,-440.691359,38.924815,6.316296,-0.528837,13.63,14.972892
50%,83.498051,84.139999,82.830002,67.654842,7250700.0,2.0,-255.30986,67.653497,12.613668,0.426594,16.875,24.17559
75%,138.477531,139.731735,137.169998,118.501177,14986720.0,3.0,-141.145481,118.525987,20.509214,1.294678,21.76,40.000666
max,555.0,558.099976,550.130005,540.80011,1880998000.0,4.0,-0.0,540.457223,80.821434,11.380167,82.690002,652.616689


In this case, there are no nulls, but if there were we would need to fill them in the following way. 

In [23]:
# Order rows by date, then ticker, and replace any remaining NaN with 0.
# TODO: check the data_split source code to confirm why it needs this ordering.
processed_full = (
    processed_full
    .sort_values(['date', 'tic'])
    .fillna(0)
)

In [24]:
processed_full.describe()

Unnamed: 0,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
count,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0
mean,102.224117,103.175365,101.253646,88.243726,19953830.0,2.024471,-324.878211,88.243808,15.109345,0.313405,18.721163,34.943613
std,73.520545,74.279832,72.738958,70.60797,62042780.0,1.398649,255.970194,70.61135,11.574239,1.623517,7.269653,43.066003
min,6.870357,7.0,6.794643,5.806766,305400.0,0.0,-6500.047684,5.850036,0.2075,-27.19445,9.14,0.0
25%,47.470001,47.91,47.02,38.935165,4003909.0,1.0,-440.691359,38.924815,6.316296,-0.528837,13.63,14.972892
50%,83.498051,84.139999,82.830002,67.654842,7250700.0,2.0,-255.30986,67.653497,12.613668,0.426594,16.875,24.17559
75%,138.477531,139.731735,137.169998,118.501177,14986720.0,3.0,-141.145481,118.525987,20.509214,1.294678,21.76,40.000666
max,555.0,558.099976,550.130005,540.80011,1880998000.0,4.0,-0.0,540.457223,80.821434,11.380167,82.690002,652.616689


In [25]:
# Clamp infinite values to a large finite magnitude so they cannot propagate
# into the environment's observations. Both signs are handled: +inf becomes
# +large_value and -inf becomes -large_value (previously -inf was left as is).
large_value = 1e9

# Rebind instead of mutating in place, so re-running the cell is harmless.
processed_full = processed_full.replace(
    [np.inf, -np.inf], [large_value, -large_value]
)

In [26]:
processed_full.describe()

Unnamed: 0,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
count,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0,95990.0
mean,102.224117,103.175365,101.253646,88.243726,19953830.0,2.024471,-324.878211,88.243808,15.109345,0.313405,18.721163,34.943613
std,73.520545,74.279832,72.738958,70.60797,62042780.0,1.398649,255.970194,70.61135,11.574239,1.623517,7.269653,43.066003
min,6.870357,7.0,6.794643,5.806766,305400.0,0.0,-6500.047684,5.850036,0.2075,-27.19445,9.14,0.0
25%,47.470001,47.91,47.02,38.935165,4003909.0,1.0,-440.691359,38.924815,6.316296,-0.528837,13.63,14.972892
50%,83.498051,84.139999,82.830002,67.654842,7250700.0,2.0,-255.30986,67.653497,12.613668,0.426594,16.875,24.17559
75%,138.477531,139.731735,137.169998,118.501177,14986720.0,3.0,-141.145481,118.525987,20.509214,1.294678,21.76,40.000666
max,555.0,558.099976,550.130005,540.80011,1880998000.0,4.0,-0.0,540.457223,80.821434,11.380167,82.690002,652.616689


Split Train and Test datasets

In [27]:
# Training subset: rows of processed_full selected by data_split for the
# training window. In the output below the last date is 2021-09-30 and all
# tickers of one date share the same index value.
train = data_split(processed_full, TRAIN_START_DATE,TRAIN_END_DATE)
train

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
0,2010-01-04,AAPL,7.622500,7.660714,7.585000,6.470741,493729600.0,0.0,-1571.666541,6.470741,1.189973,0.000000,20.040001,0.000000
0,2010-01-04,AMGN,56.630001,57.869999,56.560001,41.200794,5277400.0,0.0,-1272.460746,41.200794,16.669205,0.000000,20.040001,0.000000
0,2010-01-04,AXP,40.810001,41.099998,40.389999,33.090427,6894300.0,0.0,-1128.110057,33.090427,8.009571,0.000000,20.040001,0.000000
0,2010-01-04,BA,55.720001,56.389999,54.799999,43.777550,6186700.0,0.0,-793.235751,43.777550,12.612450,0.000000,20.040001,0.000000
0,2010-01-04,CAT,57.650002,59.189999,57.509998,40.190231,7325600.0,0.0,-1130.938325,40.190231,18.999767,0.000000,20.040001,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2956,2021-09-30,UNH,401.489990,403.489990,390.459991,375.759735,3779900.0,3.0,-144.627369,377.412716,18.430878,-1.082587,23.139999,24.927554
2956,2021-09-30,V,227.580002,228.789993,222.630005,218.139236,7128500.0,3.0,-89.252418,219.417838,6.802124,-0.684829,23.139999,24.927554
2956,2021-09-30,VZ,54.500000,54.509998,54.000000,45.622715,18736600.0,3.0,-954.825400,45.716093,8.574745,-0.471766,23.139999,24.927554
2956,2021-09-30,WBA,48.790001,48.930000,46.919998,40.519115,6449400.0,3.0,-261.638334,40.940077,6.981308,-0.556576,23.139999,24.927554


In [28]:
len(train.tic.unique())

29

In [29]:
train.tic.unique()

array(['AAPL', 'AMGN', 'AXP', 'BA', 'CAT', 'CRM', 'CSCO', 'CVX', 'DIS',
       'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM',
       'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'V', 'VZ', 'WBA', 'WMT'],
      dtype=object)

In [30]:
# Test ("trade") subset: rows of processed_full selected by data_split for the
# test window. In the output below it starts on 2021-10-01 and its index
# restarts at 0.
trade = data_split(processed_full, TEST_START_DATE,TEST_END_DATE)
trade

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
0,2021-10-01,AAPL,141.899994,142.919998,139.110001,140.653503,94639600.0,4.0,-87.094465,140.060799,3.809998,-1.173437,21.100000,120.031078
0,2021-10-01,AMGN,213.589996,214.610001,210.800003,195.822266,2629400.0,4.0,-264.229665,195.572024,19.950317,-1.490716,21.100000,120.031078
0,2021-10-01,AXP,168.500000,175.119995,168.479996,167.727463,3956000.0,4.0,-53.105693,165.806757,13.573593,1.390034,21.100000,120.031078
0,2021-10-01,BA,222.850006,226.720001,220.600006,226.000000,9113600.0,4.0,-16.830422,224.813103,6.779999,0.333270,21.100000,120.031078
0,2021-10-01,CAT,192.899994,195.869995,191.240005,183.514008,3695500.0,4.0,-117.674595,183.268519,14.584625,-1.871326,21.100000,120.031078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,2023-02-27,UNH,488.769989,490.940002,481.959991,472.278229,3006200.0,0.0,-89.965306,472.307670,17.674835,-0.443907,20.950001,7.163790
352,2023-02-27,V,220.729996,221.440002,219.339996,218.230591,4255300.0,0.0,-94.869609,217.546534,4.001724,-0.111513,20.950001,7.163790
352,2023-02-27,VZ,38.990002,39.150002,38.630001,35.602482,14210900.0,0.0,-179.728178,35.438624,3.675713,-0.842037,20.950001,7.163790
352,2023-02-27,WBA,36.049999,36.080002,35.270000,32.621929,5580100.0,0.0,-242.369377,32.709821,3.080139,-0.497050,20.950001,7.163790


In [31]:
train_length = len(train)
trade_length = len(trade)
print("Number of training samples: ", train_length)
print("Number of testing samples", trade_length)

Number of training samples:  85753
Number of testing samples 10237


In [32]:
train.tail()

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
2956,2021-09-30,UNH,401.48999,403.48999,390.459991,375.759735,3779900.0,3.0,-144.627369,377.412716,18.430878,-1.082587,23.139999,24.927554
2956,2021-09-30,V,227.580002,228.789993,222.630005,218.139236,7128500.0,3.0,-89.252418,219.417838,6.802124,-0.684829,23.139999,24.927554
2956,2021-09-30,VZ,54.5,54.509998,54.0,45.622715,18736600.0,3.0,-954.8254,45.716093,8.574745,-0.471766,23.139999,24.927554
2956,2021-09-30,WBA,48.790001,48.93,46.919998,40.519115,6449400.0,3.0,-261.638334,40.940077,6.981308,-0.556576,23.139999,24.927554
2956,2021-09-30,WMT,46.880001,47.243332,46.416668,44.56654,22457700.0,3.0,-170.258086,44.599905,2.337872,-1.105781,23.139999,24.927554


In [33]:
trade.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
0,2021-10-01,AAPL,141.899994,142.919998,139.110001,140.653503,94639600.0,4.0,-87.094465,140.060799,3.809998,-1.173437,21.1,120.031078
0,2021-10-01,AMGN,213.589996,214.610001,210.800003,195.822266,2629400.0,4.0,-264.229665,195.572024,19.950317,-1.490716,21.1,120.031078
0,2021-10-01,AXP,168.5,175.119995,168.479996,167.727463,3956000.0,4.0,-53.105693,165.806757,13.573593,1.390034,21.1,120.031078
0,2021-10-01,BA,222.850006,226.720001,220.600006,226.0,9113600.0,4.0,-16.830422,224.813103,6.779999,0.33327,21.1,120.031078
0,2021-10-01,CAT,192.899994,195.869995,191.240005,183.514008,3695500.0,4.0,-117.674595,183.268519,14.584625,-1.871326,21.1,120.031078


Since we need to set the parameters for the environment functions, we need to compute the stock dimension and state space.

In [34]:
# N = number of distinct stocks in the training set.
stock_dimension = len(train["tic"].unique())
# M = number of technical indicators tracked per stock.
n_indicators = len(INDICATORS)
# State vector layout: {balance, close prices, shares held, technical indicators}
#   * 1 node for the cash balance, computed as
#     balance(t) - money paid to buy shares + money received from selling shares
#   * N nodes for the stock prices plus N nodes for the shares held per stock
#   * M nodes per stock, one for each technical indicator
# Total: 1 + N * (2 + M)
state_space = 1 + stock_dimension * (2 + n_indicators)
print(f"Stock Dimension (Number of different companies in which we want to invest initially): {stock_dimension}, State Space (Number of input nodes to feed to the network): {state_space}")

Stock Dimension (Number of different companies in which we want to invest initially): 29, State Space (Number of input nodes to feed to the network): 175


Most brokers charge trading commissions or fees. Here, we will simulate a buying and selling commission of 0.1% for each transaction. We will assume the user starts with no shares of any stock and has 1,000,000 dollars to invest. We will also define a parameter that sets the maximum number of shares to trade, h_max. Finally, we set the reward scaling factor (stored in the variable `gamma` and passed to the environment as `reward_scaling`); note that this only rescales the reward signal and is not the discount factor that weighs short-term against long-term rewards, which is a hyperparameter of the RL algorithm itself. We could modify these parameters, if needed.

In [35]:
# Proportional transaction cost per stock: 0.1% on every buy and on every sell.
# Two independent lists are built on purpose. A chained assignment would bind
# both names to the same list object, so changing one cost would silently
# change the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
# Start with no shares of any stock.
num_stock_shares = [0] * stock_dimension
# Maximum number of shares to trade (passed to the environment as "hmax").
h_max = 100
# Starting cash, in dollars.
initial_amount = 1000000
# Despite its name, `gamma` is passed to the environment as "reward_scaling":
# it is a scale for the reward signal, not the RL discount factor, which is a
# hyperparameter of the algorithm. The name is kept because the env_kwargs
# cell reads it.
# NOTE(review): presumably the environment multiplies each step reward by this
# value - confirm against the StockTradingEnv source.
gamma = 1e-4

HAZ PRUEBAS CON UN GAMMA MÁS ALTO

Let's set the configuration that will be passed to the Environment class

In [36]:
# Keyword arguments shared by every StockTradingEnv built in this notebook.
env_kwargs = dict(
    hmax=h_max,
    initial_amount=initial_amount,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,  # one action component per stock
    reward_scaling=gamma,
)

Let's create the Stock Trading Environment!

In [37]:
# Build the training environment from the training split and the shared
# configuration, then obtain the vectorised environment that the
# stable-baselines3 models are trained on.
e_train_gym = StockTradingEnv(df = train, **env_kwargs) # creates instance of the Environment class
print("Environment class type", type(e_train_gym))
# get_sb_env returns a pair; only the first element (the DummyVecEnv, per the
# printed type below) is kept, the second is discarded.
env_train, _ = e_train_gym.get_sb_env() # resets the environment and converts the initial environment into a DummyVecEnv instance
print("External Environment class type: ", type(env_train))

Environment class type <class 'finrl.meta.env_stock_trading.env_stocktrading.StockTradingEnv'>
External Environment class type:  <class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


Since we will use different RL algorithms, let's set some flags:

In [38]:
# One switch per RL algorithm; all of them are enabled for this experiment.
if_using_a2c = if_using_ddpg = if_using_ppo = if_using_td3 = if_using_sac = True

Let's create the Deep Reinforcement Learning Agent!

podríamos runear todos los ind (p.e. 3 o 4 ind) y con los mejores, cambiar el resto de parametros (fechas, gamma,...)

# A2C

In [39]:
# Wrap the training environment in a DRLAgent and request a stable-baselines3
# A2C model from it with the default hyperparameters.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

# When A2C is enabled, send its training logs to stdout, CSV and TensorBoard
# under the results folder. The progress.csv written there is read back later
# to inspect the learning curve.
if if_using_a2c:
    tmp_path = 'resultadosTFG' + '/set_2' + '/a2c'
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to resultadosTFGset_2/a2c


Let's train the agent!

In [40]:
# Train A2C for 50,000 timesteps when enabled; otherwise leave a None placeholder.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name='a2c',
        total_timesteps=50000,
    )
else:
    trained_a2c = None

--------------------------------------
| time/                 |            |
|    fps                | 167        |
|    iterations         | 100        |
|    time_elapsed       | 2          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -41.4      |
|    explained_variance | 0.256      |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | 73.5       |
|    reward             | 0.63731694 |
|    std                | 1.01       |
|    value_loss         | 3.96       |
--------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 176        |
|    iterations         | 200        |
|    time_elapsed       | 5          |
|    total_timesteps    | 1000       |
| train/                |            |
|    entropy_loss       | -41.4      |
|    explained_variance | 0          |
|    learning_rate      |

In [41]:
trained_a2c.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=175, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=175, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=29, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)

Let's see the learning progress of the agent

In [43]:
learning_process = pd.read_csv("resultadosTFG/set_2/a2c/progress.csv")
learning_process

Unnamed: 0,train/policy_loss,time/fps,train/explained_variance,train/n_updates,train/std,train/reward,time/total_timesteps,train/learning_rate,train/entropy_loss,time/iterations,train/value_loss,time/time_elapsed
0,73.505234,167,2.561209e-01,99,1.009769,0.637317,500,0.0007,-41.426437,100,3.961940,2
1,-62.210258,176,0.000000e+00,199,1.010280,-0.834494,1000,0.0007,-41.430725,200,4.348250,5
2,14.665599,181,4.996121e-03,299,1.007172,-6.172693,1500,0.0007,-41.351318,300,12.806152,8
3,-157.536713,182,0.000000e+00,399,1.005637,3.044148,2000,0.0007,-41.307663,400,27.322723,10
4,-78.345627,183,2.942479e-02,499,1.006378,-1.295270,2500,0.0007,-41.331585,500,36.746922,13
...,...,...,...,...,...,...,...,...,...,...,...,...
95,34.898560,163,0.000000e+00,9599,1.022325,-1.645731,48000,0.0007,-41.733799,9600,2.234959,293
96,-1.477930,163,-1.192093e-07,9699,1.024604,-0.450692,48500,0.0007,-41.797241,9700,0.489941,296
97,-349.695129,163,0.000000e+00,9799,1.024909,-1.290070,49000,0.0007,-41.811863,9800,63.712025,299
98,-68.463356,163,-1.192093e-07,9899,1.024596,-2.558731,49500,0.0007,-41.797470,9900,5.505448,303


Plot the training results

In [44]:
def plot_metrics(df, n_rows, n_cols, metrics=None,
                 out_path='resultadosTFG/set_2/a2c/trainingStatsResults.png'):
    """Plot training metrics against total timesteps and save the figure.

    One subplot is drawn per metric on an ``n_rows`` x ``n_cols`` grid. The
    figure is written to ``out_path`` and then closed; nothing is returned.

    Note: a later cell of this notebook defines another ``plot_metrics`` for
    PPO, which replaces this one once it has run.

    Parameters
    ----------
    df : DataFrame
        Training log with a 'time/total_timesteps' column and one column per
        plotted metric (a missing column raises ``KeyError``).
    n_rows, n_cols : int
        Shape of the subplot grid. If the grid has fewer cells than there are
        metrics, the surplus metrics are not drawn; surplus cells are hidden.
    metrics : iterable of (column, title) pairs, optional
        Metrics to plot. Defaults to the A2C metrics logged in this notebook.
    out_path : str, optional
        Where the PNG is saved. Defaults to the A2C results folder.
    """
    if metrics is None:
        # List of tuples containing (y-axis column, title)
        metrics = [
            ('train/reward', 'Reward'),
            ('train/policy_loss', 'Policy Loss'),
            ('train/std', 'Standard Deviation'),
            ('train/n_updates', 'Number of Updates'),
            ('train/value_loss', 'Value Loss'),
            ('time/fps', 'Frames Per Second'),
            ('train/explained_variance', 'Explained Variance'),
            ('train/entropy_loss', 'Entropy Loss')
        ]
    metrics = list(metrics)

    # squeeze=False guarantees a 2-D array of axes even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes without .flatten().
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Plot each metric on its own axis
    for ax, (metric, title) in zip(axes, metrics):
        ax.plot(df['time/total_timesteps'], df[metric], label=title)
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

    # Hide grid cells that received no metric instead of leaving empty frames.
    for unused_ax in axes[len(metrics):]:
        unused_ax.set_visible(False)

    fig.tight_layout()
    fig.savefig(out_path)
    # Close this specific figure rather than whichever one is "current".
    plt.close(fig)

df = learning_process
plot_metrics(df, n_rows=4, n_cols=2)  # Modify n_rows and n_cols as needed


The agent is not learning very well... 

Podríamos hacer una tabla resumen de todos los algos y pones cuántos steps han sido necesarios, la LR de cada uno, etc

# PPO

Set PPO parameters and set the logger

In [45]:
# PPO hyperparameters used for this experiment.
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=128,
)

# Fresh agent wrapper around the training environment, and the PPO model
# built with the hyperparameters above.
agent = DRLAgent(env=env_train)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# When PPO is enabled, send its training logs to stdout, CSV and TensorBoard
# under the results folder.
if if_using_ppo:
    tmp_path = 'resultadosTFG' + '/set_2' + '/ppo'
    new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to resultadosTFG/set_2/ppo


Train PPO

In [46]:
# Train PPO for 50,000 timesteps when enabled; otherwise leave a None placeholder.
if if_using_ppo:
    trained_ppo = agent.train_model(
        model=model_ppo,
        tb_log_name='ppo',
        total_timesteps=50000,
    )
else:
    trained_ppo = None

----------------------------------
| time/              |           |
|    fps             | 205       |
|    iterations      | 1         |
|    time_elapsed    | 9         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 1.3900071 |
----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 193         |
|    iterations           | 2           |
|    time_elapsed         | 21          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.015176109 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -41.2       |
|    explained_variance   | -0.00193    |
|    learning_rate        | 0.00025     |
|    loss                 | 6.17        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0252     |
|    reward  

In [47]:
trained_ppo.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=175, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=175, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=29, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)

Let's see the learning progress of the agent

In [48]:
learning_process = pd.read_csv("resultadosTFG/set_2/ppo/progress.csv")
learning_process

Unnamed: 0,time/fps,train/reward,time/total_timesteps,time/iterations,time/time_elapsed,train/approx_kl,train/loss,train/explained_variance,train/clip_fraction,train/n_updates,train/std,train/clip_range,train/learning_rate,train/policy_gradient_loss,train/entropy_loss,train/value_loss
0,205,1.390007,2048,1,9,,,,,,,,,,,
1,193,-0.274912,4096,2,21,0.015176,6.166244,-0.00193,0.185303,10.0,1.002421,0.2,0.00025,-0.025244,-41.192647,10.949272
2,189,1.412772,6144,3,32,0.016409,24.042761,0.013376,0.19248,20.0,1.00227,0.2,0.00025,-0.022329,-41.214805,48.756867
3,186,0.391481,8192,4,43,0.016813,30.559006,0.009237,0.179932,30.0,1.003562,0.2,0.00025,-0.018525,-41.237341,56.495911
4,187,-1.286448,10240,5,54,0.017119,12.572822,0.026538,0.133545,40.0,1.005132,0.2,0.00025,-0.015929,-41.26412,27.048776
5,187,0.359147,12288,6,65,0.013946,24.198181,-0.00495,0.175879,50.0,1.007625,0.2,0.00025,-0.012896,-41.332831,77.699887
6,186,2.098685,14336,7,76,0.02238,93.643967,-0.00583,0.215186,60.0,1.009945,0.2,0.00025,-0.018697,-41.404733,83.849098
7,186,-1.573003,16384,8,87,0.020381,9.391601,0.010766,0.192578,70.0,1.009513,0.2,0.00025,-0.021541,-41.429351,18.721597
8,186,0.943145,18432,9,98,0.016326,21.557358,0.031291,0.160352,80.0,1.01149,0.2,0.00025,-0.019609,-41.448763,51.501638
9,186,-0.908756,20480,10,109,0.017898,38.893581,0.009254,0.170166,90.0,1.013173,0.2,0.00025,-0.011378,-41.506076,51.667449


Plot the training results

In [49]:
def plot_metrics(df, n_rows, n_cols, metrics=None,
                 save_path='resultadosTFG/set_2/ppo/trainingStatsResults.png'):
    """Plot PPO training metrics against total timesteps and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Training log. Must contain 'time/total_timesteps' and every metric
        column that is plotted.
    n_rows, n_cols : int
        Shape of the subplot grid. Metrics are assigned to cells in row-major
        order; metrics beyond the grid size are skipped because zip() stops at
        the shorter sequence.
    metrics : list of (column, title) tuples, optional
        Columns to plot and their display titles. Defaults to the PPO metrics.
    save_path : str, optional
        Where the PNG is written.

    Returns
    -------
    None
        The figure is written to ``save_path`` and then closed.
    """
    if metrics is None:
        metrics = [
            ('train/reward', 'Reward'),
            ('train/loss', 'Loss'),
            ('train/std', 'Standard Deviation'),
            ('train/approx_kl', 'approximate Kullback-Leibler divergence'),
            ('train/clip_fraction', 'Clip Fraction'),
            ('train/clip_range', 'Clip Range'),
            ('train/explained_variance', 'Explained Variance'),
            ('train/value_loss', 'Value Loss'),
            ('train/policy_gradient_loss', 'Policy Gradient Loss'),
            ('train/entropy_loss', 'Entropy Loss')
        ]

    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Plot each metric in its own panel
    for ax, (metric, title) in zip(axes, metrics):
        ax.plot(df['time/total_timesteps'], df[metric], label=title)
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

    # Hide grid cells that received no metric instead of leaving empty frames
    for ax in axes[len(metrics):]:
        ax.set_visible(False)

    # Operate on this figure explicitly rather than pyplot's "current" figure
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

df = learning_process
plot_metrics(df, n_rows=5, n_cols=2)  # Modify n_rows and n_cols as needed

# DDPG

Create the agent and set the logger

In [50]:
# Build an agent around the training environment and create the DDPG model.
# No model_kwargs are passed, so get_model falls back to its own settings.
agent = DRLAgent(env=env_train)
model_ddpg = agent.get_model("ddpg")

# The dedicated logger is attached only when DDPG is enabled; the model itself
# is created regardless of the flag.
if if_using_ddpg:
    tmp_path = 'resultadosTFG/set_2/ddpg'
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device
Logging to resultadosTFG/set_2/ddpg


Train the agent

In [51]:
# Train DDPG for 50,000 timesteps when enabled; otherwise leave trained_ddpg
# as None (cells that use it afterwards will then fail).
if if_using_ddpg:
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name='ddpg',
        total_timesteps=50000,
    )
else:
    trained_ddpg = None

day: 2956, episode: 40
begin_total_asset: 1000000.00
end_total_asset: 4745881.95
total_reward: 3745881.95
total_cost: 1752.77
total_trades: 41517
Sharpe: 0.797
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 101       |
|    time_elapsed    | 117       |
|    total_timesteps | 11828     |
| train/             |           |
|    actor_loss      | -19.1     |
|    critic_loss     | 76.1      |
|    learning_rate   | 0.001     |
|    n_updates       | 8871      |
|    reward          | -9.484797 |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 97        |
|    time_elapsed    | 243       |
|    total_timesteps | 23656     |
| train/             |           |
|    actor_loss      | -11.7     |
|    critic_loss     | 7.86      |
|    learning_rate   | 0.001     |
|    n_updates       | 20699     |


In [52]:
# Show the policy networks of the trained DDPG model (the saved output's repr
# is a TD3Policy). Raises AttributeError if if_using_ddpg was False, because
# trained_ddpg is None in that case.
trained_ddpg.policy

TD3Policy(
  (actor): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (mu): Sequential(
      (0): Linear(in_features=175, out_features=400, bias=True)
      (1): ReLU()
      (2): Linear(in_features=400, out_features=300, bias=True)
      (3): ReLU()
      (4): Linear(in_features=300, out_features=29, bias=True)
      (5): Tanh()
    )
  )
  (actor_target): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (mu): Sequential(
      (0): Linear(in_features=175, out_features=400, bias=True)
      (1): ReLU()
      (2): Linear(in_features=400, out_features=300, bias=True)
      (3): ReLU()
      (4): Linear(in_features=300, out_features=29, bias=True)
      (5): Tanh()
    )
  )
  (critic): ContinuousCritic(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (qf0): Sequential(
      (0): Linear(in_features=204, out_

Let's see the learning progress of the agent 

In [53]:
# Load the DDPG run's progress.csv log into a DataFrame and display it.
# NOTE: this overwrites the `learning_process` loaded for the previous agent.
learning_process = pd.read_csv("resultadosTFG/set_2/ddpg/progress.csv")
learning_process

Unnamed: 0,time/fps,train/actor_loss,train/critic_loss,train/n_updates,time/total_timesteps,train/reward,train/learning_rate,time/time_elapsed,time/episodes
0,101,-19.093914,76.105058,8871,11828,-9.484797,0.001,117,4
1,97,-11.74087,7.855488,20699,23656,-9.484797,0.001,243,8
2,96,-9.717728,4.048362,32527,35484,-9.484797,0.001,368,12
3,95,-10.239715,3.381701,44355,47312,-9.484797,0.001,497,16


Plot the results of the training

In [54]:
def plot_metrics(df, n_rows, n_cols, metrics=None,
                 save_path='resultadosTFG/set_2/ddpg/trainingStatsResults.png'):
    """Plot DDPG training metrics against total timesteps and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Training log. Must contain 'time/total_timesteps' and every metric
        column that is plotted.
    n_rows, n_cols : int
        Shape of the subplot grid. Metrics are assigned to cells in row-major
        order; metrics beyond the grid size are skipped because zip() stops at
        the shorter sequence.
    metrics : list of (column, title) tuples, optional
        Columns to plot and their display titles. Defaults to the DDPG metrics.
    save_path : str, optional
        Where the PNG is written.

    Returns
    -------
    None
        The figure is written to ``save_path`` and then closed.
    """
    if metrics is None:
        metrics = [
            ('train/reward', 'Reward'),
            ('train/actor_loss', 'Actor Loss'),
            ('train/n_updates', 'Number of Updates'),
            ('train/critic_loss', 'Critic Loss')
        ]

    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Plot each metric in its own panel
    for ax, (metric, title) in zip(axes, metrics):
        ax.plot(df['time/total_timesteps'], df[metric], label=title)
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

    # Hide grid cells that received no metric instead of leaving empty frames
    for ax in axes[len(metrics):]:
        ax.set_visible(False)

    # Operate on this figure explicitly rather than pyplot's "current" figure
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

df = learning_process
plot_metrics(df, n_rows=4, n_cols=1)  # Modify n_rows and n_cols as needed

# TD3

Create the agent and set the logger

In [55]:
# Build an agent around the training environment and create the TD3 model
# with explicit hyper-parameters.
agent = DRLAgent(env=env_train)
TD3_PARAMS = {
    "batch_size": 100,
    "buffer_size": 1000000,
    "learning_rate": 0.001,
}

model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

# The dedicated logger is attached only when TD3 is enabled; the model itself
# is created regardless of the flag.
if if_using_td3:
    tmp_path = 'resultadosTFG/set_2/td3'
    new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_td3.set_logger(new_logger_td3)

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cpu device
Logging to resultadosTFG/set_2/td3




Train TD3

In [56]:
# Train TD3 for 50,000 timesteps when enabled; otherwise leave trained_td3
# as None (cells that use it afterwards will then fail).
if if_using_td3:
    trained_td3 = agent.train_model(
        model=model_td3,
        tb_log_name='td3',
        total_timesteps=50000,
    )
else:
    trained_td3 = None

----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 102       |
|    time_elapsed    | 115       |
|    total_timesteps | 11828     |
| train/             |           |
|    actor_loss      | 63.2      |
|    critic_loss     | 1.39e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 8871      |
|    reward          | -8.472155 |
----------------------------------
day: 2956, episode: 60
begin_total_asset: 1000000.00
end_total_asset: 5106622.12
total_reward: 4106622.12
total_cost: 999.00
total_trades: 41384
Sharpe: 0.888
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 99        |
|    time_elapsed    | 237       |
|    total_timesteps | 23656     |
| train/             |           |
|    actor_loss      | 34.2      |
|    critic_loss     | 400       |
|    learning_rate   | 0.001     |
|    n_updates       | 20699     |
|

In [57]:
# Show the policy networks of the trained TD3 model. Raises AttributeError if
# if_using_td3 was False, because trained_td3 is None in that case.
trained_td3.policy

TD3Policy(
  (actor): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (mu): Sequential(
      (0): Linear(in_features=175, out_features=400, bias=True)
      (1): ReLU()
      (2): Linear(in_features=400, out_features=300, bias=True)
      (3): ReLU()
      (4): Linear(in_features=300, out_features=29, bias=True)
      (5): Tanh()
    )
  )
  (actor_target): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (mu): Sequential(
      (0): Linear(in_features=175, out_features=400, bias=True)
      (1): ReLU()
      (2): Linear(in_features=400, out_features=300, bias=True)
      (3): ReLU()
      (4): Linear(in_features=300, out_features=29, bias=True)
      (5): Tanh()
    )
  )
  (critic): ContinuousCritic(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (qf0): Sequential(
      (0): Linear(in_features=204, out_

Let's see the agent's learning progress

In [58]:
# Load the TD3 run's progress.csv log into a DataFrame and display it.
# NOTE: this overwrites the `learning_process` loaded for the previous agent.
learning_process = pd.read_csv("resultadosTFG/set_2/td3/progress.csv")
learning_process

Unnamed: 0,time/fps,train/actor_loss,train/critic_loss,train/n_updates,time/total_timesteps,train/reward,train/learning_rate,time/time_elapsed,time/episodes
0,102,63.238789,1389.917044,8871,11828,-8.472155,0.001,115,4
1,99,34.224938,399.805252,20699,23656,-8.472155,0.001,237,8
2,98,30.134345,35.152552,32527,35484,-8.472155,0.001,359,12
3,96,23.285146,12.791116,44355,47312,-8.472155,0.001,489,16


Plot training results

In [59]:
def plot_metrics(df, n_rows, n_cols, metrics=None,
                 save_path='resultadosTFG/set_2/td3/trainingStatsResults.png'):
    """Plot TD3 training metrics against total timesteps and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Training log. Must contain 'time/total_timesteps' and every metric
        column that is plotted.
    n_rows, n_cols : int
        Shape of the subplot grid. Metrics are assigned to cells in row-major
        order; metrics beyond the grid size are skipped because zip() stops at
        the shorter sequence.
    metrics : list of (column, title) tuples, optional
        Columns to plot and their display titles. Defaults to the TD3 metrics.
    save_path : str, optional
        Where the PNG is written.

    Returns
    -------
    None
        The figure is written to ``save_path`` and then closed.
    """
    if metrics is None:
        metrics = [
            ('train/reward', 'Reward'),
            ('train/actor_loss', 'Actor Loss'),
            ('train/n_updates', 'Number of Updates'),
            ('train/critic_loss', 'Critic Loss')
        ]

    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Plot each metric in its own panel
    for ax, (metric, title) in zip(axes, metrics):
        ax.plot(df['time/total_timesteps'], df[metric], label=title)
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

    # Hide grid cells that received no metric instead of leaving empty frames
    for ax in axes[len(metrics):]:
        ax.set_visible(False)

    # Operate on this figure explicitly rather than pyplot's "current" figure
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

df = learning_process
plot_metrics(df, n_rows=4, n_cols=1)  # Modify n_rows and n_cols as needed

# SAC

Create the agent, set parameters for training, and set logger

In [60]:
# Build an agent around the training environment and create the SAC model
# with explicit hyper-parameters.
agent = DRLAgent(env=env_train)
SAC_PARAMS = {"batch_size": 128,
              "buffer_size": 100000,
              "learning_rate": 0.0001,
              "learning_starts": 100,
              "ent_coef": "auto_0.1"}

model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

# The dedicated logger is attached only when SAC is enabled; the model itself
# is created regardless of the flag.
if if_using_sac:
    tmp_path = 'resultadosTFG/set_2/sac'
    new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_sac.set_logger(new_logger_sac)

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to resultadosTFG/set_2/sac


Train SAC

In [61]:
# Train SAC for 50,000 timesteps when enabled; otherwise leave trained_sac
# as None (cells that use it afterwards will then fail).
if if_using_sac:
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name='sac',
        total_timesteps=50000,
    )
else:
    trained_sac = None

-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 69         |
|    time_elapsed    | 169        |
|    total_timesteps | 11828      |
| train/             |            |
|    actor_loss      | 1.62e+03   |
|    critic_loss     | 637        |
|    ent_coef        | 0.286      |
|    ent_coef_loss   | 134        |
|    learning_rate   | 0.0001     |
|    n_updates       | 11727      |
|    reward          | -11.618405 |
-----------------------------------
day: 2956, episode: 80
begin_total_asset: 1000000.00
end_total_asset: 6855602.72
total_reward: 5855602.72
total_cost: 44364.41
total_trades: 58350
Sharpe: 0.906
-----------------------------------
| time/              |            |
|    episodes        | 8          |
|    fps             | 68         |
|    time_elapsed    | 346        |
|    total_timesteps | 23656      |
| train/             |            |
|    actor_loss      | 754        |
|    critic

In [62]:
# Show the policy networks of the trained SAC model. Raises AttributeError if
# if_using_sac was False, because trained_sac is None in that case.
trained_sac.policy

SACPolicy(
  (actor): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (latent_pi): Sequential(
      (0): Linear(in_features=175, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
    )
    (mu): Linear(in_features=256, out_features=29, bias=True)
    (log_std): Linear(in_features=256, out_features=29, bias=True)
  )
  (critic): ContinuousCritic(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (qf0): Sequential(
      (0): Linear(in_features=204, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
      (4): Linear(in_features=256, out_features=1, bias=True)
    )
    (qf1): Sequential(
      (0): Linear(in_features=204, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=2

Let's see the agent's learning progress

In [63]:
# Load the SAC run's progress.csv log into a DataFrame and display it.
# NOTE: this overwrites the `learning_process` loaded for the previous agent.
learning_process = pd.read_csv("resultadosTFG/set_2/sac/progress.csv")
learning_process

Unnamed: 0,time/fps,train/ent_coef_loss,train/actor_loss,train/critic_loss,train/ent_coef,train/n_updates,time/total_timesteps,train/reward,train/learning_rate,time/time_elapsed,time/episodes
0,69,134.300369,1622.065186,637.314087,0.286007,11727,11828,-11.618405,0.0001,169,4
1,68,-96.564575,753.885803,51.017242,0.111147,23555,23656,-17.192493,0.0001,346,8
2,68,-125.984932,342.645355,25.723206,0.034592,35383,35484,-11.897618,0.0001,521,12
3,66,-132.403061,175.426392,7.123978,0.010854,47211,47312,-9.79655,0.0001,710,16


Agent's training results

In [64]:
def plot_metrics(df, n_rows, n_cols, metrics=None,
                 save_path='resultadosTFG/set_2/sac/trainingStatsResults.png'):
    """Plot SAC training metrics against total timesteps and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Training log. Must contain 'time/total_timesteps' and every metric
        column that is plotted.
    n_rows, n_cols : int
        Shape of the subplot grid. Metrics are assigned to cells in row-major
        order; metrics beyond the grid size are skipped because zip() stops at
        the shorter sequence.
    metrics : list of (column, title) tuples, optional
        Columns to plot and their display titles. Defaults to the SAC metrics.
    save_path : str, optional
        Where the PNG is written.

    Returns
    -------
    None
        The figure is written to ``save_path`` and then closed.
    """
    if metrics is None:
        metrics = [
            ('train/reward', 'Reward'),
            ('train/actor_loss', 'Actor Loss'),
            ('train/ent_coef_loss', 'Entropy coefficient Loss'),
            ('train/ent_coef', 'Entropy coefficient'),
            ('train/critic_loss', 'Critic Loss')
        ]

    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Plot each metric in its own panel
    for ax, (metric, title) in zip(axes, metrics):
        ax.plot(df['time/total_timesteps'], df[metric], label=title)
        ax.set_xlabel('Total Timesteps')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

    # Hide grid cells that received no metric instead of leaving empty frames
    for ax in axes[len(metrics):]:
        ax.set_visible(False)

    # Operate on this figure explicitly rather than pyplot's "current" figure
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

df = learning_process
plot_metrics(df, n_rows=5, n_cols=1)  # Modify n_rows and n_cols as needed

# Testing

There is a new environment for testing

We feed the environment with the testing dataset we prepared before (from start test date to end test date: with closing prices, etc)

In [65]:
# Trading environment built on the `trade` DataFrame, using the 'vix' column
# as risk indicator with a threshold of 70; remaining settings come from
# env_kwargs.
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=70,
    risk_indicator_col='vix',
    **env_kwargs
)
e_trade_gym

<finrl.meta.env_stock_trading.env_stocktrading.StockTradingEnv at 0x3711d5b40>

Wrap the environment for Stable-Baselines3. The observation is a matrix containing the inputs {balance, close prices, shares held, N technical indicators}.

In [66]:
# Obtain the Stable-Baselines3 vectorised environment (a DummyVecEnv in the
# saved output) together with its first observation, then display the env.
env_trade, obs_trade = e_trade_gym.get_sb_env()
env_trade

<stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv at 0x33ec9b970>

In [67]:
# Inspect the initial observation returned by get_sb_env().
obs_trade

array([[ 1.0000000e+06,  1.4065350e+02,  1.9582227e+02,  1.6772746e+02,
         2.2600000e+02,  1.8351401e+02,  2.7490091e+02,  5.0711670e+01,
         9.3807114e+01,  1.7543822e+02,  3.5233069e+02,  3.0777106e+02,
         2.0362250e+02,  1.2042436e+02,  5.0125828e+01,  1.4843323e+02,
         1.5452451e+02,  4.8765835e+01,  2.2827776e+02,  1.2634031e+02,
         7.5069099e+01,  2.8285742e+02,  1.4231580e+02,  1.3033841e+02,
         1.4367574e+02,  3.7738498e+02,  2.2568965e+02,  4.5867680e+01,
         4.0303814e+01,  4.3821526e+01,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.00000

In [68]:
# Display the `trade` DataFrame that was passed to the trading environment.
trade

Unnamed: 0,date,tic,open,high,low,close,volume,day,wr,tema,tr,ppo,vix,turbulence
0,2021-10-01,AAPL,141.899994,142.919998,139.110001,140.653503,94639600.0,4.0,-87.094465,140.060799,3.809998,-1.173437,21.100000,120.031078
0,2021-10-01,AMGN,213.589996,214.610001,210.800003,195.822266,2629400.0,4.0,-264.229665,195.572024,19.950317,-1.490716,21.100000,120.031078
0,2021-10-01,AXP,168.500000,175.119995,168.479996,167.727463,3956000.0,4.0,-53.105693,165.806757,13.573593,1.390034,21.100000,120.031078
0,2021-10-01,BA,222.850006,226.720001,220.600006,226.000000,9113600.0,4.0,-16.830422,224.813103,6.779999,0.333270,21.100000,120.031078
0,2021-10-01,CAT,192.899994,195.869995,191.240005,183.514008,3695500.0,4.0,-117.674595,183.268519,14.584625,-1.871326,21.100000,120.031078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,2023-02-27,UNH,488.769989,490.940002,481.959991,472.278229,3006200.0,0.0,-89.965306,472.307670,17.674835,-0.443907,20.950001,7.163790
352,2023-02-27,V,220.729996,221.440002,219.339996,218.230591,4255300.0,0.0,-94.869609,217.546534,4.001724,-0.111513,20.950001,7.163790
352,2023-02-27,VZ,38.990002,39.150002,38.630001,35.602482,14210900.0,0.0,-179.728178,35.438624,3.675713,-0.842037,20.950001,7.163790
352,2023-02-27,WBA,36.049999,36.080002,35.270000,32.621929,5580100.0,0.0,-242.369377,32.709821,3.080139,-0.497050,20.950001,7.163790


## A2C

Test, predict

In [69]:
# Run the trained A2C model on the trading environment; the call returns the
# account-value history and the actions taken.
trained_model = trained_a2c
df_account_value_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_model, environment=e_trade_gym
)

el modelo es 
<stable_baselines3.a2c.a2c.A2C object at 0x30f681600>
estos son los test obs
[[ 1.0000000e+06  1.4065350e+02  1.9582227e+02  1.6772746e+02
   2.2600000e+02  1.8351401e+02  2.7490091e+02  5.0711670e+01
   9.3807114e+01  1.7543822e+02  3.5233069e+02  3.0777106e+02
   2.0362250e+02  1.2042436e+02  5.0125828e+01  1.4843323e+02
   1.5452451e+02  4.8765835e+01  2.2827776e+02  1.2634031e+02
   7.5069099e+01  2.8285742e+02  1.4231580e+02  1.3033841e+02
   1.4367574e+02  3.7738498e+02  2.2568965e+02  4.5867680e+01
   4.0303814e+01  4.3821526e+01  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.000000

Plot and Save testing results, Portfolio Value

In [70]:
# Convert the date column to datetimes normalised to midnight, so every row is
# keyed by calendar date only.
df_account_value_a2c['date'] = pd.to_datetime(df_account_value_a2c['date']).dt.normalize()

# Use an explicit figure/axes pair instead of pyplot's implicit current figure,
# so saving and closing always act on this plot.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df_account_value_a2c['date'], df_account_value_a2c['account_value'])

# Major ticks: one per month
locator = mdates.MonthLocator(interval=1)
ax.xaxis.set_major_locator(locator)

# Tick labels formatted as YYYY-MM-DD
formatter = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(formatter)

# Auto-format date labels to prevent overlap
fig.autofmt_xdate()

ax.set_title('Account Value Testing for A2C')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.grid(True)

fig.savefig('resultadosTFG/set_2/a2c/testing/accountValue.png')
plt.close(fig)

Trades made in testing

In [71]:
# Show the A2C actions table returned by DRL_prediction.
display(df_actions_a2c)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-01,0,0,0,27,0,0,55,3,29,100,...,100,0,100,0,0,79,0,0,100,54
2021-10-04,0,0,0,27,0,0,55,3,29,100,...,100,0,100,0,0,79,0,0,100,54
2021-10-05,0,0,0,27,0,0,55,3,29,100,...,100,0,100,0,0,79,0,0,100,54
2021-10-06,0,0,0,27,0,0,55,3,29,100,...,100,0,100,0,0,79,0,0,100,54
2021-10-07,0,0,0,27,0,0,55,3,29,100,...,100,0,100,0,0,79,0,0,100,54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-17,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-02-21,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-02-22,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-02-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## PPO

Test, predict

In [72]:
# Run the trained PPO model on the trading environment; the call returns the
# account-value history and the actions taken.
trained_model = trained_ppo
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_model, environment=e_trade_gym
)

el modelo es 
<stable_baselines3.ppo.ppo.PPO object at 0x33f43db10>
estos son los test obs
[[ 1.0000000e+06  1.4065350e+02  1.9582227e+02  1.6772746e+02
   2.2600000e+02  1.8351401e+02  2.7490091e+02  5.0711670e+01
   9.3807114e+01  1.7543822e+02  3.5233069e+02  3.0777106e+02
   2.0362250e+02  1.2042436e+02  5.0125828e+01  1.4843323e+02
   1.5452451e+02  4.8765835e+01  2.2827776e+02  1.2634031e+02
   7.5069099e+01  2.8285742e+02  1.4231580e+02  1.3033841e+02
   1.4367574e+02  3.7738498e+02  2.2568965e+02  4.5867680e+01
   4.0303814e+01  4.3821526e+01  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.000000

Save Plot of Portfolio Value

In [73]:
# Convert the date column to datetimes normalised to midnight, so every row is
# keyed by calendar date only.
df_account_value_ppo['date'] = pd.to_datetime(df_account_value_ppo['date']).dt.normalize()

# Use an explicit figure/axes pair instead of pyplot's implicit current figure,
# so saving and closing always act on this plot.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df_account_value_ppo['date'], df_account_value_ppo['account_value'])

# Major ticks: one per month
locator = mdates.MonthLocator(interval=1)
ax.xaxis.set_major_locator(locator)

# Tick labels formatted as YYYY-MM-DD
formatter = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(formatter)

# Auto-format date labels to prevent overlap
fig.autofmt_xdate()

ax.set_title('Account Value Testing for PPO')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.grid(True)

fig.savefig('resultadosTFG/set_2/ppo/testing/accountValue.png')
plt.close(fig)


Trades made in testing

In [74]:
# Show the last 40 rows of the PPO actions table returned by DRL_prediction.
df_actions_ppo.tail(40)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-28,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-29,0,0,2,0,0,0,0,0,0,0,...,0,-1,0,0,0,0,0,0,0,0
2022-12-30,0,0,4,0,0,0,0,0,0,0,...,0,-2,0,0,0,0,0,0,0,0
2023-01-03,0,0,0,0,0,0,0,0,0,0,...,0,-2,0,0,0,0,0,0,0,0
2023-01-04,0,0,8,0,0,0,0,0,0,0,...,0,-4,0,0,0,0,0,0,0,0
2023-01-05,0,0,3,0,0,0,0,0,0,0,...,0,-2,0,0,0,0,0,0,0,0
2023-01-06,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-09,0,0,8,0,0,0,0,0,0,0,...,0,-4,0,0,0,0,0,0,0,0
2023-01-10,0,0,8,0,0,0,0,0,0,0,...,0,-4,0,0,0,0,0,0,0,0
2023-01-11,0,0,0,0,0,0,0,0,0,0,...,0,-2,0,0,0,0,0,0,0,0


## DDPG

Test DDPG, predict

In [75]:
# Run the trained DDPG model on the trading environment; the call returns the
# account-value history and the actions taken.
trained_model = trained_ddpg
df_account_value_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(
    model=trained_model, environment=e_trade_gym
)

el modelo es 
<stable_baselines3.ddpg.ddpg.DDPG object at 0x33f4d7520>
estos son los test obs
[[ 1.0000000e+06  1.4065350e+02  1.9582227e+02  1.6772746e+02
   2.2600000e+02  1.8351401e+02  2.7490091e+02  5.0711670e+01
   9.3807114e+01  1.7543822e+02  3.5233069e+02  3.0777106e+02
   2.0362250e+02  1.2042436e+02  5.0125828e+01  1.4843323e+02
   1.5452451e+02  4.8765835e+01  2.2827776e+02  1.2634031e+02
   7.5069099e+01  2.8285742e+02  1.4231580e+02  1.3033841e+02
   1.4367574e+02  3.7738498e+02  2.2568965e+02  4.5867680e+01
   4.0303814e+01  4.3821526e+01  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.000

Store graph of the value of portfolio

In [76]:
# Convert the date column to datetimes normalised to midnight, so every row is
# keyed by calendar date only.
df_account_value_ddpg['date'] = pd.to_datetime(df_account_value_ddpg['date']).dt.normalize()

# Use an explicit figure/axes pair instead of pyplot's implicit current figure,
# so saving and closing always act on this plot.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df_account_value_ddpg['date'], df_account_value_ddpg['account_value'])

# Major ticks: one per month
locator = mdates.MonthLocator(interval=1)
ax.xaxis.set_major_locator(locator)

# Tick labels formatted as YYYY-MM-DD
formatter = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(formatter)

# Auto-format date labels to prevent overlap
fig.autofmt_xdate()

ax.set_title('Account Value Testing for DDPG')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.grid(True)

fig.savefig('resultadosTFG/set_2/ddpg/testing/accountValue.png')
plt.close(fig)


Trades made in testing

In [77]:
# Show the first 30 rows of the DDPG actions table returned by DRL_prediction.
df_actions_ddpg.head(30)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-01,0,0,100,100,0,0,100,100,100,100,...,100,0,0,0,0,100,100,0,0,100
2021-10-04,0,0,100,100,0,0,100,100,100,100,...,100,0,0,0,0,100,100,0,0,100
2021-10-05,0,0,100,100,0,0,100,100,100,100,...,100,0,0,0,0,100,100,0,0,100
2021-10-06,0,0,100,100,0,0,100,100,2,0,...,0,0,0,0,0,0,0,0,0,100
2021-10-07,0,0,100,100,0,0,100,-100,100,-100,...,0,0,0,0,0,-100,0,0,0,100
2021-10-08,0,0,100,100,0,0,100,-100,100,-100,...,0,0,0,0,0,-100,0,0,0,100
2021-10-11,0,0,100,100,0,0,100,-100,100,-100,...,0,0,0,0,0,-100,0,0,0,100
2021-10-12,0,0,0,0,0,0,0,-100,0,0,...,0,0,0,0,0,0,0,0,0,100
2021-10-13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2021-10-14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## TD3

Test TD3, predict

In [78]:
# Run the trained TD3 model on the trading environment; the call returns the
# account-value history and the actions taken.
trained_model = trained_td3
df_account_value_td3, df_actions_td3 = DRLAgent.DRL_prediction(
    model=trained_model, environment=e_trade_gym
)

el modelo es 
<stable_baselines3.td3.td3.TD3 object at 0x36f67a530>
estos son los test obs
[[ 1.0000000e+06  1.4065350e+02  1.9582227e+02  1.6772746e+02
   2.2600000e+02  1.8351401e+02  2.7490091e+02  5.0711670e+01
   9.3807114e+01  1.7543822e+02  3.5233069e+02  3.0777106e+02
   2.0362250e+02  1.2042436e+02  5.0125828e+01  1.4843323e+02
   1.5452451e+02  4.8765835e+01  2.2827776e+02  1.2634031e+02
   7.5069099e+01  2.8285742e+02  1.4231580e+02  1.3033841e+02
   1.4367574e+02  3.7738498e+02  2.2568965e+02  4.5867680e+01
   4.0303814e+01  4.3821526e+01  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.000000

Store graph of the value of portfolio

In [79]:
# Convert the date column to datetimes normalised to midnight, so every row is
# keyed by calendar date only.
df_account_value_td3['date'] = pd.to_datetime(df_account_value_td3['date']).dt.normalize()

# Use an explicit figure/axes pair instead of pyplot's implicit current figure,
# so saving and closing always act on this plot.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df_account_value_td3['date'], df_account_value_td3['account_value'])

# Major ticks: one per month
locator = mdates.MonthLocator(interval=1)
ax.xaxis.set_major_locator(locator)

# Tick labels formatted as YYYY-MM-DD
formatter = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(formatter)

# Auto-format date labels to prevent overlap
fig.autofmt_xdate()

ax.set_title('Account Value Testing for TD3')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.grid(True)

fig.savefig('resultadosTFG/set_2/td3/testing/accountValue.png')
plt.close(fig)


Trades made in testing

In [80]:
# Show the first 30 rows of the TD3 actions table returned by DRL_prediction.
df_actions_td3.head(30)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-01,0,0,100,0,100,0,0,0,100,0,...,100,100,100,100,0,0,0,100,0,100
2021-10-04,0,0,100,0,100,0,0,0,100,0,...,100,100,100,100,0,0,0,100,0,100
2021-10-05,0,0,100,0,100,0,0,0,100,0,...,100,100,100,100,0,0,0,100,0,100
2021-10-06,0,0,100,0,100,0,0,0,100,0,...,100,100,100,100,0,0,0,100,0,100
2021-10-07,0,0,0,0,100,0,0,0,100,0,...,65,100,0,0,0,0,0,1,0,100
2021-10-08,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-12,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## SAC

Test SAC, predict

Account value is the DF that stores the history of the value of the assets (portfolio value)

In [81]:
# Run the trained SAC model on the trading environment; the call returns the
# account-value history and the actions taken.
trained_model = trained_sac
df_account_value_sac, df_actions_sac = DRLAgent.DRL_prediction(
    model=trained_model, environment=e_trade_gym
)

el modelo es 
<stable_baselines3.sac.sac.SAC object at 0x371177eb0>
estos son los test obs
[[ 1.0000000e+06  1.4065350e+02  1.9582227e+02  1.6772746e+02
   2.2600000e+02  1.8351401e+02  2.7490091e+02  5.0711670e+01
   9.3807114e+01  1.7543822e+02  3.5233069e+02  3.0777106e+02
   2.0362250e+02  1.2042436e+02  5.0125828e+01  1.4843323e+02
   1.5452451e+02  4.8765835e+01  2.2827776e+02  1.2634031e+02
   7.5069099e+01  2.8285742e+02  1.4231580e+02  1.3033841e+02
   1.4367574e+02  3.7738498e+02  2.2568965e+02  4.5867680e+01
   4.0303814e+01  4.3821526e+01  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.000000

Store graph of the value of portfolio

In [82]:
# Convert the date column to datetimes normalised to midnight, so every row is
# keyed by calendar date only.
df_account_value_sac['date'] = pd.to_datetime(df_account_value_sac['date']).dt.normalize()

# Use an explicit figure/axes pair instead of pyplot's implicit current figure,
# so saving and closing always act on this plot.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df_account_value_sac['date'], df_account_value_sac['account_value'])

# Major ticks: one per month
locator = mdates.MonthLocator(interval=1)
ax.xaxis.set_major_locator(locator)

# Tick labels formatted as YYYY-MM-DD
formatter = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(formatter)

# Auto-format date labels to prevent overlap
fig.autofmt_xdate()

ax.set_title('Account Value Testing for SAC')
ax.set_xlabel('Date')
ax.set_ylabel('Account Value')
ax.grid(True)

fig.savefig('resultadosTFG/set_2/sac/testing/accountValue.png')
plt.close(fig)


Trades made in testing

In [83]:
# Show the first 30 rows of the SAC actions table returned by DRL_prediction.
df_actions_sac.head(30)

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,GS,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-01,0,92,67,0,95,77,0,29,92,30,...,0,0,0,84,20,71,0,0,0,0
2021-10-04,0,92,67,0,95,77,0,29,92,30,...,0,0,0,84,20,71,0,0,0,0
2021-10-05,0,92,67,0,95,77,0,29,92,30,...,0,0,0,84,20,71,0,0,0,0
2021-10-06,0,92,67,0,95,77,0,29,92,30,...,0,0,0,84,20,71,0,0,0,0
2021-10-07,0,92,67,0,95,77,0,29,92,30,...,0,0,0,84,20,71,0,0,0,0
2021-10-08,0,92,2,0,95,77,0,0,92,0,...,0,0,0,84,0,37,0,0,0,0
2021-10-11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-12,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-10-14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


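Prepare the results for merging: index each algorithm's portfolio value by its first column (the date) and name the value column after the algorithm, so they can be merged into one DataFrame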

In [84]:
# For each algorithm, index the account-value frame by its first column and
# rename 'account_value' to the algorithm's name. Chained calls build a new
# frame each time; the df_account_value_* inputs are left untouched.
df_result_a2c = (
    df_account_value_a2c
    .set_index(df_account_value_a2c.columns[0])
    .rename(columns={'account_value': 'a2c'})
)
df_result_ddpg = (
    df_account_value_ddpg
    .set_index(df_account_value_ddpg.columns[0])
    .rename(columns={'account_value': 'ddpg'})
)
df_result_td3 = (
    df_account_value_td3
    .set_index(df_account_value_td3.columns[0])
    .rename(columns={'account_value': 'td3'})
)
df_result_ppo = (
    df_account_value_ppo
    .set_index(df_account_value_ppo.columns[0])
    .rename(columns={'account_value': 'ppo'})
)
df_result_sac = (
    df_account_value_sac
    .set_index(df_account_value_sac.columns[0])
    .rename(columns={'account_value': 'sac'})
)

We can also get stats from each stock individually (even though the analysis won't focus on that)

In [85]:
# Baseline: statistics of the ^DJI ticker itself over the test window.
print(
    "==============Get Baseline Stats===========",
    "==============Baseline Stats of Profitability from DJIA===========",
    "This is the real performance of the index",
    sep="\n",
)

# Fetch the baseline prices for the test period, then compute the
# statistics from the 'close' column.
df_dji_ = get_baseline(ticker="^DJI", start=TEST_START_DATE, end=TEST_END_DATE)
stats = backtest_stats(df_dji_, value_col_name='close')

This is the real performance of the index


[*********************100%%**********************]  1 of 1 completed

Shape of DataFrame:  (354, 8)
Annual return         -0.034876
Cumulative returns    -0.048644
Annual volatility      0.181612
Sharpe ratio          -0.105351
Calmar ratio          -0.158953
Stability              0.280983
Max drawdown          -0.219408
Omega ratio            0.982546
Sortino ratio         -0.146974
Skew                        NaN
Kurtosis                    NaN
Tail ratio             0.970602
Daily value at risk   -0.022957
dtype: float64





get_baseline downloads the price data from Yahoo Finance again for the given date range.

backtest_stats calls get_daily_return and then computes the statistics from the resulting Series, using a method with the following docstring:

    """
    Calculates various performance metrics of a strategy, for use in
    plotting.show_perf_stats.

    Parameters
    ----------
    returns : pd.Series
        Daily returns of the strategy, noncumulative.
         - See full explanation in tears.create_full_tear_sheet.
    factor_returns : pd.Series, optional
        Daily noncumulative returns of the benchmark factor to which betas are
        computed. Usually a benchmark such as market returns.
         - This is in the same style as returns.
         - If None, do not compute alpha, beta, and information ratio.
    positions : pd.DataFrame
        Daily net position values.
         - See full explanation in tears.create_full_tear_sheet.
    transactions : pd.DataFrame
        Prices and amounts of executed trades. One row per trade.
        - See full explanation in tears.create_full_tear_sheet.
    turnover_denom : str
        Either AGB or portfolio_value, default AGB.
        - See full explanation in txn.get_turnover.

    Returns
    -------
    pd.Series
        Performance metrics.
    """

Divide each daily DJIA close by the first close and multiply by the invested amount: this is the portfolio value of investing $1,000,000 in the DJIA on the first day and never touching it (buy and hold).

In [86]:
# Buy-and-hold benchmark: value over time of putting the whole initial amount
# into the DJIA on the first test day and never trading.
df_dji = pd.DataFrame()
df_dji['date'] = df_account_value_a2c['date']
# Normalise by the first close *by position* (.iloc[0]); a plain [0] is a
# label lookup and only works while the baseline frame has a row labelled 0.
# NOTE(review): the assignment aligns on the row index, i.e. it assumes row i
# of df_dji_ and row i of df_account_value_a2c are the same trading day - confirm.
df_dji['account_value'] = (
    df_dji_['close'] / df_dji_['close'].iloc[0] * env_kwargs["initial_amount"]
)
# Index by the first column ('date') so the frame can be joined with the agents' frames.
df_dji = df_dji.set_index(df_dji.columns[0])

In [87]:
# Label the benchmark's value column 'dji'.
df_dji = df_dji.rename(columns={'account_value': 'dji'})

In [88]:
# Display one of the prepared frames (TD3) as a sanity check before merging.
df_result_td3

Unnamed: 0_level_0,td3
date,Unnamed: 1_level_1
2021-10-01,1.000000e+06
2021-10-04,9.983242e+05
2021-10-05,1.001515e+06
2021-10-06,1.003014e+06
2021-10-07,1.011902e+06
...,...
2023-02-21,9.314267e+05
2023-02-22,9.277799e+05
2023-02-23,9.313544e+05
2023-02-24,9.230470e+05


Compare DJIA with the rest of the algorithms

In [89]:
# Build one wide table with a column per strategy: start from an empty frame
# and outer-join each strategy's frame on the index, in this order.
result = pd.DataFrame()
for strategy_frame in (
    df_result_a2c,
    df_result_ddpg,
    df_result_td3,
    df_result_ppo,
    df_result_sac,
    df_dji,
):
    result = pd.merge(
        result,
        strategy_frame,
        how='outer',
        left_index=True,
        right_index=True,
    )
display(result)

Unnamed: 0_level_0,a2c,ddpg,td3,ppo,sac,dji
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-01,1.000000e+06,1.000000e+06,1.000000e+06,1.000000e+06,1.000000e+06,1.000000e+06
2021-10-04,9.985258e+05,9.970111e+05,9.983242e+05,9.995007e+05,9.980367e+05,9.905746e+05
2021-10-05,1.002754e+06,1.002779e+06,1.001515e+06,1.000379e+06,1.000660e+06,9.996566e+05
2021-10-06,1.003453e+06,1.004773e+06,1.003014e+06,1.000891e+06,1.001784e+06,1.002637e+06
2021-10-07,1.012252e+06,1.015889e+06,1.011902e+06,1.002264e+06,1.008467e+06,1.012483e+06
...,...,...,...,...,...,...
2023-02-21,9.817478e+05,9.167017e+05,9.314267e+05,8.995006e+05,1.000690e+06,9.651327e+05
2023-02-22,9.800151e+05,9.157490e+05,9.277799e+05,8.971849e+05,9.974051e+05,9.626710e+05
2023-02-23,9.857733e+05,9.198156e+05,9.313544e+05,9.002953e+05,1.000550e+06,9.658412e+05
2023-02-24,9.759689e+05,9.061383e+05,9.230470e+05,8.904929e+05,9.903151e+05,9.560240e+05


Save and Plot!

In [90]:
# Global default figure size; it also sizes the figure created just below.
plt.rcParams["figure.figsize"] = (15,5)

# Create the figure and axes explicitly and hand the axes to pandas, so the
# figure that is drawn on, saved and closed is one and the same object
# (DataFrame.plot() without `ax` would open a separate figure of its own).
fig, ax = plt.subplots()
result.plot(ax=ax)
ax.set(title="Portfolio value: DRL agents vs. DJIA", ylabel="Portfolio value")

# Write the chart to disk and release the figure.
fig.savefig('resultadosTFG/set_2/AlgorithmsVsDJI.png')
plt.close(fig)