# Asset Portfolio Management using Deep Reinforcement Learning
---

## 4.0 Data Split
---

We will split both the close prices and the whole dataset into train and test (trade) data.

The split is chronological: 2015–2022 is used for training (about 80% of the rows) and 2023–2024 is held out for testing (the remaining 20%).

### 4.1 Import Relevant Libraries

In [15]:
#conda env base
import pandas as pd
import my_config
import numpy as np
import ta
from ta import add_all_ta_features
from ta.utils import dropna

### 4.2 Load the data

In [16]:
# Restore `df` and `ticker_list` from IPython's %store database. Both must
# have been saved with `%store` in an earlier session/notebook.
%store -r df
# NOTE(review): ticker_list is not referenced by any cell visible here — confirm it is still needed.
%store -r ticker_list
# Close prices read from CSV (one column per ticker, per the df_close preview output).
df_close = pd.read_csv('./datasets/close_prices.csv')

In [17]:
# Preview the restored long-format frame (bare last expression -> rich display).
df

Unnamed: 0,date,tic,open,high,low,close,volume,atr,sma,rsi,oil_chg,yield_chg
0,2015-01-02,BOREO,3.800000,3.810000,3.800000,3.058948,137,0.733282,3.333072,0.000000,-0.015873,-0.021659
1,2015-01-02,ELISA,22.610001,22.870001,22.549999,14.783914,206279,7.582569,14.951996,37.589471,-0.015873,-0.021659
2,2015-01-02,ICP1V,0.316783,0.342330,0.316783,0.342330,11807,0.024087,0.320433,62.053425,-0.015873,-0.021659
3,2015-01-02,MEKKO,1.800000,1.850000,1.800000,1.356999,19580,0.472451,1.327764,57.769155,-0.015873,-0.021659
4,2015-01-02,NDA-FI,9.665000,9.740000,9.580000,4.964346,873881,4.398773,4.937608,44.544858,-0.015873,-0.021659
...,...,...,...,...,...,...,...,...,...,...,...,...
24455,2024-12-30,NESTE,11.785000,12.125000,11.760000,12.125000,2236292,0.484141,12.310714,39.223432,0.002966,-0.016021
24456,2024-12-30,OLVAS,29.400000,29.400000,28.950001,29.200001,9640,0.472103,29.425000,48.853910,0.002966,-0.016021
24457,2024-12-30,SAMPO,39.070000,39.380001,38.959999,39.380001,600560,0.493714,39.519286,43.366923,0.002966,-0.016021
24458,2024-12-30,UPM,26.500000,26.780001,26.430000,26.559999,1045678,0.538349,26.238571,55.556743,0.002966,-0.016021


In [18]:
# List the column labels available in the main frame.
df.columns

Index(['date', 'tic', 'open', 'high', 'low', 'close', 'volume', 'atr', 'sma',
       'rsi', 'oil_chg', 'yield_chg'],
      dtype='object')

In [19]:
# Preview the close-price table as loaded from CSV (before any date trimming).
df_close

Unnamed: 0,date,BOREO,ELISA,ICP1V,MEKKO,NDA-FI,NESTE,OLVAS,SAMPO,UPM,YIT
0,2014-12-09,3.412212,15.383613,0.311674,1.334994,5.168291,4.881361,15.422766,21.767355,8.450420,3.470012
1,2014-12-10,3.412212,15.455315,0.316783,1.334994,5.178615,4.937263,15.551346,21.717148,8.494963,3.443397
2,2014-12-11,3.412212,15.572650,0.332111,1.334994,5.150218,4.949968,15.929948,21.912397,8.456783,3.420108
3,2014-12-12,3.412212,15.188060,0.332111,1.334994,4.987580,4.700944,16.179976,21.438225,8.259521,3.356896
4,2014-12-15,3.412212,14.692656,0.301455,1.342329,4.858502,4.739060,16.058537,21.025414,8.074987,3.114028
...,...,...,...,...,...,...,...,...,...,...,...
2454,2024-12-19,10.000000,41.459999,10.090000,11.520000,10.465000,11.455000,28.900000,38.810001,25.709999,2.366000
2455,2024-12-20,10.400000,41.560001,10.250000,11.880000,10.245000,11.225000,28.850000,39.009998,25.660000,2.384000
2456,2024-12-23,10.250000,41.400002,10.200000,12.100000,10.300000,11.155000,29.000000,38.990002,25.719999,2.442000
2457,2024-12-27,10.150000,42.119999,10.190000,11.860000,10.450000,11.785000,29.400000,39.130001,26.570000,2.498000


In [20]:
# Earliest date present in the main frame; used to trim df_close to the same start.
start_date = df['date'].min()

In [21]:
# Keep only close-price rows dated on or after the main frame's first date.
df_close = df_close.loc[df_close['date'].ge(start_date)]

In [22]:
# Renumber rows from 0 after the date filter; drop=True discards the old index
# instead of inserting it as a column.
df_close = df_close.reset_index(drop=True)

### 4.3 Split the Data

In [23]:
# Inclusive date windows for the chronological split (ISO yyyy-mm-dd strings).
train_start_date, train_end_date = "2015-01-01", "2022-12-31"   # training window
test_start_date, test_end_date = "2023-01-01", "2024-12-31"     # testing (trade) window

print(f"Training Data: from {train_start_date} to {train_end_date}")
print(f"Testing Data: from {test_start_date} to {test_end_date}")

# Series.between() is inclusive on both ends by default, so it selects the
# same rows as an explicit `>= start` & `<= end` mask.
train_data = df[df['date'].between(train_start_date, train_end_date)]
test_data = df[df['date'].between(test_start_date, test_end_date)]

# Apply the same two windows to the close-price table.
train_close = df_close[df_close['date'].between(train_start_date, train_end_date)]
test_close = df_close[df_close['date'].between(test_start_date, test_end_date)]

# Report the row count of each resulting split.
print(f"Main training data size: {len(train_data)} rows")
print(f"Main testing data size: {len(test_data)} rows")
print(f"Close training data size: {len(train_close)} rows")
print(f"Close testing data size: {len(test_close)} rows")

Training Data: from 2015-01-01 to 2022-12-31
Testing Data: from 2023-01-01 to 2024-12-31
Main training data size: 19590 rows
Main testing data size: 4870 rows
Close training data size: 1959 rows
Close testing data size: 487 rows


### 4.4 Store the Dataframes

In [24]:
# Preview the training split of the main frame.
train_data

Unnamed: 0,date,tic,open,high,low,close,volume,atr,sma,rsi,oil_chg,yield_chg
0,2015-01-02,BOREO,3.800000,3.810000,3.800000,3.058948,137,0.733282,3.333072,0.000000,-0.015873,-0.021659
1,2015-01-02,ELISA,22.610001,22.870001,22.549999,14.783914,206279,7.582569,14.951996,37.589471,-0.015873,-0.021659
2,2015-01-02,ICP1V,0.316783,0.342330,0.316783,0.342330,11807,0.024087,0.320433,62.053425,-0.015873,-0.021659
3,2015-01-02,MEKKO,1.800000,1.850000,1.800000,1.356999,19580,0.472451,1.327764,57.769155,-0.015873,-0.021659
4,2015-01-02,NDA-FI,9.665000,9.740000,9.580000,4.964346,873881,4.398773,4.937608,44.544858,-0.015873,-0.021659
...,...,...,...,...,...,...,...,...,...,...,...,...
19585,2022-12-30,NESTE,43.549999,43.889999,43.020000,40.038315,399469,3.798320,41.643756,37.274401,0.044372,0.011473
19586,2022-12-30,OLVAS,32.599998,33.250000,32.250000,30.017570,9305,3.407005,28.717519,61.894918,0.044372,0.011473
19587,2022-12-30,SAMPO,44.921577,45.012550,44.412140,43.337219,720631,1.404920,43.441205,53.287462,0.044372,0.011473
19588,2022-12-30,UPM,35.200001,35.360001,34.930000,31.637262,520036,3.678595,32.222754,44.977286,0.044372,0.011473


In [25]:
# Preview the testing split of the main frame (original row labels are retained).
test_data

Unnamed: 0,date,tic,open,high,low,close,volume,atr,sma,rsi,oil_chg,yield_chg
19590,2023-01-03,BOREO,37.000000,38.000000,37.000000,37.469444,203,1.803586,37.377882,46.860296,-0.044349,-0.022171
19591,2023-01-03,ELISA,49.689999,50.259998,49.520000,45.747658,170351,4.721769,45.693527,51.083398,-0.044349,-0.022171
19592,2023-01-03,ICP1V,17.020000,17.320000,16.820000,17.299999,19175,0.504482,17.022857,59.291322,-0.044349,-0.022171
19593,2023-01-03,MEKKO,8.900000,9.020000,8.840000,8.308312,38658,0.689253,8.236055,50.236238,-0.044349,-0.022171
19594,2023-01-03,NDA-FI,10.178000,10.366000,10.160000,8.686369,5322614,1.629920,8.457981,62.915507,-0.044349,-0.022171
...,...,...,...,...,...,...,...,...,...,...,...,...
24455,2024-12-30,NESTE,11.785000,12.125000,11.760000,12.125000,2236292,0.484141,12.310714,39.223432,0.002966,-0.016021
24456,2024-12-30,OLVAS,29.400000,29.400000,28.950001,29.200001,9640,0.472103,29.425000,48.853910,0.002966,-0.016021
24457,2024-12-30,SAMPO,39.070000,39.380001,38.959999,39.380001,600560,0.493714,39.519286,43.366923,0.002966,-0.016021
24458,2024-12-30,UPM,26.500000,26.780001,26.430000,26.559999,1045678,0.538349,26.238571,55.556743,0.002966,-0.016021


In [26]:
# Rebind every split to an independent copy so the saved frames are detached
# from the boolean-indexed selections of df / df_close.
train_data, test_data = train_data.copy(), test_data.copy()
train_close, test_close = train_close.copy(), test_close.copy()

In [28]:
# Persist the main training split; index=False keeps the row labels out of the file.
train_data.to_csv('datasets/train_data.csv', index=False)

In [29]:
# Persist the main testing split; index=False keeps the row labels out of the file.
test_data.to_csv('datasets/test_data.csv', index=False)

In [30]:
# Persist the close-price training split; index=False keeps the row labels out of the file.
train_close.to_csv('datasets/train_close.csv', index=False)

In [31]:
# Persist the close-price testing split; index=False keeps the row labels out of the file.
test_close.to_csv('datasets/test_close.csv', index=False)