# Asset Portfolio Management using Deep Reinforcement Learning
---

## 3.0 Feature Engineering and Data Preprocessing
---
We perform feature engineering and data preprocessing by:
* Adding technical indicators to the data. The technical indicators are used as inputs when training our Reinforcement Learning model.

### 3.1 Import Relevant Libraries

In [1]:
from ta import add_all_ta_features

In [12]:
#python env
import pandas as pd
from ta import add_all_ta_features
import numpy as np
from ta.utils import dropna
import my_config
from ta import volatility, trend, momentum

### 3.2 Load the Data from the csv Files

In [18]:
# Load the merged per-ticker dataset (date, tic, OHLCV and the oil/yield
# change columns) that the rest of this notebook works on.
data = pd.read_csv('./datasets/merged_data.csv')
# Load the close-prices CSV; it is not used again in this section.
prices_data = pd.read_csv('./datasets/close_prices.csv')

In [19]:
data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,oil_chg,yield_chg
0,2014-12-09,BOREO,4.25,4.25,4.25,3.412212,0,0.00982,-0.016393
1,2014-12-09,ELISA,23.610001,23.82,23.440001,15.383613,389383,0.00982,-0.016393
2,2014-12-09,ICP1V,0.321893,0.321893,0.311674,0.311674,2388,0.00982,-0.016393
3,2014-12-09,MEKKO,1.85,1.85,1.806,1.334994,11685,0.00982,-0.016393
4,2014-12-09,NDA-FI,10.08,10.15,9.965,5.168291,1273926,0.00982,-0.016393


In [20]:
# Group each ticker's rows together, ordered by date within the ticker, and
# renumber the index 0..n-1 so it matches the new row order.
data = data.sort_values(['tic', 'date'], ignore_index=True)

In [21]:
data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,oil_chg,yield_chg
0,2014-12-09,BOREO,4.25,4.25,4.25,3.412212,0,0.00982,-0.016393
1,2014-12-10,BOREO,4.25,4.25,4.25,3.412212,1000,-0.038899,-0.022973
2,2014-12-11,BOREO,4.25,4.25,4.25,3.412212,0,-0.008717,0.004149
3,2014-12-12,BOREO,4.25,4.25,4.25,3.412212,0,-0.028737,-0.034435
4,2014-12-15,BOREO,4.25,4.25,4.25,3.412212,0,-0.012773,0.006182


In [23]:
data.tic.unique()

array(['BOREO', 'ELISA', 'ICP1V', 'MEKKO', 'NDA-FI', 'NESTE', 'OLVAS',
       'SAMPO', 'UPM', 'YIT'], dtype=object)

### 3.3 Add Technical Indicators
---
We define a function that uses the `ta` library to add the following technical indicators to the dataset:
* Volatility: Average True Range (ATR)
* Trend: Fast Simple Moving Average (SMA)
* Momentum: Relative Strength Index (RSI)

In [24]:
#VOLATILITY
# Average True Range (ATR)
#from ta.volatility import AverageTrueRange
#help(AverageTrueRange)

In [25]:
#TREND
# Simple Moving Average (SMA)
#from ta.trend import SMAIndicator
#help(SMAIndicator)

In [26]:
#MOMENTUM
#Relative Strength Index (RSI)
#from ta.momentum import RSIIndicator
#help(RSIIndicator)

In [27]:
def calculate_indicators(group, window=14):
    """Append ATR, SMA and RSI columns to one ticker's price history.

    Meant to be called once per ticker (for example on each group of
    ``data.groupby('tic')``) so that no look-back window spans two tickers.

    Args:
        group: DataFrame with ``high``, ``low`` and ``close`` columns.
            Presumably ordered by date; the notebook sorts by
            ``['tic', 'date']`` before calling this.
        window: Look-back period, in rows, used for all three indicators.
            Defaults to 14, the value previously hard-coded for SMA and RSI
            and the ``ta`` default for ATR, so existing callers see the same
            numbers.

    Returns:
        A copy of ``group`` with ``atr``, ``sma`` and ``rsi`` columns added.
        The frame passed in is not modified.
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by ``groupby.apply``, and direct callers keep
    # their own frame intact.
    group = group.copy()

    high, low, close = group['high'], group['low'], group['close']

    # Volatility: Average True Range.
    # NOTE(review): in the rows displayed in this notebook ``close`` sits
    # below ``low`` (e.g. 3.41 vs 4.25), which looks like an adjusted close
    # next to unadjusted high/low. ATR mixes the three, so confirm the
    # series are on the same price basis.
    group['atr'] = volatility.AverageTrueRange(
        high=high, low=low, close=close, window=window
    ).average_true_range()

    # Trend: Simple Moving Average of the close.
    group['sma'] = trend.SMAIndicator(
        close=close, window=window
    ).sma_indicator()

    # Momentum: Relative Strength Index of the close.
    group['rsi'] = momentum.RSIIndicator(
        close=close, window=window
    ).rsi()

    return group

# Compute the indicators separately for every ticker and stitch the pieces
# back together. The groups are iterated explicitly instead of going through
# ``groupby('tic').apply``: passing the grouping column to ``apply`` is
# deprecated in recent pandas, and once it is excluded the ``tic`` labels
# would survive only in the index and then be discarded by
# ``reset_index(drop=True)``. Here every group keeps its ``tic`` column.
# Groups arrive in sorted ticker order with their row order preserved, and
# ``ignore_index=True`` renumbers the result 0..n-1.
data = pd.concat(
    [calculate_indicators(group) for _, group in data.groupby('tic')],
    ignore_index=True,
)
data.head(17)

Unnamed: 0,date,tic,open,high,low,close,volume,oil_chg,yield_chg,atr,sma,rsi
0,2014-12-09,BOREO,4.25,4.25,4.25,3.412212,0,0.00982,-0.016393,0.0,,
1,2014-12-10,BOREO,4.25,4.25,4.25,3.412212,1000,-0.038899,-0.022973,0.0,,
2,2014-12-11,BOREO,4.25,4.25,4.25,3.412212,0,-0.008717,0.004149,0.0,,
3,2014-12-12,BOREO,4.25,4.25,4.25,3.412212,0,-0.028737,-0.034435,0.0,,
4,2014-12-15,BOREO,4.25,4.25,4.25,3.412212,0,-0.012773,0.006182,0.0,,
5,2014-12-16,BOREO,4.15,4.25,4.15,3.412212,630,-0.019653,-0.021266,0.0,,
6,2014-12-17,BOREO,4.15,4.15,4.15,3.331925,250,-0.001002,0.03718,0.0,,
7,2014-12-18,BOREO,4.15,4.15,4.15,3.331925,230,-0.008863,0.026071,0.0,,
8,2014-12-19,BOREO,4.15,4.15,4.15,3.331925,0,0.0356,-0.012704,0.0,,
9,2014-12-22,BOREO,4.15,4.15,4.15,3.331925,600,-0.020691,-0.006434,0.0,,


In [28]:
# Switch to date-major order (all tickers for a date sit together) and
# renumber the index 0..n-1 to match.
data = data.sort_values(['date', 'tic'], ignore_index=True)
data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,oil_chg,yield_chg,atr,sma,rsi
0,2014-12-09,BOREO,4.25,4.25,4.25,3.412212,0,0.00982,-0.016393,0.0,,
1,2014-12-09,ELISA,23.610001,23.82,23.440001,15.383613,389383,0.00982,-0.016393,0.0,,
2,2014-12-09,ICP1V,0.321893,0.321893,0.311674,0.311674,2388,0.00982,-0.016393,0.0,,
3,2014-12-09,MEKKO,1.85,1.85,1.806,1.334994,11685,0.00982,-0.016393,0.0,,
4,2014-12-09,NDA-FI,10.08,10.15,9.965,5.168291,1273926,0.00982,-0.016393,0.0,,


In [29]:
# Number of rows per ticker, to check that every ticker covers the same
# number of dates.
ticker_counts = data.groupby('tic').size()
print(ticker_counts)
# Missing values per column, shown as the cell output.
data.isna().sum()

tic
BOREO     2459
ELISA     2459
ICP1V     2459
MEKKO     2459
NDA-FI    2459
NESTE     2459
OLVAS     2459
SAMPO     2459
UPM       2459
YIT       2459
dtype: int64


date           0
tic            0
open           0
high           0
low            0
close          0
volume         0
oil_chg        0
yield_chg      0
atr            0
sma          130
rsi          130
dtype: int64

In [30]:
# Discard every row that still has a missing value in any column.
data = data.dropna()
# Move the two macro columns to the end; all other columns keep their
# relative order.
columns = data.columns.drop(['oil_chg', 'yield_chg']).tolist() + ['oil_chg', 'yield_chg']
data = data[columns]
data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,atr,sma,rsi,oil_chg,yield_chg
130,2015-01-02,BOREO,3.8,3.81,3.8,3.058948,137,0.733282,3.333072,0.0,-0.015873,-0.021659
131,2015-01-02,ELISA,22.610001,22.870001,22.549999,14.783914,206279,7.582569,14.951996,37.589471,-0.015873,-0.021659
132,2015-01-02,ICP1V,0.316783,0.34233,0.316783,0.34233,11807,0.024087,0.320433,62.053425,-0.015873,-0.021659
133,2015-01-02,MEKKO,1.8,1.85,1.8,1.356999,19580,0.472451,1.327764,57.769155,-0.015873,-0.021659
134,2015-01-02,NDA-FI,9.665,9.74,9.58,4.964346,873881,4.398773,4.937608,44.544858,-0.015873,-0.021659


In [31]:
# Persist the processed frame; index=False keeps the row index out of the file.
data.to_csv('./datasets/data_with_indicators.csv', index=False)

In [32]:
# Feature columns: everything that is not an identifier or a raw OHLCV
# column. Selecting by name instead of by position (previously ``[7:]``)
# keeps the list correct if the column order of the input ever changes.
indicator_list = [
    col for col in data.columns
    if col not in ('date', 'tic', 'open', 'high', 'low', 'close', 'volume')
]
indicator_list

['atr', 'sma', 'rsi', 'oil_chg', 'yield_chg']

In [33]:
# Bind the processed frame to the name `df` (plain assignment, no copy).
df=data

In [34]:
# Renumber rows from 0: after dropna() the index no longer starts at 0.
# reset_index returns a new frame and drop=True discards the old index.
df=df.reset_index(drop=True)

In [35]:
%store df

Stored 'df' (DataFrame)


In [36]:
df

Unnamed: 0,date,tic,open,high,low,close,volume,atr,sma,rsi,oil_chg,yield_chg
0,2015-01-02,BOREO,3.800000,3.810000,3.800000,3.058948,137,0.733282,3.333072,0.000000,-0.015873,-0.021659
1,2015-01-02,ELISA,22.610001,22.870001,22.549999,14.783914,206279,7.582569,14.951996,37.589471,-0.015873,-0.021659
2,2015-01-02,ICP1V,0.316783,0.342330,0.316783,0.342330,11807,0.024087,0.320433,62.053425,-0.015873,-0.021659
3,2015-01-02,MEKKO,1.800000,1.850000,1.800000,1.356999,19580,0.472451,1.327764,57.769155,-0.015873,-0.021659
4,2015-01-02,NDA-FI,9.665000,9.740000,9.580000,4.964346,873881,4.398773,4.937608,44.544858,-0.015873,-0.021659
...,...,...,...,...,...,...,...,...,...,...,...,...
24455,2024-12-30,NESTE,11.785000,12.125000,11.760000,12.125000,2236292,0.484141,12.310714,39.223432,0.002966,-0.016021
24456,2024-12-30,OLVAS,29.400000,29.400000,28.950001,29.200001,9640,0.472103,29.425000,48.853910,0.002966,-0.016021
24457,2024-12-30,SAMPO,39.070000,39.380001,38.959999,39.380001,600560,0.493714,39.519286,43.366923,0.002966,-0.016021
24458,2024-12-30,UPM,26.500000,26.780001,26.430000,26.559999,1045678,0.538349,26.238571,55.556743,0.002966,-0.016021
