# LSTM Strategy playground
---

## 1. Import required libraries
---

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
import datetime

!pip3 install ta
import ta
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(98)

from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

from sklearn import metrics
from sklearn.preprocessing import StandardScaler



## 2. Configuration variables
---

In [3]:
company_ticker = "ADANIPORTS.NS"
test_train_split_factor = 0.80
prediction_period = 5
activation = 'relu'
optimizer = 'adam'
loss = 'mse'
epochs = 5
batch_size = 1
hidden_layer = []

## 3. Load data
---

In [4]:
df = pd.read_csv(f'./drive/MyDrive/Colab Notebooks/CAPSTONE PROJECT/Storage/{company_ticker}.csv')
df.head()

Unnamed: 0,time_stamp,open,high,low,close,Adj Close,volume
0,2017-01-02 00:00:00+05:30,269.799988,276.899994,267.0,273.799988,266.555084,2622173
1,2017-01-03 00:00:00+05:30,275.200012,277.700012,272.049988,273.950012,266.701111,2064823
2,2017-01-04 00:00:00+05:30,275.149994,276.899994,272.200012,274.899994,267.625977,2741205
3,2017-01-05 00:00:00+05:30,276.299988,288.899994,275.5,287.5,279.892609,5997054
4,2017-01-06 00:00:00+05:30,288.799988,291.799988,285.0,286.049988,278.480957,5651466


In [5]:
df.tail()

Unnamed: 0,time_stamp,open,high,low,close,Adj Close,volume
980,2020-12-23 00:00:00+05:30,467.149994,476.700012,462.5,470.75,467.475006,7943560
981,2020-12-24 00:00:00+05:30,474.899994,482.899994,466.0,478.549988,475.220703,7679078
982,2020-12-28 00:00:00+05:30,480.0,492.950012,480.0,483.549988,480.185944,8372924
983,2020-12-29 00:00:00+05:30,485.0,489.850006,478.200012,483.399994,480.036987,5866837
984,2020-12-30 00:00:00+05:30,484.0,487.600006,478.149994,485.399994,482.023071,3736356


## 4. Calculate returns according to prediction period

In [6]:
df['returns'] = list(df['close'][prediction_period:]) + [None for i in range(prediction_period)]
df['returns'] = ((df['returns'] - df['close']) / df['close'])*100
df.dropna(inplace=True)

## 4. Split into test and train sets

In [7]:
msk = np.random.rand(len(df)) < test_train_split_factor
train_df = df.copy()[msk]
test_df = df.copy()[~msk]

In [8]:
X_train = train_df.copy()
X_train.drop(['returns', 'time_stamp'], axis=1, inplace=True)
y_train = train_df.copy()['returns']

X_test = test_df.copy()
X_test.drop(['returns', 'time_stamp'], axis=1, inplace=True)
y_test = test_df.copy()['returns']

## 5.Calculate all indicators
---

In [9]:
def add_indicators(df):
    df = ta.add_all_ta_features(df, open="open", high="high", low="low", close="close", volume="volume",       fillna=True)
    
    return df


In [10]:
X_train = add_indicators(X_train)
X_test = add_indicators(X_test)

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])
  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


## 6. Scale and Reshape data

---

In [11]:
scaler = StandardScaler()

def scale_and_reshape(df):
    df = df.values
    if len(df.shape) == 1:
        df = df.reshape(df.shape[0], 1)
        df = scaler.fit_transform(df)
    else:
        df = scaler.fit_transform(df)
        df = df.reshape(df.shape[0], df.shape[1], 1)
    return df


In [12]:
print("Before: ")
print("X_train.shape ", X_train.shape)
print("X_test.shape ", X_test.shape)
print("y_train.shape ", y_train.shape)
print("y_test.shape ", y_test.shape)
print()

# Scale and Reshape X_train
X_train_standard_scaled = scale_and_reshape(X_train)

# Scale and Reshape X_test
X_test_standard_scaled = scale_and_reshape(X_test)

# Scale and Reshape y_train
y_train_standard_scaled = scale_and_reshape(y_train)

# Scale and Reshape y_test
y_test_standard_scaled = scale_and_reshape(y_test)

print()
print("After: ")
print("X_train.shape ", X_train_standard_scaled.shape)
print("X_test.shape ", X_test_standard_scaled.shape)
print("y_train.shape ", y_train_standard_scaled.shape)
print("y_test.shape ", y_test_standard_scaled.shape)

Before: 
X_train.shape  (802, 89)
X_test.shape  (178, 89)
y_train.shape  (802,)
y_test.shape  (178,)


After: 
X_train.shape  (802, 89, 1)
X_test.shape  (178, 89, 1)
y_train.shape  (802, 1)
y_test.shape  (178, 1)


In [13]:
# Check if y_test has scaled
# for i in list(zip(list(y_test), list(y_test_standard_scaled))):
#   print(i)

# Check if y_train
# for i in list(zip(list(y_train), list(y_train_standard_scaled))):
#   print(i)

# Check if X_train has scaled
# for i in list(zip( list(X_train['close']), list(X_train_standard_scaled))):
#   print(i[0], i[1][0])

# Check if X_test has scaled
# for i in list(zip( list(X_test['close']), list(X_test_standard_scaled))):
#   print(i[0], i[1][0])


## 7. Define model

In [14]:
model = Sequential()
model.add(LSTM(8, activation=activation, return_sequences=True, input_shape=(89, 1)))
model.add(LSTM(64, activation=activation, return_sequences=True))
model.add(LSTM(8, activation=activation))
model.add(Dense(1))
model.compile(optimizer=optimizer, loss=loss)

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 89, 8)             320       
_________________________________________________________________
lstm_1 (LSTM)                (None, 89, 64)            18688     
_________________________________________________________________
lstm_2 (LSTM)                (None, 8)                 2336      
_________________________________________________________________
dense (Dense)                (None, 1)                 9         
Total params: 21,353
Trainable params: 21,353
Non-trainable params: 0
_________________________________________________________________


## 8. Train model

In [16]:
checkpoint = ModelCheckpoint(f"./drive/MyDrive/Colab Notebooks/CAPSTONE PROJECT/Models/{company_ticker}/LSTM_test1.h5", monitor='loss', verbose=1,
    save_best_only=True, mode='auto', period=1)

model.fit(X_train_standard_scaled, y_train_standard_scaled, epochs=epochs, batch_size=batch_size, callbacks=[checkpoint])

Epoch 1/5

Epoch 00001: loss improved from inf to 1.00202, saving model to ./drive/MyDrive/Colab Notebooks/CAPSTONE PROJECT/Models/ADANIPORTS.NS/LSTM_test1.h5
Epoch 2/5

Epoch 00002: loss improved from 1.00202 to 1.00064, saving model to ./drive/MyDrive/Colab Notebooks/CAPSTONE PROJECT/Models/ADANIPORTS.NS/LSTM_test1.h5
Epoch 3/5

Epoch 00003: loss did not improve from 1.00064
Epoch 4/5

Epoch 00004: loss did not improve from 1.00064
Epoch 5/5

Epoch 00005: loss did not improve from 1.00064


<keras.callbacks.History at 0x7fcfa45dde10>

## 9. Test model

In [17]:
model = load_model(f"./drive/MyDrive/Colab Notebooks/CAPSTONE PROJECT/Models/{company_ticker}/LSTM_test.h5")

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 89, 100)           40800     
_________________________________________________________________
lstm_1 (LSTM)                (None, 89, 64)            42240     
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 116,129
Trainable params: 116,129
Non-trainable params: 0
_________________________________________________________________


In [19]:
predictions = model.predict(X_test_standard_scaled)

In [20]:
# for i in range(len(predictions)):
#     print(y_test_standard_scaled[i][0], predictions[i][0])

In [21]:
print('FOR NORMALIZED PRICES')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test_standard_scaled, predictions))
print('Mean Squared Error:', metrics.mean_squared_error(y_test_standard_scaled, predictions))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test_standard_scaled, predictions)))

FOR NORMALIZED PRICES
Mean Absolute Error: 0.704724294005404
Mean Squared Error: 1.0000277095069157
Root Mean Squared Error: 1.0000138546574822


In [22]:
prediction_percents = scaler.inverse_transform(predictions)
# y_percents = scaler.inverse_transform(y_test)

In [23]:
# for i in range(len(prediction_percents)):
#   print(list(y_test)[i], list(prediction_percents)[i])

In [24]:
print('ERROR VALUES FOR ACTUAL PRICES')
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, prediction_percents))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, prediction_percents))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, prediction_percents)))

ERROR VALUES FOR ACTUAL PRICES
Mean Absolute Error: 3.4431848916380647
Mean Squared Error: 23.872298453953423
Root Mean Squared Error: 4.88592861736164
