In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.5)

In [2]:
from sklearn.preprocessing import MinMaxScaler


In [3]:
# Custom imports
from cleaning_datasets import loading_dissected_datasets, Dataset

In [4]:
# Global variables
SPLIT_RATIO = 0.8 
TIMESTEP = 1

In [12]:
df = loading_dissected_datasets(1, 3)

In [13]:
def splitting_df(df: pd.DataFrame, split_ratio: float):
    train_size = int(len(df) * split_ratio)
    test_size = len(df) - train_size
    train, test = df.iloc[0:train_size], df.iloc[train_size:len(df)]
    print(len(train), len(test))
    return train, test

In [14]:
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)

In [15]:
train, test = splitting_df(df, 0.8)

292 73


In [16]:
print(train.columns)

Index(['AT11', 'AT12', 'AT13', 'AT21', 'AT22', 'AT31', 'AT32', 'BE21', 'BE22',
       'BE23',
       ...
       'UKK3', 'UKK4', 'UKL1', 'UKL2', 'UKM2', 'UKM3', 'UKM5', 'UKM6', 'UKN0',
       'mean'],
      dtype='object', length=256)


In [17]:
X_train, y_train = create_dataset(train, train["mean"], 10)

In [18]:
X_test, y_test = create_dataset(test, test["mean"], 10)

In [13]:
y_pred = model.predict(X_test)

In [19]:
scaler = MinMaxScaler()
total_data = df["mean"].values.tolist()
train_data = total_data[:-30]
test_data = total_data[-30:]
train_data = np.array(train_data).reshape(-1,1)
test_data = np.array(test_data).reshape(-1,1)

train_data = scaler.fit_transform(train_data)
train_data = train_data.reshape(-1)

test_data = scaler.transform(test_data).reshape(-1)

#%% Curve Smoothening (Exponential Moving Average)
'''
Exponential Moving Average is a concept of finance, which removes random noises from the data and gives a clearer picture of the trend of the stock price
'''
EMA = 0.0
gamma = 0.3
for ti in range(train_data.shape[0]):
  EMA = gamma*train_data[ti] + (1-gamma)*EMA
  train_data[ti] = EMA

#%% Getting training and testing data

'''
Now, this code creates a series of matrices.
X_train is the matrix which contains prices of 80 consecutive days, starting from day 80th to the last day of training data
Y_train is the target value of 81st day.
The concept is to train 80 days of data to predict the 81st day price, now this is done again with shifting the date window by one day
'''

jump=1
lookback = 80
X_train,y_train = [],[]
for i in range(lookback,train_data.size,jump):
    X_train.append(train_data[i-lookback:i])
    y_train.append(train_data[i])
X_train,y_train = np.array(X_train),np.array(y_train)
 
X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],1))

In [20]:
regressor = keras.Sequential()

regressor.add(keras.layers.LSTM(units = 128, return_sequences = True, input_shape=(lookback,1)))
regressor.add(keras.layers.Dropout(0.2))

regressor.add(keras.layers.LSTM(units = 64, return_sequences = True))
regressor.add(keras.layers.Dropout(0.15))

regressor.add(keras.layers.LSTM(units = 32, return_sequences = True))
regressor.add(keras.layers.Dropout(0.15))

regressor.add(keras.layers.LSTM(units = 64, return_sequences = False))
regressor.add(keras.layers.Dropout(0.15))

regressor.add(keras.layers.Dense(units=64,activation='relu'))
regressor.add(keras.layers.Dense(units=32,activation='relu'))
regressor.add(keras.layers.Dense(units=16,activation='relu'))
regressor.add(keras.layers.Dense(units=8,activation='tanh'))
regressor.add(keras.layers.Dense(units=1))

regressor.compile(optimizer='adam',loss="mean_squared_error",metrics=["accuracy"])

regressor.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 80, 128)           66560     
_________________________________________________________________
dropout_4 (Dropout)          (None, 80, 128)           0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 80, 64)            49408     
_________________________________________________________________
dropout_5 (Dropout)          (None, 80, 64)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 80, 32)            12416     
_________________________________________________________________
dropout_6 (Dropout)          (None, 80, 32)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 64)               

In [21]:
history = regressor.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [35]:
X_test = []

inputs =  df["mean"][(len(df)-lookback):].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)

for i in range(lookback, lookback+20):
    X_test.append(inputs[i-lookback:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (20, lookback ,1))

# real_stock_price = np.array(real_stock_price)

#%%
'''Predicting and inverse-transform the prices for the 30 days'''
predicted = regressor.predict(X_test)
predicted = scaler.inverse_transform(predicted)



  X_test = np.array(X_test)


ValueError: cannot reshape array of size 20 into shape (20,80,1)

In [30]:
X_test.shape

(63, 10, 256)