In [90]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [91]:
data = yf.download("GOOGL" , start = "2018-01-01" , interval = '1d')

[*********************100%***********************]  1 of 1 completed


In [92]:
data.shape

(935, 6)

In [93]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1053.02002,1075.97998,1053.02002,1073.209961,1073.209961,1588300
2018-01-03,1073.930054,1096.099976,1073.430054,1091.52002,1091.52002,1565900
2018-01-04,1097.089966,1104.079956,1094.26001,1095.76001,1095.76001,1302600
2018-01-05,1103.449951,1113.579956,1101.800049,1110.290039,1110.290039,1512500
2018-01-08,1111.0,1119.160034,1110.0,1114.209961,1114.209961,1232200


Understanding Trends with in the Data

In [94]:
# Sort the data points based on indexes just for confirmation 
data.sort_index(inplace = True)

In [95]:
# Remove any duplicate index 
data = data.loc[~data.index.duplicated(keep='first')]

In [96]:
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-13,2838.98999,2859.0,2825.0,2846.649902,2846.649902,1104700
2021-09-14,2859.909912,2867.26001,2835.0,2850.889893,2850.889893,983700
2021-09-15,2855.429932,2889.01001,2826.949951,2888.590088,2888.590088,1370400
2021-09-16,2879.01001,2886.0,2848.030029,2872.199951,2872.199951,1309500
2021-09-17,2860.610107,2869.0,2809.399902,2816.0,2816.0,2666800


In [97]:
# Check for missing values 
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [98]:
# Get the statistics of the data
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,935.0,935.0,935.0,935.0,935.0,935.0
mean,1467.277314,1482.341516,1452.696515,1468.255208,1468.255208,1789722.0
std,474.353982,477.024648,471.882986,474.930518,474.930518,843575.4
min,984.320007,1012.119995,977.659973,984.669983,984.669983,465600.0
25%,1135.734985,1146.934998,1123.565002,1136.279968,1136.279968,1250250.0
50%,1252.209961,1266.079956,1240.209961,1255.839966,1255.839966,1572200.0
75%,1586.980042,1611.150024,1571.410034,1585.570007,1585.570007,2067150.0
max,2904.320068,2925.080078,2897.669922,2904.310059,2904.310059,6658900.0


In [99]:
import plotly.graph_objects as go

# Check the trend in Closing Values 
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Close'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900, 
                  xaxis_title='Date' , yaxis_title='Close')
fig.show()

In [100]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Volume'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900, 
                  xaxis_title='Date' , yaxis_title='Volume')
fig.show()

Data Preparation

In [101]:
from sklearn.preprocessing import MinMaxScaler 
import pickle 
from tqdm.notebook import tnrange

In [102]:
# Filter only required data 
data = data[['Close' , 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,1073.209961,1588300
2018-01-03,1091.52002,1565900
2018-01-04,1095.76001,1302600


In [103]:
# Confirm the Testing Set length 
test_length = data[(data.index >= '2020-09-01')].shape[0]

In [104]:
def CreateFeatures_and_Targets(data, feature_length):
    X = []
    Y = []

    for i in tnrange(len(data) - feature_length): 
        X.append(data.iloc[i : i + feature_length,:].values)
        Y.append(data["Close"].values[i+feature_length])

    X = np.array(X)
    Y = np.array(Y)

    return X , Y

In [105]:
X , Y = CreateFeatures_and_Targets(data , 32)

  0%|          | 0/903 [00:00<?, ?it/s]

In [106]:
# Check the shapes
X.shape , Y.shape

((903, 32, 2), (903,))

In [107]:
X_train , X_test , Y_train , Y_test = X[:-test_length] , X[-test_length:] , Y[:-test_length] , Y[-test_length:]

In [108]:
# Check Training Dataset Shape 
X_train.shape , Y_train.shape

((639, 32, 2), (639,))

In [109]:
# Check Testing Dataset Shape
X_test.shape , Y_test.shape

((264, 32, 2), (264,))

In [110]:
# Create a Scaler to Scale Vectors with Multiple Dimensions 
class MultiDimensionScaler():
    def __init__(self):
        self.scalers = []

    def fit_transform(self , X):
        total_dims = X.shape[2]
        for i in range(total_dims):
            Scaler = MinMaxScaler()
            X[:, :, i] = Scaler.fit_transform(X[:, :, i])
            self.scalers.append(Scaler)
        return X

    def transform(self , X):
        for i in range(X.shape[2]):
            X[:, :, i] = self.scalers[i].transform(X[:,:,i])
        return X 

In [111]:
Feature_Scaler = MultiDimensionScaler()
X_train = Feature_Scaler.fit_transform(X_train)
X_test = Feature_Scaler.transform(X_test)

In [112]:
Target_Scaler = MinMaxScaler()
Y_train = Target_Scaler.fit_transform(Y_train.reshape(-1,1))
Y_test = Target_Scaler.transform(Y_test.reshape(-1,1))

In [113]:
def save_object(obj , name : str):
    pickle_out = open(f"{name}.pck","wb")
    pickle.dump(obj, pickle_out)
    pickle_out.close()

def load_object(name : str):
    pickle_in = open(f"{name}.pck","rb")
    data = pickle.load(pickle_in)
    return data

In [114]:
# Save your objects for future purposes 
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

Model Building

In [115]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weights.h5", monitor='val_loss', save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=4, min_lr=0.00001,verbose = 1)

In [116]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional

model = Sequential()

model.add(Bidirectional(LSTM(512 ,return_sequences=True , recurrent_dropout=0.1, input_shape=(32, 2))))
model.add(LSTM(256 ,recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64 , activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32 , activation='elu'))
model.add(Dense(1 , activation='linear'))



In [117]:
#optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [118]:
!nvidia-smi

Sun Sep 19 13:09:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 471.96       Driver Version: 471.96       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   45C    P0    12W /  N/A |   2721MiB /  4096MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [119]:
history = model.fit(X_train, Y_train,
            epochs=10,
            batch_size = 1,
            verbose=1,
            shuffle=False ,
            validation_data=(X_test , Y_test),
            callbacks=[reduce_lr , save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [120]:
# Checking the model Structure 
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_3 (Bidirection (1, 32, 1024)             2109440   
_________________________________________________________________
lstm_7 (LSTM)                (1, 256)                  1311744   
_________________________________________________________________
dropout_6 (Dropout)          (1, 256)                  0         
_________________________________________________________________
dense_9 (Dense)              (1, 64)                   16448     
_________________________________________________________________
dropout_7 (Dropout)          (1, 64)                   0         
_________________________________________________________________
dense_10 (Dense)             (1, 32)                   2080      
_________________________________________________________________
dense_11 (Dense)             (1, 1)                   

In [121]:
# Load the best weights
model.load_weights("best_weights.h5")

Visualize prediction on Test Set

In [122]:
Predictions = model.predict(X_test)

In [123]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Y_test)

In [124]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [125]:
# Creating Sample Test Dataframe
test_dataframe_dict = {'Actual' : list(Actual) , 'Predicted' : list(Predictions)}
test_df = pd.DataFrame.from_dict(test_dataframe_dict)

test_df.index = data.index[-test_length:]

In [126]:
test_df.head()

Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-09-01,1655.079956,1610.120483
2020-09-02,1717.390015,1617.095581
2020-09-03,1629.51001,1622.568848
2020-09-04,1581.209961,1621.039062
2020-09-08,1523.599976,1617.3927


In [127]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = test_df.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = test_df.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

Visualize Prediction on whole data

In [128]:
Total_features = np.concatenate((X_train , X_test) , axis = 0)

In [129]:
Total_Targets = np.concatenate((Y_train , Y_test) , axis = 0)

In [130]:
Predictions = model.predict(Total_features)

In [131]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [132]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [133]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()