In [1]:
!pip install yfinance



Loading main data

In [81]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [157]:
# Download daily price/volume bars for GOOGL starting at 2018-01-01.
# NOTE(review): no `end` date is passed, so the last row presumably depends on when the cell is run — confirm
# and pin an end date if the results below need to be reproducible.
data = yf.download("GOOGL" , start = "2018-01-01" , interval = '1d')

[*********************100%%**********************]  1 of 1 completed


In [158]:
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,52.651001,53.799,52.651001,53.6605,53.6605,31766000
2018-01-03,53.696499,54.805,53.671501,54.576,54.576,31318000
2018-01-04,54.8545,55.203999,54.713001,54.787998,54.787998,26052000


In [159]:
# Make sure the rows are ordered by their date index (safeguard only)
data = data.sort_index()

In [160]:
# Remove any duplicate index
data = data.loc[~data.index.duplicated(keep='first')]

In [161]:
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-09-29,133.279999,134.050003,130.360001,130.860001,130.860001,30848100
2023-10-02,131.210007,134.419998,131.169998,134.169998,134.169998,22288000
2023-10-03,133.940002,134.259995,131.839996,132.429993,132.429993,22971100


In [162]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,52.651001,53.799,52.651001,53.6605,53.6605,31766000
2018-01-03,53.696499,54.805,53.671501,54.576,54.576,31318000
2018-01-04,54.8545,55.203999,54.713001,54.787998,54.787998,26052000
2018-01-05,55.172501,55.679001,55.09,55.5145,55.5145,30250000
2018-01-08,55.549999,55.958,55.5,55.710499,55.710499,24644000


In [163]:
# Check for missing values
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [164]:
# Get the statistics of the data
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1448.0,1448.0,1448.0,1448.0,1448.0,1448.0
mean,89.328625,90.356061,88.346511,89.371988,89.371988,35091170.0
std,30.697306,31.013744,30.360253,30.679212,30.679212,15735670.0
min,49.216,50.605999,48.882999,49.233501,49.233501,9312000.0
25%,59.789001,60.28925,59.201624,59.823126,59.823126,25149500.0
50%,86.328999,87.320499,85.433002,86.379498,86.379498,31138000.0
75%,116.218748,117.904623,114.866999,116.57325,116.57325,40074500.0
max,151.25,151.546494,148.899002,149.838501,149.838501,133178000.0


Understanding Trends within the Data

In [165]:
import plotly.graph_objects as go

In [166]:
# Line chart of the closing price against the date index
fig = go.Figure(
    data=[go.Scatter(x=data.index, y=data['Close'], mode='lines')]
)
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Close',
    width=900,
    height=500,
)
fig.show()

In [167]:
# Line chart of the traded volume against the date index
fig = go.Figure(
    data=[go.Scatter(x=data.index, y=data['Volume'], mode='lines')]
)
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Volume',
    width=900,
    height=500,
)
fig.show()

Data Preparation

In [168]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [169]:
# Filter only required data
data = data[['Close' , 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,53.6605,31766000
2018-01-03,54.576,31318000
2018-01-04,54.787998,26052000


Scraping extra information

In [170]:
import os

import requests

# The Alpha Vantage key is read from the environment so that no credential is stored in the notebook.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
    raise RuntimeError("Set the ALPHAVANTAGE_API_KEY environment variable before running this cell")

# Request the 5-period daily RSI of GOOGL computed on the close price.
# Query parameters are passed via `params` so they are URL-encoded by requests.
http_response = requests.get(
    'https://www.alphavantage.co/query',
    params={
        'function': 'RSI',
        'symbol': 'GOOGL',
        'interval': 'daily',
        'time_period': 5,
        'series_type': 'close',
        'apikey': api_key,
    },
    timeout=30,  # never hang the kernel on a stalled connection
)
http_response.raise_for_status()

# `response` holds the decoded JSON payload (a dict) used by the following cells
response = http_response.json()

# Fail fast with a readable message if the payload does not contain the RSI series
if 'Technical Analysis: RSI' not in response:
    raise RuntimeError(f"Alpha Vantage response has no RSI series; keys returned: {list(response)}")

In [171]:
response.keys()

dict_keys(['Meta Data', 'Technical Analysis: RSI'])

In [172]:
rsi_data = pd.DataFrame.from_dict(response['Technical Analysis: RSI'] , orient='index')

In [173]:
rsi_data.head()

Unnamed: 0,RSI
2004-08-26,72.7683
2004-08-27,64.2686
2004-08-30,47.8388
2004-08-31,49.249
2004-09-01,41.0745


In [174]:
rsi_data = rsi_data[rsi_data.index >= '2018-01-01']

In [175]:
# Cast the RSI column to float64.
# Frame-level astype returns a new DataFrame, which avoids assigning a column on a frame that was
# produced by boolean filtering (the pattern that triggers pandas' SettingWithCopyWarning).
rsi_data = rsi_data.astype({'RSI': np.float64})

In [176]:
rsi_data.head()

Unnamed: 0,RSI
2018-01-02,65.431
2018-01-03,78.3686
2018-01-04,80.483
2018-01-05,86.2429
2018-01-08,87.4882


In [177]:
data = data.reset_index()
rsi_data = rsi_data.reset_index().rename(columns = {'index':'Date'})

In [178]:
data

Unnamed: 0,Date,Close,Volume
0,2018-01-02,53.660500,31766000
1,2018-01-03,54.576000,31318000
2,2018-01-04,54.787998,26052000
3,2018-01-05,55.514500,30250000
4,2018-01-08,55.710499,24644000
...,...,...,...
1443,2023-09-27,130.539993,22746500
1444,2023-09-28,132.309998,22513100
1445,2023-09-29,130.860001,30848100
1446,2023-10-02,134.169998,22288000


In [183]:
rsi_data['Date'] = pd.to_datetime(rsi_data['Date'])

In [185]:
data = data.merge(rsi_data, how='left', on = 'Date')

In [187]:
data = data.set_index('Date')

In [188]:
data

Unnamed: 0_level_0,Close,Volume,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02,53.660500,31766000,65.4310
2018-01-03,54.576000,31318000,78.3686
2018-01-04,54.787998,26052000,80.4830
2018-01-05,55.514500,30250000,86.2429
2018-01-08,55.710499,24644000,87.4882
...,...,...,...
2023-09-27,130.539993,22746500,38.2581
2023-09-28,132.309998,22513100,50.9562
2023-09-29,130.860001,30848100,42.0916
2023-10-02,134.169998,22288000,61.3015


In [189]:
# Count the rows dated 2023-05-01 or later; this is the size of the test period
test_length = int((data.index >= '2023-05-01').sum())

In [190]:
def CreateFeatures_and_Targets(data, feature_length):
    """Build sliding-window features and next-step targets from a DataFrame.

    Window ``i`` holds rows ``i .. i + feature_length - 1`` (all columns) and its
    target is the ``"Close"`` value of row ``i + feature_length``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``"Close"`` column; every column is used as a feature.
    feature_length : int
        Number of consecutive rows per window. Must be positive.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, feature_length, n_columns)`` with
        ``n_windows = max(len(data) - feature_length, 0)``.
    Y : numpy.ndarray
        Shape ``(n_windows,)``.

    Raises
    ------
    ValueError
        If ``feature_length`` is not positive.
    """
    if feature_length <= 0:
        raise ValueError("feature_length must be a positive integer")

    # Convert to NumPy once instead of slicing the DataFrame on every iteration;
    # with that the loop is fast enough that no progress bar is needed.
    values = data.values
    close = data["Close"].values

    n_windows = max(len(data) - feature_length, 0)

    # Pre-allocating keeps the result 3-D even when there are no windows at all
    X = np.empty((n_windows, feature_length, values.shape[1]), dtype=values.dtype)
    for i in range(n_windows):
        X[i] = values[i : i + feature_length]

    # Target of window i is the close of the row right after the window
    Y = close[feature_length:].copy()

    return X , Y

In [191]:
X , Y = CreateFeatures_and_Targets(data , 32)

  0%|          | 0/1416 [00:00<?, ?it/s]

In [192]:
# Check the shapes
X.shape , Y.shape

((1416, 32, 3), (1416,))

In [193]:
# Chronological split: the last `test_length` windows are held out for testing
Xtrain , Xtest = X[:-test_length] , X[-test_length:]
Ytrain , Ytest = Y[:-test_length] , Y[-test_length:]

In [194]:
# Check Training Dataset Shape
Xtrain.shape , Ytrain.shape

((1308, 32, 3), (1308,))

In [195]:
# Check Testing Dataset Shape
Xtest.shape , Ytest.shape

((108, 32, 3), (108,))

In [196]:
# Create a Scaler to Scale Vectors with Multiple Dimensions
class MultiDimensionScaler():
    """Min-max scaler for 3-D arrays indexed as ``[sample, timestep, feature]``.

    One ``MinMaxScaler`` is fitted per index of the last axis. Each scaler receives the
    2-D slice ``X[:, :, i]``, so every timestep position of that feature is scaled as its
    own column. The fitted scalers are stored in ``self.scalers`` in feature order.

    Both methods work on a float64 copy and leave the caller's array untouched.
    """

    def __init__(self):
        self.scalers = []

    @staticmethod
    def _as_float_copy(X):
        """Return a float64 copy of ``X`` after checking that it is 3-D."""
        # A copy avoids writing scaled values back into the caller's buffer
        # (which would truncate on integer arrays and leak through views).
        X = np.array(X, dtype=np.float64)
        if X.ndim != 3:
            raise ValueError(f"expected a 3-D array, got {X.ndim} dimension(s)")
        return X

    def fit_transform(self , X):
        """Fit one scaler per feature on ``X`` and return the scaled copy.

        Any scalers from an earlier fit are discarded first, so refitting never leaves
        stale scalers at the front of ``self.scalers``.
        """
        X = self._as_float_copy(X)
        self.scalers = []
        for i in range(X.shape[2]):
            scaler = MinMaxScaler()
            X[:, :, i] = scaler.fit_transform(X[:, :, i])
            self.scalers.append(scaler)
        return X

    def transform(self , X):
        """Scale ``X`` with the previously fitted scalers and return the scaled copy.

        Raises
        ------
        ValueError
            If the scaler has not been fitted, or the last axis of ``X`` does not match
            the number of fitted scalers.
        """
        X = self._as_float_copy(X)
        if not self.scalers:
            raise ValueError("MultiDimensionScaler is not fitted; call fit_transform first")
        if X.shape[2] != len(self.scalers):
            raise ValueError(
                f"expected {len(self.scalers)} feature(s) on the last axis, got {X.shape[2]}"
            )
        for i, scaler in enumerate(self.scalers):
            X[:, :, i] = scaler.transform(X[:, :, i])
        return X

In [197]:
Feature_Scaler = MultiDimensionScaler()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [198]:
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [199]:
def save_object(obj , name : str):
    """Pickle ``obj`` into the file ``<name>.pck``.

    Parameters
    ----------
    obj : any picklable object
    name : str
        Path without extension; ``.pck`` is appended.
    """
    # The context manager closes the file even if pickling raises
    with open(f"{name}.pck", "wb") as pickle_out:
        pickle.dump(obj, pickle_out)

def load_object(name : str):
    """Load and return the object pickled in ``<name>.pck``.

    Parameters
    ----------
    name : str
        Path without extension; ``.pck`` is appended.

    NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
    """
    # The context manager guarantees the handle is closed (it was previously left open)
    with open(f"{name}.pck", "rb") as pickle_in:
        return pickle.load(pickle_in)

In [200]:
# Save your objects for future purposes
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

Model Building

In [201]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

# Keep only the weights of the epoch with the lowest validation loss
save_best = ModelCheckpoint(
    "best_weights.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by 0.25 after 5 epochs without val_loss improvement, down to 1e-5 at most
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.25,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

In [202]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional , BatchNormalization

# Recurrent regressor: bidirectional LSTM -> LSTM -> two dense layers -> single linear output.
# NOTE(review): input_shape is given to the inner LSTM rather than to the Bidirectional wrapper,
# so the model is presumably only built on the first call to fit/predict — confirm.
model = Sequential([
    Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1, input_shape=(32, 3))),
    LSTM(256, recurrent_dropout=0.1),
    Dropout(0.3),
    Dense(64, activation='elu'),
    Dropout(0.3),
    Dense(32, activation='elu'),
    Dense(1, activation='linear'),
])



In [203]:
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [204]:
# Train for 10 epochs, one sample per gradient step (batch_size = 1), keeping the
# chronological sample order (shuffle=False).
# NOTE(review): the test split is passed as validation data, and both callbacks monitor
# val_loss (best-weights checkpoint and learning-rate reduction), so metrics computed on
# Xtest/Ytest afterwards are not an unbiased estimate — consider a separate validation split.
history = model.fit(Xtrain, Ytrain,
            epochs=10,
            batch_size = 1,
            verbose=1,
            shuffle=False ,
            validation_data=(Xtest , Ytest),
            callbacks=[reduce_lr , save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [205]:
# Load the best weights
model.load_weights("best_weights.h5")

Visualize prediction on Test Set

In [206]:
Predictions = model.predict(Xtest)



In [207]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [208]:
Predictions.shape

(108, 1)

In [209]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [210]:
# Check the Predictions vs Actual
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

Visualize Prediction on whole data

In [211]:
Total_features = np.concatenate((Xtrain , Xtest) , axis = 0)

In [212]:
Total_Targets = np.concatenate((Ytrain , Ytest) , axis = 0)

In [213]:
Predictions = model.predict(Total_features)



In [214]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [215]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [216]:
# Compare predicted and actual closing prices over every available window.
# The first rows of `data` only serve as input for the first window and have no target,
# so the x-axis uses the trailing dates that line up one-to-one with the targets.
target_dates = data.index[-len(Actual):]

fig = go.Figure()

fig.add_trace(go.Scatter(x = target_dates , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = target_dates , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [217]:
# Save and Load the whole model
model.save("Model.h5")
loaded_model = tf.keras.models.load_model("Model.h5")


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



Realtime Prediction

In [218]:
def PredictStockPrice(Model , DataFrame , PreviousDate , feature_length = 32):
    """Predict a closing price from the ``feature_length`` rows that precede ``PreviousDate``.

    The input window is ``DataFrame.iloc[idx - feature_length : idx]``, where ``idx`` is the
    position of ``PreviousDate`` in the index; the row at ``PreviousDate`` itself is not part
    of the window. The window is scaled with the module-level ``Feature_Scaler`` and the
    model output is mapped back to price units with the module-level ``Target_Scaler``.

    NOTE(review): since the window stops right before ``PreviousDate``, the prediction looks
    like an estimate of the close *on* ``PreviousDate`` rather than of the day after it —
    confirm that this is the intended meaning of the argument.

    Parameters
    ----------
    Model : object with a ``predict`` method
    DataFrame : pandas.DataFrame
        Feature frame; its index must contain ``PreviousDate`` exactly once.
    PreviousDate : index label
    feature_length : int, default 32
        Number of rows in the input window.

    Returns
    -------
    The first element of the inverse-transformed prediction.

    Raises
    ------
    KeyError
        If ``PreviousDate`` is not in the index (raised by ``get_loc``).
    ValueError
        If ``feature_length`` is not positive, ``PreviousDate`` matches several rows, or
        fewer than ``feature_length`` rows precede it.
    """
    if feature_length < 1:
        raise ValueError("feature_length must be a positive integer")

    idx_location = DataFrame.index.get_loc(PreviousDate)
    # get_loc returns a slice or mask for non-unique labels; only a single position is usable
    if not isinstance(idx_location, (int, np.integer)):
        raise ValueError(f"{PreviousDate!r} does not identify a single row of the index")

    # Without this guard a negative slice start would silently select a wrong (or empty) window
    if idx_location < feature_length:
        raise ValueError(
            f"need {feature_length} rows before {PreviousDate!r}, only {idx_location} available"
        )

    window = DataFrame.iloc[idx_location - feature_length : idx_location, :]
    # Private float copy: scaling must never write back into the DataFrame's own buffer
    Features = np.array(window.values, dtype=np.float64)
    Features = np.expand_dims(Features , axis = 0)
    Features = Feature_Scaler.transform(Features)
    Prediction = Model.predict(Features)
    Prediction = Target_Scaler.inverse_transform(Prediction)
    return Prediction[0][0]

In [219]:
PredictStockPrice(loaded_model , data , '2021-01-14')



97.657364