In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, SimpleRNN
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split


In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [None]:
data = yf.download("AAPL" , start = "2019-01-01" , interval = '1d')

[*********************100%***********************]  1 of 1 completed


In [None]:
data.shape

(1101, 6)

In [None]:
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,38.7225,39.712502,38.557499,39.48,37.994488,148158800
2019-01-03,35.994999,36.43,35.5,35.547501,34.209965,365248800
2019-01-04,36.1325,37.137501,35.950001,37.064999,35.670353,234428400


In [None]:
# Get the statistics of the data
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1101.0,1101.0,1101.0,1101.0,1101.0,1101.0
mean,114.296687,115.70193,112.994105,114.417915,113.088138,108017500.0
std,43.221281,43.732936,42.705536,43.236375,43.438259,52837270.0
min,35.994999,36.43,35.5,35.547501,34.209965,35195900.0
25%,69.949997,70.752502,69.43,70.004997,68.383171,74602000.0
50%,127.82,129.100006,126.209999,127.449997,125.946915,93604600.0
75%,148.990005,150.720001,147.679993,149.320007,148.274902,124814400.0
max,182.630005,182.940002,179.119995,182.009995,180.43428,426510000.0


In [None]:
import plotly.graph_objects as go

# Check the trend in Closing Values
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Close'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900,
                  xaxis_title='Date' , yaxis_title='Close')
fig.show()

In [None]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Volume'] , mode = 'lines'))
fig.update_layout(height = 500 , width = 900,
                  xaxis_title='Date' , yaxis_title='Volume')
fig.show()

In [None]:

# Normalize the data
data_close = data["Close"].values
data_mean = np.mean(data_close)
data_std = np.std(data_close)
data_close = (data_close - data_mean) / data_std

In [None]:
# Confirm the Testing Set length
test_length = data[(data.index >= '2022-09-01')].shape[0]

In [None]:
def CreateFeatures_and_Targets(data, feature_length):
    X = []
    Y = []

    for i in tnrange(len(data) - feature_length):
        X.append(data.iloc[i : i + feature_length,:].values)
        Y.append(data["Close"].values[i+feature_length])

    X = np.array(X)
    Y = np.array(Y)

    return X , Y

In [None]:
# Prepare the data
lookback = 50
X = []
Y = []
for i in range(len(data_close) - lookback - 1):
    X.append(data_close[i:(i+lookback)])
    Y.append(data_close[i+lookback])
X = np.array(X)
Y = np.array(Y)

In [None]:

# Split the data into training and testing sets
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2, random_state=42)


In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

class MultiDimensionScaler():
    def __init__(self):
        self.scalers = []

    def fit_transform(self, X):
        total_dims = X.shape[1]  # Get the second dimension of X
        for i in range(total_dims):
            Scaler = MinMaxScaler()
            X[:, i] = Scaler.fit_transform(X[:, i].reshape(-1, 1)).flatten()
            self.scalers.append(Scaler)
        return X

    def transform(self, X):
        for i in range(X.shape[1]):
            X[:, i] = self.scalers[i].transform(X[:, i].reshape(-1, 1)).flatten()
        return X




In [None]:
Feature_Scaler = MultiDimensionScaler()
train_X = Feature_Scaler.fit_transform(train_X)
test_X = Feature_Scaler.transform(test_X)

In [None]:
Target_Scaler = MinMaxScaler()
train_Y = Target_Scaler.fit_transform(train_Y.reshape(-1,1))
test_Y = Target_Scaler.transform(test_Y.reshape(-1,1))

In [None]:
def save_object(obj , name : str):
    pickle_out = open(f"{name}.pck","wb")
    pickle.dump(obj, pickle_out)
    pickle_out.close()

def load_object(name : str):
    pickle_in = open(f"{name}.pck","rb")
    data = pickle.load(pickle_in)
    return data

In [None]:
# Save your objects for future purposes
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

In [None]:
# Ensure the sizes of training and testing sets
print("Train X shape:", train_X.shape)
print("Train Y shape:", train_Y.shape)
print("Test X shape:", test_X.shape)
print("Test Y shape:", test_Y.shape)

train_X = np.reshape(train_X, (train_X.shape[0], train_X.shape[1], 1))
test_X = np.reshape(test_X, (test_X.shape[0], test_X.shape[1], 1))


Train X shape: (840, 50)
Train Y shape: (840, 1)
Test X shape: (210, 50)
Test Y shape: (210, 1)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weights.h5", monitor='val_loss', save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=4, min_lr=0.00001,verbose = 1)

In [None]:
# Build the LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(64, input_shape=(lookback, 1)))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')



In [None]:
# Train the LSTM model
lstm_model.fit(train_X.reshape(-1, lookback, 1), train_Y, epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f1310117190>

In [None]:

# Extract the output of the LSTM
lstm_output = lstm_model.predict(test_X.reshape(-1, lookback, 1))




In [None]:
# Build the RNN model
rnn_model = Sequential()
rnn_model.add(Dense(64, input_shape=(lookback, 1)))
rnn_model.add(SimpleRNN(16))
rnn_model.add(Dense(1))
rnn_model.compile(optimizer='adam', loss='mse')



In [None]:
# Train the RNN model
rnn_model.fit(lstm_output[lookback-1:-1], test_Y[lookback:], epochs=10, batch_size=32,validation_data=(test_X , test_Y),
            callbacks=[reduce_lr , save_best])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
1/5 [=====>........................] - ETA: 0s - loss: 0.1270
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
1/5 [=====>........................] - ETA: 0s - loss: 0.1124
Epoch 9: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 10/10


<keras.callbacks.History at 0x7f1320ffbb50>

In [None]:
# Checking the model Structure
rnn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 50, 64)            128       
                                                                 
 simple_rnn (SimpleRNN)      (None, 16)                1296      
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1,441
Trainable params: 1,441
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Load the best weights
rnn_model.load_weights("best_weights.h5")

In [None]:
Predictions = rnn_model.predict(test_X)



In [None]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(test_Y)

In [None]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [None]:
# Confirm the Testing Set length
test_length = data[(data.index >= '2022-12-10')].shape[0]

# Creating Sample Test Dataframe
test_dataframe_dict = {'Actual': Actual[-test_length:], 'Predicted': Predictions[-test_length:]}
test_df = pd.DataFrame(test_dataframe_dict)

test_df.index = data.index[-test_length:]


In [None]:
test_df.head()

Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-12,0.515358,0.400716
2022-12-13,-1.645032,-0.269609
2022-12-14,-1.472472,-0.182897
2022-12-15,0.892527,0.43755
2022-12-16,-1.453035,-0.169723


In [None]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = test_df.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = test_df.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [None]:
Total_features = np.concatenate((train_X , test_X) , axis = 0)

In [None]:
Total_Targets = np.concatenate((train_Y , test_Y) , axis = 0)

In [None]:
Predictions = rnn_model.predict(Total_features)



In [None]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [None]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [None]:
# Check the trend in Volume Traded
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [None]:
test_df


Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-12,0.515358,0.400716
2022-12-13,-1.645032,-0.269609
2022-12-14,-1.472472,-0.182897
2022-12-15,0.892527,0.437550
2022-12-16,-1.453035,-0.169723
...,...,...
2023-05-10,1.386317,0.715315
2023-05-11,0.316592,0.285030
2023-05-12,0.382770,0.475797
2023-05-15,0.881420,0.473917


In [None]:
rnn_preds = rnn_model.predict(test_X)



In [None]:
mse = np.mean((test_Y - rnn_preds) ** 2)

In [None]:
print(mse)

0.05397509537244705


In [None]:
rmse = np.sqrt(mse)

In [None]:
print(rmse)

0.23232540836603957
