In [None]:
from keras.models import Sequential
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten


In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [None]:
# Daily AAPL bars starting 2019-01-01; no end date is passed to the download call.
data = yf.download("AAPL", interval="1d", start="2019-01-01")

[*********************100%***********************]  1 of 1 completed


In [None]:
# (rows, columns) of the downloaded frame
data.shape

(1126, 6)

In [None]:
# Most recent rows of the download
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-15,183.960007,186.520004,183.779999,186.009995,186.009995,65433200
2023-06-16,186.729996,186.990005,184.270004,184.919998,184.919998,101235600
2023-06-20,184.410004,186.100006,184.410004,185.009995,185.009995,49799100
2023-06-21,184.899994,185.410004,182.589996,183.960007,183.960007,49515700
2023-06-22,183.740005,187.050003,183.669998,187.0,187.0,51158300


In [None]:
# First three rows of the download
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,38.7225,39.712502,38.557499,39.48,37.994492,148158800
2019-01-03,35.994999,36.43,35.5,35.547501,34.209961,365248800
2019-01-04,36.1325,37.137501,35.950001,37.064999,35.670345,234428400


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1126.0,1126.0,1126.0,1126.0,1126.0,1126.0
mean,115.728528,117.137349,114.430773,115.856772,114.556519,106993800.0
std,43.788278,44.287583,43.297391,43.813022,44.050809,52758130.0
min,35.994999,36.43,35.5,35.547501,34.209961,35195900.0
25%,70.560627,71.589373,69.831875,71.077497,69.472042,73969800.0
50%,128.834999,130.464996,127.035,129.009995,127.045498,92502550.0
75%,149.879997,151.487503,148.470005,150.369995,149.487064,124138100.0
max,186.729996,187.050003,184.410004,187.0,187.0,426510000.0


In [None]:
import plotly.graph_objects as go

# Closing-price trend across the whole download window
fig = go.Figure(data=[go.Scatter(x=data.index, y=data['Close'], mode='lines')])
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Close',
    width=900,
    height=500,
)
fig.show()

In [None]:
# Traded-volume trend across the whole download window
fig = go.Figure(data=[go.Scatter(x=data.index, y=data['Volume'], mode='lines')])
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Volume',
    width=900,
    height=500,
)
fig.show()

In [None]:

# Standardise the closing prices to zero mean and unit variance (z-score).
# data_mean / data_std are kept as globals so the scaling can be undone later.
# NOTE(review): the statistics are computed over the whole series, i.e. before
# any train/test split is made.
data_close = data["Close"].to_numpy()
data_mean = data_close.mean()
data_std = data_close.std()
data_close = (data_close - data_mean) / data_std

In [None]:
# Number of rows dated 2022-09-01 or later
test_length = len(data[data.index >= '2022-09-01'])

In [None]:
def CreateFeatures_and_Targets(data, feature_length):
    """Build sliding-window samples from a DataFrame.

    For every start row ``i`` the feature block is rows ``i`` through
    ``i + feature_length - 1`` (all columns) and the target is the ``"Close"``
    value of row ``i + feature_length``.

    Parameters
    ----------
    data : DataFrame
        Accessed through ``.iloc`` and ``data["Close"]``.
    feature_length : int
        Number of consecutive rows in each feature window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` — the stacked windows and their matching targets.
    """
    windows = []
    targets = []

    n_samples = len(data) - feature_length
    # tnrange renders a notebook progress bar while iterating.
    for start in tnrange(n_samples):
        stop = start + feature_length
        windows.append(data.iloc[start:stop, :].values)
        targets.append(data["Close"].values[stop])

    return np.array(windows), np.array(targets)

In [None]:
# Build supervised samples: each window of `lookback` consecutive normalised
# closes is paired with the close that immediately follows it.
lookback = 50

# The last valid window starts at len(data_close) - lookback - 1, so the range
# has to stop at len(data_close) - lookback. The earlier extra "- 1" silently
# dropped the most recent sample (and disagreed with CreateFeatures_and_Targets).
n_samples = len(data_close) - lookback
X = np.array([data_close[i:i + lookback] for i in range(n_samples)])
Y = np.array([data_close[i + lookback] for i in range(n_samples)])

In [None]:

# Split chronologically: the first 80% of the windows train the model and the
# last 20% are held out. shuffle=False matters for time-series data — a shuffled
# split places overlapping windows from the same dates on both sides
# (look-ahead leakage) and destroys the sample ordering that later cells rely
# on when they label predictions with data.index.
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2, shuffle=False)


In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

class MultiDimensionScaler():
    """Min-max scale every column of a 2-D array with its own ``MinMaxScaler``.

    One scaler is fitted per index along axis 1 (``X[:, i]``) and kept in
    ``self.scalers`` in column order so ``transform`` can reuse them on new
    data. Both methods return a scaled copy; the array passed in is left
    untouched.
    """

    def __init__(self):
        # Fitted scalers, one per column, in column order.
        self.scalers = []

    @staticmethod
    def _float_copy(X):
        """Return a floating-point copy of ``X`` (existing float dtypes are kept)."""
        copied = np.array(X)  # np.array copies its input by default
        if not np.issubdtype(copied.dtype, np.floating):
            # Scaled values are fractional; writing them into an integer array
            # would truncate them.
            copied = copied.astype(float)
        return copied

    def fit_transform(self, X):
        """Fit a fresh scaler for each column of ``X`` and return the scaled copy.

        Parameters
        ----------
        X : array-like, indexed as ``X[:, i]``
            Data to fit on; one scaler is created per index along axis 1.

        Returns
        -------
        numpy.ndarray
            Array of the same shape as ``X`` with every column min-max scaled.
        """
        # Refit from scratch. Without this reset a second call appended new
        # scalers after the old ones, and transform() kept using the stale ones.
        self.scalers = []
        scaled = self._float_copy(X)
        for i in range(scaled.shape[1]):
            scaler = MinMaxScaler()
            scaled[:, i] = scaler.fit_transform(scaled[:, i].reshape(-1, 1)).flatten()
            self.scalers.append(scaler)
        return scaled

    def transform(self, X):
        """Scale ``X`` with the scalers fitted by ``fit_transform``.

        Column ``i`` is transformed by ``self.scalers[i]``, so ``X`` must not
        have more columns than the data the scalers were fitted on.

        Returns
        -------
        numpy.ndarray
            Scaled copy of ``X``.
        """
        scaled = self._float_copy(X)
        for i in range(scaled.shape[1]):
            scaled[:, i] = self.scalers[i].transform(scaled[:, i].reshape(-1, 1)).flatten()
        return scaled




In [None]:
# Fit the per-column scalers on the training windows, then apply those same
# fitted scalers to the test windows. Both names are rebound to the results.
Feature_Scaler = MultiDimensionScaler()
train_X = Feature_Scaler.fit_transform(train_X)
test_X = Feature_Scaler.transform(test_X)

In [None]:
# Min-max scale the targets: fit on the training targets, reuse on the test
# targets. reshape(-1, 1) turns each target vector into a single column.
Target_Scaler = MinMaxScaler()
train_Y = Target_Scaler.fit_transform(train_Y.reshape(-1,1))
test_Y = Target_Scaler.transform(test_Y.reshape(-1,1))

In [None]:
def save_object(obj , name : str):
    """Pickle ``obj`` into the file ``<name>.pck``.

    Parameters
    ----------
    obj : any picklable object
    name : str
        Path without extension; ``".pck"`` is appended.
    """
    # The context manager closes the file even when pickling raises.
    with open(f"{name}.pck", "wb") as pickle_out:
        pickle.dump(obj, pickle_out)

def load_object(name : str):
    """Load and return the object pickled in ``<name>.pck``.

    Parameters
    ----------
    name : str
        Path without extension; ``".pck"`` is appended.

    Returns
    -------
    The unpickled object.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only load files that
    # this notebook (or another trusted source) produced.
    # The context manager closes the handle, which was previously left open.
    with open(f"{name}.pck", "rb") as pickle_in:
        return pickle.load(pickle_in)

In [None]:
# Persist the fitted scalers (written to Feature_Scaler.pck and
# Target_Scaler.pck) so they can be reloaded later.
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

In [None]:
# Report the split shapes, then give the feature arrays a trailing axis of
# size 1 so they match the (timesteps, 1) input shape declared for the model.
for label, array in (("Train X", train_X), ("Train Y", train_Y),
                     ("Test X", test_X), ("Test Y", test_Y)):
    print(f"{label} shape:", array.shape)

train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], 1)
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], 1)


Train X shape: (860, 50)
Train Y shape: (860, 1)
Test X shape: (215, 50)
Test Y shape: (215, 1)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

# Write weights to new_weights.h5 only when val_loss improves.
save_best = ModelCheckpoint(
    "new_weights.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
# Multiply the learning rate by 0.25 after 4 epochs without a val_loss
# improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.25,
    patience=4,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
from keras.layers import Concatenate, Input
from keras.models import Model

# Shared input for both branches: (timesteps, 1) windows
input_layer = Input(shape=(train_X.shape[1], 1))

# LSTM branch: two stacked 50-unit LSTMs with dropout, reduced to one value by
# the final Dense(1) — so this branch contributes a single feature to the merge.
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(train_X.shape[1], 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(units=1))

# CNN branch: Conv1D -> max-pool -> flatten -> 50-unit dense
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(train_X.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(units=50, activation='relu'))

# Apply both branches to the same input and concatenate their outputs
# (CNN output first, then the LSTM output)
concat_layer = Concatenate()([cnn_model(input_layer) ,lstm_model(input_layer)])

# Final linear regression head on top of the merged features
output_layer = Dense(units=1, activation='linear')(concat_layer)
model = Model(inputs=input_layer, outputs=output_layer)

# Compile and train the combined CNN + LSTM model (MSE loss, Adam optimizer).
# NOTE(review): validation_data is the test set, so the checkpoint and
# learning-rate callbacks are driven by test-set loss — consider a separate
# validation split.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(train_X, train_Y, epochs=10, batch_size=32,validation_data=(test_X , test_Y),
            callbacks=[reduce_lr , save_best])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2b5826a5c0>

In [None]:
# Restore the lowest-val_loss weights written during training by the
# ModelCheckpoint callback. Do not call model.save_weights("new_weights.h5")
# before this: it would overwrite that best checkpoint with the final-epoch
# weights, making save_best_only pointless.
model.load_weights("new_weights.h5")

In [None]:
# Print the layer-by-layer structure and parameter counts of the merged model
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 50, 1)]      0           []                               
                                                                                                  
 sequential_1 (Sequential)      (None, 50)           77106       ['input_1[0][0]']                
                                                                                                  
 sequential (Sequential)        (None, 1)            30651       ['input_1[0][0]']                
                                                                                                  
 concatenate (Concatenate)      (None, 51)           0           ['sequential_1[0][0]',           
                                                                  'sequential[0][0]']         

In [None]:
# Model output for the held-out windows (still in the scaled target space)
Predictions = model.predict(test_X)



In [None]:
# Undo both scalings so the values are in price units: first the MinMax target
# scaling, then the z-score normalisation (data_mean / data_std) that was
# applied to the closes before windowing. Inverting only the MinMax step
# leaves z-scores, not prices.
Predictions = Target_Scaler.inverse_transform(Predictions) * data_std + data_mean
Actual = Target_Scaler.inverse_transform(test_Y) * data_std + data_mean

In [None]:
# Show the raw shapes before flattening
print(Predictions.shape)
print(Actual.shape)

# Flatten both arrays to 1-D so they can be used as DataFrame columns and
# plot series. No squeeze is needed afterwards: reshape(-1) always yields a
# 1-D array, and squeezing a single-element result would collapse it to a 0-d
# scalar that the slicing in later cells cannot index.
Predictions = Predictions.reshape(-1)
Actual = Actual.reshape(-1)


(215, 1)
(215, 1)


In [None]:
# Count the rows dated 2022-12-10 or later; that many trailing samples are tabulated.
test_length = len(data[data.index >= '2022-12-10'])

# Tabulate the trailing actual/predicted values, labelled with the last
# `test_length` dates of `data`.
# NOTE(review): the date labels are only meaningful if the trailing entries of
# Actual/Predictions really correspond to the last rows of `data`, in order —
# confirm against how the train/test split and windows were built.
test_dataframe_dict = {
    'Actual': Actual[-test_length:],
    'Predicted': Predictions[-test_length:],
}
test_df = pd.DataFrame(test_dataframe_dict, index=data.index[-test_length:])


In [None]:
# First rows of the actual-vs-predicted table
test_df.head()

Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-12,-0.881848,-0.790369
2022-12-13,-1.030158,-1.043891
2022-12-14,1.286336,1.369719
2022-12-15,0.4184,0.181145
2022-12-16,1.367398,1.182939


In [None]:
# Compare actual vs. predicted values over the dates held in test_df.
# The y-series are taken from test_df itself so they have exactly one value
# per date on the x-axis; the full Actual / Predictions arrays can be longer
# than test_df.index.
fig = go.Figure()
fig.add_trace(go.Scatter(x = test_df.index , y = test_df['Actual'] , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = test_df.index , y = test_df['Predicted'] , mode = 'lines' , name='Predicted'))
fig.update_layout(xaxis_title='Date' , yaxis_title='Close')
fig.show()

In [None]:
# Stack the training windows followed by the test windows along the sample axis
Total_features = np.concatenate((train_X , test_X) , axis = 0)

In [None]:
# Matching targets, in the same train-then-test order as Total_features
Total_Targets = np.concatenate((train_Y , test_Y) , axis = 0)

In [None]:
# Model output for every window (train and test); rebinds Predictions
Predictions = model.predict(Total_features)



In [None]:

# Undo both scalings so the values are in price units: first the MinMax target
# scaling, then the z-score normalisation (data_mean / data_std) that was
# applied to the closes before windowing. Inverting only the MinMax step
# leaves z-scores, not prices.
Predictions = Target_Scaler.inverse_transform(Predictions) * data_std + data_mean
Actual = Target_Scaler.inverse_transform(Total_Targets) * data_std + data_mean

In [None]:
# Show the raw shapes before flattening
print(Predictions.shape)
print(Actual.shape)

# Flatten both arrays to 1-D so they can be used as plot series. No squeeze is
# needed afterwards: reshape(-1) always yields a 1-D array, and squeezing a
# single-element result would collapse it to a 0-d scalar.
Predictions = Predictions.reshape(-1)
Actual = Actual.reshape(-1)


(1075, 1)
(1075, 1)


In [None]:
# Compare actual vs. predicted values across all samples (train followed by test).
# The target of window k is the close at row k + lookback, so the matching
# dates start `lookback` rows into data.index and there is one per sample.
# Plotting against the full data.index would pair the values with the wrong
# dates and leave x and y with different lengths.
# NOTE(review): this assumes the concatenated train + test samples are in
# chronological order, which a shuffled train/test split does not preserve.
sample_dates = data.index[lookback : lookback + len(Actual)]

fig = go.Figure()

fig.add_trace(go.Scatter(x = sample_dates , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = sample_dates , y = Predictions , mode = 'lines' , name='Predicted'))
fig.update_layout(xaxis_title='Date' , yaxis_title='Close')
fig.show()

In [None]:
# Predictions of the merged model on the test windows, kept in the scaled
# target space so they are comparable with test_Y
cnn_preds = model.predict(test_X)



In [None]:
# Mean squared error between the scaled test targets and the predictions
mse = np.square(test_Y - cnn_preds).mean()

In [None]:
# Display the test MSE (computed on the scaled targets)
mse

0.0008625825064076412

In [None]:
# Root mean squared error, in the same scaled units as test_Y
rmse = np.sqrt(mse)

In [None]:
# Display the test RMSE
rmse

0.029369754959952272

In [None]:
# Evaluate the model on the test data. The model was compiled with
# loss='mean_squared_error', so this is the MSE on the scaled targets.
test_loss = model.evaluate(test_X, test_Y)

# Print the test loss
print('Test loss:', test_loss)


Test loss: 0.0008625824120827019
