In [1]:
import pandas as pd
data = pd.read_csv('apple_stock_data.csv')

In [2]:
data

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.317520,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300
...,...,...,...,...,...,...,...
247,2024-10-28 00:00:00+00:00,233.399994,233.399994,234.729996,232.550003,233.320007,36087100
248,2024-10-29 00:00:00+00:00,233.669998,233.669998,234.330002,232.320007,233.100006,35417200
249,2024-10-30 00:00:00+00:00,230.100006,230.100006,233.470001,229.550003,232.610001,47070900
250,2024-10-31 00:00:00+00:00,225.910004,225.910004,229.830002,225.369995,229.339996,64370100


In [3]:
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
data=data[['Close']]

In [4]:
data

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,177.570007
2023-11-03 00:00:00+00:00,176.649994
2023-11-06 00:00:00+00:00,179.229996
2023-11-07 00:00:00+00:00,181.820007
2023-11-08 00:00:00+00:00,182.889999
...,...
2024-10-28 00:00:00+00:00,233.399994
2024-10-29 00:00:00+00:00,233.669998
2024-10-30 00:00:00+00:00,230.100006
2024-10-31 00:00:00+00:00,225.910004


In [5]:
# using LSTM (as it is time series data) and Linear regression
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
data['Close'] = scaler.fit_transform(data[['Close']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Close'] = scaler.fit_transform(data[['Close']])


In [6]:
data

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,0.175853
2023-11-03 00:00:00+00:00,0.162983
2023-11-06 00:00:00+00:00,0.199077
2023-11-07 00:00:00+00:00,0.235311
2023-11-08 00:00:00+00:00,0.250280
...,...
2024-10-28 00:00:00+00:00,0.956911
2024-10-29 00:00:00+00:00,0.960688
2024-10-30 00:00:00+00:00,0.910744
2024-10-31 00:00:00+00:00,0.852127


In [7]:
# preparign data for LSTM by creating sequences of a defined length to predict next day's price
import numpy as np
def create_sequences(data,seq_length=60):
    X,y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 60
X,y = create_sequences(data['Close'].values, seq_length)

In [8]:
# splitting the dataset
train_size = int(len(X)*0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [32]:
y

array([0.27140452, 0.30581984, 0.29169009, 0.31729147, 0.3399553 ,
       0.3414942 , 0.32624523, 0.33365987, 0.30987682, 0.28035806,
       0.26790704, 0.26385005, 0.24216562, 0.23167317, 0.24230566,
       0.27098484, 0.2451036 , 0.22607729, 0.2466425 , 0.22971459,
       0.22034137, 0.2050924 , 0.14129836, 0.07162836, 0.05763844,
       0.05595971, 0.08016223, 0.10842194, 0.11513705, 0.08575833,
       0.11191942, 0.10660318, 0.12199219, 0.15500843, 0.19124229,
       0.08911577, 0.10184666, 0.08184116, 0.06589266, 0.11625627,
       0.09065467, 0.07036932, 0.05372127, 0.06505308, 0.05344163,
       0.0640739 , 0.04826521, 0.06533294, 0.03889198, 0.14045878,
       0.16158371, 0.10758258, 0.06127595, 0.04196978, 0.02853936,
       0.        , 0.01175149, 0.02658078, 0.05623957, 0.06841074,
       0.06015673, 0.11891439, 0.07456634, 0.06015673, 0.11233911,
       0.25713495, 0.23377179, 0.24342466, 0.2481814 , 0.273783  ,
       0.25251824, 0.29770565, 0.31379398, 0.34583104, 0.34750

In [10]:
# building the sequential LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))

In [11]:
#compiling model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train,y_train,epochs=20, batch_size = 32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x1b33ad5ccd0>

In [12]:
#Training second model, Linear regression (by generating lagged features)
data['Lag_1']=data['Close'].shift(1) # t-1
data['Lag_2']=data['Close'].shift(2) # t-2
data['Lag_3']=data['Close'].shift(3) # t-3
data = data.dropna() # dropping the Nan value time stamps (first 3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Lag_1']=data['Close'].shift(1) # t-1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Lag_2']=data['Close'].shift(2) # t-2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Lag_3']=data['Close'].shift(3) # t-3


In [30]:
data_lin = data
data_lin = data_lin.iloc[57:].reset_index(drop=True)
data_lin

Unnamed: 0,Close,Lag_1,Lag_2,Lag_3
0,0.271405,0.322328,0.373951,0.383604
1,0.305820,0.271405,0.322328,0.373951
2,0.291690,0.305820,0.271405,0.322328
3,0.317291,0.291690,0.305820,0.271405
4,0.339955,0.317291,0.291690,0.305820
...,...,...,...,...
187,0.956911,0.929071,0.917320,0.919978
188,0.960688,0.956911,0.929071,0.917320
189,0.910744,0.960688,0.956911,0.929071
190,0.852127,0.910744,0.960688,0.956911


In [33]:
#spilitting the dataset
X_lin = data_lin[['Lag_1', 'Lag_2', 'Lag_3']]
y_lin = data_lin['Close']
X_train_lin,X_test_lin = X_lin[:train_size], X_lin[train_size:]
y_train_lin, y_test_lin = y_lin[:train_size], y_lin[train_size:]

In [35]:
#train the model
from sklearn.linear_model import LinearRegression
lin_model = LinearRegression()
lin_model.fit(X_train_lin, y_train_lin)

In [36]:
# LSTM predictions
X_test_lstm = X_test.reshape((X_test.shape[0], X_test.shape[1],1))
lstm_predictions = lstm_model.predict(X_test_lstm)
lstm_predictions = scaler.inverse_transform(lstm_predictions)



In [40]:
# Linear regression predictions
lin_predictions = lin_model.predict(X_test_lin)
lin_predictions_2d = lin_predictions.reshape(-1,1)
lin_predictions = scaler.inverse_transform(lin_predictions_2d)

In [63]:
# Hybrid Model
alpha = 0.3 # tunable parameter
beta = 0.7 # tunable parameter
hybrid_predictions = (alpha * lstm_predictions) + (beta * lin_predictions)

In [64]:
# Model Evaluation
from sklearn.metrics import mean_squared_error
#LSTM
Y_test = scaler.inverse_transform(y_test.reshape(-1,1))
LSTM_MSE = mean_squared_error(Y_test, lstm_predictions)
Linear_reg_MSE = mean_squared_error(Y_test, lin_predictions)
hybrid_MSE = mean_squared_error(Y_test, hybrid_predictions)
print("LSTM : ",LSTM_MSE,"Linear_regression : ",Linear_reg_MSE,"hybrid : ",hybrid_MSE)

LSTM :  25.530041594374676 Linear_regression :  9.547688481665874 hybrid :  9.823698078081806


In [60]:
# Prediction for next 10 days using LSTM
lstm_future_predictions = []
last_sequence = X[-1].reshape(1, seq_length, 1)
for _ in range(10):
    lstm_pred = lstm_model.predict(last_sequence)[0, 0]
    lstm_future_predictions.append(lstm_pred)
    lstm_pred_reshaped = np.array([[lstm_pred]]).reshape(1, 1, 1)
    last_sequence = np.append(last_sequence[:, 1:, :], lstm_pred_reshaped, axis=1)
lstm_future_predictions = scaler.inverse_transform(np.array(lstm_future_predictions).reshape(-1, 1))



In [61]:
# Prediction for next 10 days using Linear regression
recent_data = data['Close'].values[-3:] # last three values 
lin_future_predictions = []
for _ in range(10):
    lin_pred = lin_model.predict(recent_data.reshape(1, -1))[0]
    lin_future_predictions.append(lin_pred)
    recent_data = np.append(recent_data[1:], lin_pred)
lin_future_predictions = scaler.inverse_transform(np.array(lin_future_predictions).reshape(-1, 1))



In [67]:
# Prediction for next 10 days using Hybrid model
hybrid_future_predictions = (alpha * lstm_future_predictions) + (beta * lin_future_predictions)

In [68]:
#Final dataframe to look at prediction
future_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=10)
predictions_df = pd.DataFrame({
    'Date': future_dates,
    'LSTM Predictions': lstm_future_predictions.flatten(),
    'Linear Regression Predictions': lin_future_predictions.flatten(),
    'Hybrid Model Predictions': hybrid_future_predictions.flatten()
})
print(predictions_df)

                       Date  LSTM Predictions  Linear Regression Predictions  \
0 2024-11-02 00:00:00+00:00        230.934555                     230.325696   
1 2024-11-03 00:00:00+00:00        230.635498                     226.210016   
2 2024-11-04 00:00:00+00:00        230.319412                     222.403088   
3 2024-11-05 00:00:00+00:00        229.994141                     230.535283   
4 2024-11-06 00:00:00+00:00        229.663406                     226.569972   
5 2024-11-07 00:00:00+00:00        229.329391                     221.853999   
6 2024-11-08 00:00:00+00:00        228.992996                     230.722622   
7 2024-11-09 00:00:00+00:00        228.654785                     226.996460   
8 2024-11-10 00:00:00+00:00        228.315002                     221.262765   
9 2024-11-11 00:00:00+00:00        227.973740                     230.880689   

   Hybrid Model Predictions  
0                230.508360  
1                227.537662  
2                224.777987  