In [3]:
# importing pandas
import pandas as pd

In [4]:
# Load the Apple stock price history from the local CSV file and preview the first rows.
csv_path = './data/apple_stock_data.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.31752,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300


In [5]:
# Parse the 'Date' strings into pandas datetimes, replacing the original column.
df = df.assign(Date=pd.to_datetime(df['Date']))

In [6]:
# confirming the conversion: the column should now report a timezone-aware datetime dtype
df['Date'].dtype

datetime64[ns, UTC]

In [7]:
# Use the 'Date' column as the row index (rebinding `df` instead of mutating it in place).
df = df.set_index('Date')

In [8]:
# inspecting the new index: it should be a DatetimeIndex named 'Date'
df.index

DatetimeIndex(['2023-11-02 00:00:00+00:00', '2023-11-03 00:00:00+00:00',
               '2023-11-06 00:00:00+00:00', '2023-11-07 00:00:00+00:00',
               '2023-11-08 00:00:00+00:00', '2023-11-09 00:00:00+00:00',
               '2023-11-10 00:00:00+00:00', '2023-11-13 00:00:00+00:00',
               '2023-11-14 00:00:00+00:00', '2023-11-15 00:00:00+00:00',
               ...
               '2024-10-21 00:00:00+00:00', '2024-10-22 00:00:00+00:00',
               '2024-10-23 00:00:00+00:00', '2024-10-24 00:00:00+00:00',
               '2024-10-25 00:00:00+00:00', '2024-10-28 00:00:00+00:00',
               '2024-10-29 00:00:00+00:00', '2024-10-30 00:00:00+00:00',
               '2024-10-31 00:00:00+00:00', '2024-11-01 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Date', length=252, freq=None)

In [9]:
# Keep only the 'Close' column for modelling.
# An explicit copy is taken because later cells write new/overwritten columns into `df`;
# writing into a bare column selection can trigger SettingWithCopyWarning and leaves it
# ambiguous whether the original frame is affected.
df = df[['Close']].copy()

In [10]:
# previewing the reduced dataframe: only the 'Close' column should remain, indexed by date
df.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,177.570007
2023-11-03 00:00:00+00:00,176.649994
2023-11-06 00:00:00+00:00,179.229996
2023-11-07 00:00:00+00:00,181.820007
2023-11-08 00:00:00+00:00,182.889999


## Choosing the Hybrid Models


We use two models for this task: an LSTM (Long Short-Term Memory) network and Linear Regression. The LSTM was chosen because it effectively captures sequential dependencies and patterns in time-series data, which makes it suitable for modelling stock price movements influenced by historical trends.

Linear Regression, on the other hand, is a straightforward model that captures simple linear relationships and long-term trends in data. By combining these two models into a hybrid approach, we leverage the LSTM’s ability to model complex time-dependent patterns alongside Linear Regression’s ability to identify and follow broader trends. This combination aims to create a more balanced and accurate prediction system.

### Data normalization


In [11]:
# importing required library
from sklearn.preprocessing import MinMaxScaler

In [12]:
# Min-max scaler that maps values into the [0, 1] range; the same instance is reused
# later to map model predictions back to price units.
target_range = (0, 1)
scaler = MinMaxScaler(feature_range=target_range)

In [13]:
# Fit the scaler on the closing prices and overwrite the column with the scaled values.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the train/test split,
# so the test period contributes to the min/max used for scaling — confirm this is intended.
# NOTE(review): this cell is not safe to re-run on its own: a second run would refit the
# scaler on the already-scaled column and the original price range would be lost.
scaled_close = scaler.fit_transform(df[['Close']])
df['Close'] = scaled_close

In [14]:
# previewing the scaled closing prices (values are expected to lie within [0, 1])
df['Close'].head()

Date
2023-11-02 00:00:00+00:00    0.175853
2023-11-03 00:00:00+00:00    0.162983
2023-11-06 00:00:00+00:00    0.199077
2023-11-07 00:00:00+00:00    0.235311
2023-11-08 00:00:00+00:00    0.250280
Name: Close, dtype: float64

### Preparing data for LSTM

#### Creating sequences of a defined length (e.g., 60 days)


In [15]:
# importing the required library
import numpy as np

In [16]:
def create_sequences(data, seq_length=60):
    """Build sliding-window samples for next-step forecasting.

    Each sample ``X[i]`` holds ``seq_length`` consecutive observations
    ``data[i : i + seq_length]`` and its target ``y[i]`` is the observation
    that immediately follows the window, ``data[i + seq_length]``.

    Parameters
    ----------
    data : array-like
        Observations ordered in time along the first axis. The input is
        converted with ``np.asarray``, so lists and pandas objects are
        indexed positionally.
    seq_length : int, default 60
        Number of observations per window. Must be at least 1.

    Returns
    -------
    X : np.ndarray
        Shape ``(n_samples, seq_length, ...)`` where
        ``n_samples = max(len(data) - seq_length, 0)``.
    y : np.ndarray
        Shape ``(n_samples, ...)``.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")

    values = np.asarray(data)
    n_samples = max(len(values) - seq_length, 0)

    if n_samples == 0:
        # Not enough observations for a single window: return correctly shaped empty
        # arrays so that downstream reshapes/splits do not fail on a bare (0,) array.
        trailing = values.shape[1:]
        empty_X = np.empty((0, seq_length) + trailing, dtype=values.dtype)
        empty_y = np.empty((0,) + trailing, dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + seq_length] for start in range(n_samples)])
    # Targets are simply the series shifted by seq_length; copy so the result does not
    # share memory with the caller's data.
    y = values[seq_length:].copy()
    return X, y

In [17]:
# Window length (number of rows per input sequence), then the (window, next value)
# pairs built from the scaled closing prices.
seq_length = 60
close_values = df['Close'].to_numpy()
X, y = create_sequences(close_values, seq_length=seq_length)

In [19]:
# shape of the input windows: (number of windows, seq_length)
X.shape

(192, 60)

In [21]:
# shape of the targets: one value per window
y.shape


(192,)

In [25]:
# Chronological 80/20 split of the sequences: the first 80% are used for training,
# the remaining windows for testing (no shuffling).
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

#### Building a sequential LSTM model with layers to capture the temporal dependencies in the data

In [None]:
# importing required library
import tensorflow as tf
import keras
from keras import layers

In [35]:
# Fix the global random seeds before any weights are created so that weight
# initialisation and training are reproducible across kernel restarts.
keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a single-unit regression head.
# Each sample is a window of `n_timesteps` steps with one feature per step.
n_timesteps = X_train.shape[1]
lstm_model = keras.Sequential([
    layers.Input(shape=(n_timesteps, 1)),
    # return_sequences=True so the second LSTM receives the full sequence of hidden states
    layers.LSTM(units=50, return_sequences=True),
    layers.LSTM(units=50),
    layers.Dense(1),
])

In [None]:
# compiling the model (Adam optimizer, mean squared error loss)
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# The model's Input layer declares samples of shape (timesteps, 1), so add the trailing
# feature axis explicitly — the same reshape that is applied to the test windows before
# predicting — instead of relying on the framework to adjust the input rank.
X_train_lstm = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

# training the model
lstm_model.fit(X_train_lstm, y_train, epochs=20, batch_size=32)

In [None]:
# preparing the second model (Linear Regression)
# generating lagged features: the closing values of the previous 1, 2 and 3 rows
for lag in (1, 2, 3):
    df[f'Lag_{lag}'] = df['Close'].shift(lag)

# shift() leaves NaN in the first rows of each lag column. Forward fill alone cannot fill
# them (there is no earlier value to propagate), so they are back-filled with the first
# available value. Rows are kept rather than dropped so the frame's row count — which the
# positional train/test slicing relies on — does not change.
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`.
df = df.ffill().bfill()

In [None]:
# previewing the lagged columns next to 'Close'
df.head()

In [24]:
# Splitting the Linear Regression data so that it lines up one-to-one with the LSTM samples.
# The LSTM target y[i] is the close at row i + seq_length, so the first `seq_length` rows
# of the frame have no LSTM counterpart. Dropping them first makes `train_size` (computed
# from the number of sequences) split both models at the same date and gives both test
# sets the same length. It also skips the first rows whose lag values are undefined.
X_lin = df[['Lag_1', 'Lag_2', 'Lag_3']].iloc[seq_length:]
y_lin = df['Close'].iloc[seq_length:]
assert len(X_lin) == len(X), "linear-regression rows must match the number of LSTM sequences"

X_train_lin, X_test_lin = X_lin.iloc[:train_size], X_lin.iloc[train_size:]
y_train_lin, y_test_lin = y_lin.iloc[:train_size], y_lin.iloc[train_size:]

In [None]:
# Fit an ordinary least-squares model on the lagged closing values.
from sklearn.linear_model import LinearRegression

lin_model = LinearRegression()
lin_model.fit(X=X_train_lin, y=y_train_lin)

### Making predictions with the LSTM on the test set and inverse-transforming the scaled predictions

In [None]:
# Add a trailing feature axis so the test windows have shape (samples, timesteps, 1).
lstm_input_shape = X_test.shape[:2] + (1,)
X_test_lstm = X_test.reshape(lstm_input_shape)

# Predict the value following each test window (still in scaled units).
lstm_predictions = lstm_model.predict(X_test_lstm)

In [None]:
# inspecting the raw LSTM predictions (still in the scaler's normalized units at this point)
lstm_predictions

In [28]:

# Map the LSTM outputs back to price units.
# The scaled predictions are recomputed from the model here instead of reading
# `lstm_predictions` back in, so re-running this cell can never apply the inverse
# transform twice to the same array.
lstm_predictions = scaler.inverse_transform(lstm_model.predict(X_test_lstm, verbose=0))

In [None]:
# inspecting the LSTM predictions after the inverse transform (original price units)
lstm_predictions

In [30]:
# Predict the scaled close for every test row from its three lagged values.
lin_predictions = lin_model.predict(X=X_test_lin)

In [None]:
# inspecting the raw Linear Regression predictions (normalized units)
lin_predictions

In [32]:
# Map the Linear Regression outputs back to price units (denormalization).
# The scaled predictions are recomputed from the model instead of reading
# `lin_predictions` back in, so re-running this cell can never apply the inverse
# transform twice. The scaler expects a 2-D column, hence the reshape to (n, 1).
lin_predictions = scaler.inverse_transform(lin_model.predict(X_test_lin).reshape(-1, 1))

In [None]:
# inspecting the Linear Regression predictions after denormalization (original price units)
lin_predictions

In [None]:
# Hybrid forecast: a fixed-weight average of the two models' price predictions.
LSTM_WEIGHT = 0.7
LIN_WEIGHT = 0.3

# Both arrays must describe the same test rows. Without this check a length mismatch
# surfaces as an opaque broadcasting error, or is silently broadcast when one of the
# arrays happens to contain a single row.
assert lstm_predictions.shape == lin_predictions.shape, (
    f"prediction shapes differ: LSTM {lstm_predictions.shape} vs "
    f"Linear Regression {lin_predictions.shape}"
)

hybrid_predictions = (LSTM_WEIGHT * lstm_predictions) + (LIN_WEIGHT * lin_predictions)