# <center>TW50 LSTM Forecasting</center>

> Authors:
> - D11202805 - Ian Joseph Chandra
> - M11002818 - Wilfrid Azariah




## Step 1: Import Libraries and Load Data

In [78]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from datetime import timedelta

# Step 1: Load and Prepare the Data
file_path = "../FTSE TWSE Taiwan 50 Index.csv"
df = pd.read_csv(file_path, usecols=['Date', 'Price Index'])  # Load the data

df['Date'] = pd.to_datetime(df['Date'])  # Convert the 'Date' column to datetime format
df.set_index('Date', inplace=True)
df = df.iloc[::-1]  # Reverse the DataFrame to have the oldest data first

scaler = MinMaxScaler()
df['Price Index'] = scaler.fit_transform(df[['Price Index']])

# Remove rows with NaN values
df = df.dropna()

df

Unnamed: 0_level_0,Price Index
Date,Unnamed: 1_level_1
2019-05-10,0.131186
2019-05-13,0.117371
2019-05-14,0.111688
2019-05-15,0.112493
2019-05-16,0.105687
...,...
2024-05-03,0.935136
2024-05-06,0.955959
2024-05-07,0.974304
2024-05-08,0.979518


## Step 2: Split the Data for Training and Testing

In [79]:
train_size = int(len(df) * 0.8)
train_data, test_data = df.iloc[:train_size], df.iloc[train_size:]

train_data

Unnamed: 0_level_0,Price Index
Date,Unnamed: 1_level_1
2019-05-10,0.131186
2019-05-13,0.117371
2019-05-14,0.111688
2019-05-15,0.112493
2019-05-16,0.105687
...,...
2023-05-03,0.503192
2023-05-04,0.507080
2023-05-05,0.510297
2023-05-08,0.517428


In [80]:
test_data

Unnamed: 0_level_0,Price Index
Date,Unnamed: 1_level_1
2023-05-10,0.515448
2023-05-11,0.508232
2023-05-12,0.502318
2023-05-15,0.501634
2023-05-16,0.518501
...,...
2024-05-03,0.935136
2024-05-06,0.955959
2024-05-07,0.974304
2024-05-08,0.979518


## Step 3: Prepare Training Data for LSTM

In [81]:
def prepare_data(data, look_back=1):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back), 0])
        y.append(data[i + look_back, 0])
    return np.array(X), np.array(y)


look_back = 20  # Adjust this based on your data and model performance

train_sequence = train_data['Price Index'].values.reshape(-1, 1)
test_sequence = test_data['Price Index'].values.reshape(-1, 1)

X_train, y_train = prepare_data(train_sequence, look_back)
X_test, y_test = prepare_data(test_sequence, look_back)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

X_train.shape

(954, 20, 1)

In [82]:
y_train.shape

(954,)

In [83]:
X_test.shape

(224, 20, 1)

In [84]:
y_test.shape

(224,)

## Step 4: Build and Train the LSTM Model


In [85]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))  # First LSTM layer
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
model.add(LSTM(units=50, return_sequences=True))  # Second LSTM layer
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
model.add(LSTM(units=50))  # Third LSTM layer
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
model.add(Dense(units=1))  # Output layer

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()


  super().__init__(**kwargs)


In [86]:
model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.1057
Epoch 2/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0071
Epoch 3/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0051
Epoch 4/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0047
Epoch 5/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0040
Epoch 6/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0038
Epoch 7/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0042
Epoch 8/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0036
Epoch 9/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0042
Epoch 10/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - lo

<keras.src.callbacks.history.History at 0x23b8664d970>

## Step 5: Forecasting the Next 5 Business Days of Price Index

In [87]:
last_date = df.index[-1]
next_dates = []
for i in range(1, 8):  # Predict for the next 7 days to exclude weekends
    next_date = last_date + timedelta(days=i)
    if next_date.weekday() < 5:  # Check if it's a weekday (Monday to Friday)
        next_dates.append(next_date)

# Prepare input data for prediction
last_sequence = df['Price Index'][-look_back:].values.reshape(-1, 1)
input_sequence = np.array([last_sequence])

# Reshape input for LSTM prediction
input_sequence = np.reshape(input_sequence, (input_sequence.shape[0], input_sequence.shape[1], 1))

# Make predictions
predicted_prices = []
for _ in range(len(next_dates)):
    predicted_price = model.predict(input_sequence)
    predicted_prices.append(predicted_price[0, 0])
    # Shift the input sequence by one day for the next prediction
    input_sequence = np.roll(input_sequence, -1, axis=1)
    input_sequence[0, -1, 0] = predicted_price[0, 0]

# Inverse transform the predicted prices
predicted_prices = np.array(predicted_prices).reshape(-1, 1)
predicted_prices = scaler.inverse_transform(predicted_prices)

print("Predicted Price Index for the next 5 business days (excluding weekends):")
for date, price in zip(next_dates[:5], predicted_prices[:5]):
    print(date.strftime('%Y-%m-%d'), ":", price[0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 501ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Predicted Price Index for the next 5 business days (excluding weekends):
2024-05-10 : 15983.237
2024-05-13 : 15967.462
2024-05-14 : 15918.418
2024-05-15 : 15858.854
2024-05-16 : 15800.044
