<a href="https://colab.research.google.com/github/alisdghnia/rnn-lstm-interest-rate-prediction/blob/main/RNN_LSTM_InterestRate_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import io
from google.colab import files
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SimpleRNN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [2]:
uploaded = files.upload()

In [39]:
ir = pd.read_excel('IR-data.xlsx')
ir = ir.rename(columns={'Interest rate': 'interest_rate'})
ir = ir.set_index('Date')
ir

Unnamed: 0_level_0,interest_rate
Date,Unnamed: 1_level_1
2010-12-30,4.20
2011-01-06,4.13
2011-01-13,4.08
2011-01-20,4.05
2011-01-27,4.09
...,...
2022-02-24,3.14
2022-03-03,3.01
2022-03-10,3.09
2022-03-17,3.39


In [48]:
def create_sequences(data, window_size):
    train_window = []
    target_window = []
    for i in range(len(data) - window_size):
        train_window.append(data.iloc[i:i+window_size-1, :])
        target_window.append(data.iloc[i+window_size, :])

    return train_window, target_window

In [82]:
train = ir[ir.index < '2021-01-01']
# create a test data with 50 more data points for predictions
# might want to think about just leaving out 50 data points between train and test
# this will prevent the possible data leakage that is happening here
# otherwise only 14 rows of data will be predicted with 50 window size
start_date = pd.to_datetime('2021-01-01') - pd.DateOffset(days=350)
test = ir[ir.index >= start_date]
# test = ir[ir.index >= '2021-01-01']

x_train, y_train = create_sequences(train, window_size= 50)
x_test, y_test = create_sequences(test, window_size= 50)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((473, 49, 1), (473, 1), (64, 49, 1), (64, 1))

# RNN Application

In [83]:
window_size = 50
model = Sequential()
model.add(SimpleRNN(50, activation='relu', input_shape=(window_size, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(x_train, y_train, epochs=20)

Epoch 1/20


  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 6.8718
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.4244
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0722
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0310
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0163
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0130
Epoch 7/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0150
Epoch 8/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0128
Epoch 9/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0174
Epoch 10/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0129
Epoch 11/2

In [86]:
# start with the first sequence from the test data
last_sequence = np.array(test['interest_rate'])[:window_size].reshape(-1, 1)  # reshape to make sure it's 2D

predictions = []

# use the 50 + len(ir[ir.index >= '2021-01-01']) data that we assigned earlier
# to predict for the entire length of the ir[ir.index >= '2021-01-01'] data
for i in range(len(test) - window_size):
    prediction = model.predict(last_sequence.reshape(1, window_size, 1))
    predictions.append(prediction[0, 0])

    # reshape the next value to be appended
    next_value = np.array(test['interest_rate'])[i + window_size].reshape(-1, 1)

    last_sequence = np.append(last_sequence[1:], next_value, axis=0)

print(predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

In [91]:
final_df = ir[ir.index >= '2021-01-01']
final_df.loc[:, 'interest_rate_rnn'] = predictions
final_df = final_df.rename(columns={'interest_rate': 'interest_rate_original'})
final_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.loc[:, 'interest_rate_rnn'] = predictions


Unnamed: 0_level_0,interest_rate_original,interest_rate_rnn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-07,2.16,2.257776
2021-01-14,2.23,2.236353
2021-01-21,2.21,2.232058
2021-01-28,2.20,2.244044
2021-02-04,2.21,2.243789
...,...,...
2022-02-24,3.14,2.921634
2022-03-03,3.01,3.047959
2022-03-10,3.09,3.098802
2022-03-17,3.39,3.105922


In [92]:
rnn_mse = np.mean((final_df['interest_rate_rnn'] - final_df['interest_rate_original'])**2)
rnn_mse

0.01637066414801336

# LSTM Application

In [None]:
window_size = 50
model = Sequential()
model.add(SimpleRNN(50, activation='relu', input_shape=(window_size, 1)))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

history = model.fit(x_train, y_train, epochs=20)

In [93]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(window_size, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=20)

Epoch 1/20


  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 9.5750
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.5514
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0628
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0376
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0315
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0295
Epoch 7/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0257
Epoch 8/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0286
Epoch 9/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0266
Epoch 10/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0250
Epoch 11/2

<keras.src.callbacks.history.History at 0x7d663f0eff70>

In [94]:
# start with the first sequence from the test data
last_sequence = np.array(test['interest_rate'])[:window_size].reshape(-1, 1)  # reshape to make sure it's 2D

lstm_predictions = []

# use the 50 + len(ir[ir.index >= '2021-01-01']) data that we assigned earlier
# to predict for the entire length of the ir[ir.index >= '2021-01-01'] data
for i in range(len(test) - window_size):
    prediction = model.predict(last_sequence.reshape(1, window_size, 1))
    lstm_predictions.append(prediction[0, 0])

    # reshape the next value to be appended
    next_value = np.array(test['interest_rate'])[i + window_size].reshape(-1, 1)

    last_sequence = np.append(last_sequence[1:], next_value, axis=0)

print(lstm_predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 584ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [95]:
final_df.loc[:, 'interest_rate_lstm'] = lstm_predictions
final_df

Unnamed: 0_level_0,interest_rate_original,interest_rate_rnn,interest_rate_lstm
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-07,2.16,2.257776,2.330104
2021-01-14,2.23,2.236353,2.310937
2021-01-21,2.21,2.232058,2.302372
2021-01-28,2.20,2.244044,2.296935
2021-02-04,2.21,2.243789,2.290770
...,...,...,...
2022-02-24,3.14,2.921634,2.630768
2022-03-03,3.01,3.047959,2.712975
2022-03-10,3.09,3.098802,2.782902
2022-03-17,3.39,3.105922,2.843653


In [96]:
lstm_mse = np.mean((final_df['interest_rate_lstm'] - final_df['interest_rate_original'])**2)
rnn_mse = np.mean((final_df['interest_rate_rnn'] - final_df['interest_rate_original'])**2)
lstm_rnn_mse = np.mean((final_df['interest_rate_lstm'] - final_df['interest_rate_rnn'])**2)
print(f'lstm_mse: {lstm_mse}')
print(f'rnn_mse: {rnn_mse}')
print(f'lstm_rnn_mse: {lstm_rnn_mse}')

lstm_mse: 0.04351921649338991
rnn_mse: 0.01637066414801336
lstm_rnn_mse: 0.01449275016784668
