#### LSTM For Returns Forecasting 


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Load the dataset
file_path = r""
df = pd.read_csv(file_path, parse_dates=True, index_col='date')
print("Dataset loaded. Shape:", df.shape)


Dataset loaded. Shape: (414, 2)


Feature engineering for the LSTM Model

In [4]:
# extract the returns of the BIG HiBM portfolio and reshape for scaler
big_returns = df['BIG HiBM'].dropna().values.reshape(-1, 1)

In [5]:
# scaling the data
scaler = MinMaxScaler(feature_range=(0, 1))
big_scaled = scaler.fit_transform(big_returns)

In [7]:
# create sequences
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 12  # Using the previous 12 months to predict the next month
X, y = create_sequences(big_scaled, seq_length)
print("Sequences created:", X.shape, y.shape)


Sequences created: (402, 12, 1) (402, 1)


In [9]:
# Aligningn the sequences with the index
dates = df.index[-len(big_scaled):]
dates_seq = dates[seq_length:]

In [11]:
# 5. Train/Test Split Based on Dates
data_seq = pd.DataFrame(y, index=dates_seq, columns=['target'])
train_df = data_seq.loc[:'2015-12-31']
test_df = data_seq.loc['2016-01-01':]

X_train, y_train = X[train_df.index.get_loc(train_df.index[0]) : train_df.shape[0]+train_df.index.get_loc(train_df.index[0])], train_df['target'].values
X_test, y_test = X[test_df.index.get_loc(test_df.index[0]) : test_df.shape[0]+test_df.index.get_loc(test_df.index[0])], test_df['target'].values

print("Training set:", X_train.shape, y_train.shape)
print("Testing set:", X_test.shape, y_test.shape)


Training set: (294, 12, 1) (294,)
Testing set: (108, 12, 1) (108,)


Building the LSTM Model

In [12]:
model = Sequential([
    LSTM(50, activation='tanh', return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    LSTM(50, activation='tanh'),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

  super().__init__(**kwargs)


In [13]:
# 7. Train the LSTM Model with Early Stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=16, 
                    validation_split=0.1, callbacks=[early_stop], verbose=1)


Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.1659 - val_loss: 0.0073
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0204 - val_loss: 0.0090
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0187 - val_loss: 0.0074
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0181 - val_loss: 0.0076
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0170 - val_loss: 0.0069
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0194 - val_loss: 0.0070
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0193 - val_loss: 0.0069
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0185 - val_loss: 0.0069
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━

Forecasting on Test Set

In [14]:
predictions = model.predict(X_test)
predicted_returns = scaler.inverse_transform(predictions)
actual_returns = scaler.inverse_transform(y_test.reshape(-1, 1))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


In [15]:
# 9. Evaluate Model Performance
mae_lstm = mean_absolute_error(actual_returns, predicted_returns)
rmse_lstm = np.sqrt(mean_squared_error(actual_returns, predicted_returns))
print(f"LSTM Forecast Performance:\nMAE: {mae_lstm:.4f}\nRMSE: {rmse_lstm:.4f}")

LSTM Forecast Performance:
MAE: 0.0402
RMSE: 0.0523


In [18]:

plt.figure(figsize=(12, 6))
plt.plot(test_df.index, actual_returns, label='Actual Returns', color='blue')
plt.plot(test_df.index, predicted_returns, label='LSTM Predictions', color='red', linestyle='--')
plt.title("LSTM Forecast of BIG HiBM Returns")
plt.xlabel("Date")
plt.ylabel("Return (decimal)")
plt.legend()
plt.savefig('plots/lstm_forecast.png')
plt.close()
