In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
file_path = r"\data\selected_portfolios.csv"
df = pd.read_csv(file_path, parse_dates=True, index_col='date')
print("Dataset loaded. Shape:", df.shape)

# Extract BIG HiBM returns
big_returns = df['BIG HiBM'].dropna().values.reshape(-1, 1)


Dataset loaded. Shape: (414, 2)


In [4]:
big_returns = df['BIG HiBM'].dropna().values.reshape(-1, 1)

# -------------------------------
# 2. Scale the Data
# -------------------------------
scaler = MinMaxScaler(feature_range=(0, 1))
big_scaled = scaler.fit_transform(big_returns)

In [5]:
# 3. Create Sequences Using a Sliding Window
# -------------------------------
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 12  # e.g., 12 months
X, y = create_sequences(big_scaled, seq_length)
print("Sequences created:", X.shape, y.shape)

Sequences created: (402, 12, 1) (402, 1)


In [6]:
# 4. Align Sequences with Date Index
# -------------------------------
dates = df.index[-len(big_scaled):]
dates_seq = dates[seq_length:]

data_seq = pd.DataFrame(y, index=dates_seq, columns=['target'])

# Split by date: training up to 2015-12-31, testing from 2016-01-01
train_df = data_seq.loc[:'2015-12-31']
test_df = data_seq.loc['2016-01-01':]

X_train = X[train_df.index.get_loc(train_df.index[0]) : train_df.shape[0]+train_df.index.get_loc(train_df.index[0])]
y_train = train_df['target'].values
X_test = X[test_df.index.get_loc(test_df.index[0]) : test_df.shape[0]+test_df.index.get_loc(test_df.index[0])]
y_test = test_df['target'].values

print("Training set:", X_train.shape, y_train.shape)
print("Testing set:", X_test.shape, y_test.shape)

Training set: (294, 12, 1) (294,)
Testing set: (108, 12, 1) (108,)


In [7]:
# 5. Build the GRU Model
# -------------------------------
model = Sequential([
    GRU(50, activation='tanh', return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    GRU(50, activation='tanh'),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()


  super().__init__(**kwargs)


In [8]:
# 6. Train the GRU Model with Early Stopping
# -------------------------------
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=16,
                    validation_split=0.1,
                    callbacks=[early_stop],
                    verbose=1)

# -------------------------------

Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - loss: 0.0919 - val_loss: 0.0086
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0203 - val_loss: 0.0087
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0185 - val_loss: 0.0075
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0168 - val_loss: 0.0076
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0184 - val_loss: 0.0074
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0152 - val_loss: 0.0085
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0209 - val_loss: 0.0078
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0215 - val_loss: 0.0083
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━

In [9]:
# 7. Forecast on the Test Set
# -------------------------------
predictions = model.predict(X_test)
predicted_returns = scaler.inverse_transform(predictions)
actual_returns = scaler.inverse_transform(y_test.reshape(-1, 1))


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


In [10]:
# 8. Evaluate Performance
# -------------------------------
mae_gru = mean_absolute_error(actual_returns, predicted_returns)
rmse_gru = np.sqrt(mean_squared_error(actual_returns, predicted_returns))
print(f"GRU Forecast Performance:\nMAE: {mae_gru:.4f}\nRMSE: {rmse_gru:.4f}")

GRU Forecast Performance:
MAE: 0.0401
RMSE: 0.0523


In [13]:
# 9. Plot Forecasts vs. Actual Returns
# -------------------------------
plt.figure(figsize=(12, 6))
plt.plot(test_df.index, actual_returns, label='Actual Returns', color='blue')
plt.plot(test_df.index, predicted_returns, label='GRU Predictions', color='red', linestyle='--')
plt.title("GRU Forecast of BIG HiBM Returns")
plt.xlabel("Date")
plt.ylabel("Return (decimal)")
plt.legend()
plt.savefig('plots/GRU_forecast.png')
plt.close()