In [11]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LayerNormalization, Dense, Dropout, MultiHeadAttention, Layer,GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

In [12]:
final_df = pd.read_csv("../data/final_features_with_targets.csv", parse_dates=['Date'], index_col='Date')

final_df = final_df.dropna().copy()

assets = ['EEM', 'Gold', 'FTSE100', 'S&P500', 'Nikkei225', 'UST10Y']

#For each asset to pick pick all columns
feature_suffixes = ['log_ret', 'vol_30d', 'zscore', 'sma5', 'macd', 'vol_spike']
global_features = ['covid_flag']
lookback = 20

In [13]:


lookback = 20
X_seq_dict = {}
y_seq_dict = {}

for asset in assets:
    print(f"Processing {asset}...")
    
    # Define feature and target columns
    feature_cols = [f"{asset}_{f}" for f in feature_suffixes] + global_features
    target_col = f"{asset}_target_5d"

    # Features and target extraction 
    data = final_df[feature_cols + [target_col]].copy()

    # Chronological split
    split_idx = int(0.8 * len(data))
    train_data = data.iloc[:split_idx]
    test_data = data.iloc[split_idx:]

  
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(train_data[feature_cols].values)
    X_test_scaled = scaler.transform(test_data[feature_cols].values)

    
    y_train = train_data[target_col].values
    y_test = test_data[target_col].values

    # Function to create sequences 
    def create_sequences(X, y, lookback):
        Xs, ys = [], []
        for i in range(lookback, len(X)):
            Xs.append(X[i - lookback:i])
            ys.append(y[i])
        return np.array(Xs), np.array(ys)

    # Sequences for both train and test
    X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train, lookback)
    X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test, lookback)

    # Save to dictionary
    X_seq_dict[asset] = (X_train_seq, X_test_seq)
    y_seq_dict[asset] = (y_train_seq, y_test_seq)



Processing EEM...
Processing Gold...
Processing FTSE100...
Processing S&P500...
Processing Nikkei225...
Processing UST10Y...


In [14]:
#Testing the EEM data as a sample
asset_to_check = 'Gold'

X_train_seq, X_test_seq = X_seq_dict[asset_to_check]
y_train_seq, y_test_seq = y_seq_dict[asset_to_check]

print(f"{asset_to_check} →")
print(f"X_train_seq shape: {X_train_seq.shape}")
print(f"y_train_seq shape: {y_train_seq.shape}")
print(f"X_test_seq shape:  {X_test_seq.shape}")
print(f"y_test_seq shape:  {y_test_seq.shape}")

Gold →
X_train_seq shape: (2739, 20, 7)
y_train_seq shape: (2739,)
X_test_seq shape:  (670, 20, 7)
y_test_seq shape:  (670,)


In [8]:
seq_len = 20           # lookback already defined but name change for easier understanding
n_features = 7         # All the features we have creared initially 
embed_dim = 128          # Dimention embedding for each step 
num_heads = 8          # Multi head attention, can change this based on results 
ff_dim = 256           # Feedforward layer size inside Transformer block
dropout_rate = 0.05     # Regularization

In [15]:
input_layer = Input(shape=(seq_len, n_features))

In [16]:
class TransformerEncoderBlock(Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.05):
        super(TransformerEncoderBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, inputs, training=None):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [36]:
from tensorflow.keras.models import Model

def transformer_regressor(seq_len, n_features, embed_dim=128, ff_dim=256, num_heads=8):
    inputs = Input(shape=(seq_len, n_features))
    x = Dense(embed_dim)(inputs)

    # --- Use your best architecture (e.g., stacked blocks) ---
    num_encoder_blocks = 6
    for _ in range(num_encoder_blocks):
        encoder_block = TransformerEncoderBlock(embed_dim=embed_dim, ff_dim=ff_dim, num_heads=num_heads)
        x = encoder_block(x)
    # ---

    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.05)(x)
    
    outputs = Dense(1, activation='linear')(x) 

    model = Model(inputs=inputs, outputs=outputs)
    return model
    
model = transformer_regressor(
    seq_len=seq_len, 
    n_features=n_features,
    embed_dim=128,       # Or use your defined variables
    ff_dim=256,
    num_heads=8
)

model.compile(
    loss='mean_squared_error',
    optimizer='adam'           
)
model.summary()

In [37]:

import os

models_dir = "regression_models"
results_dir = "regression_results"
os.makedirs(models_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Initiating dicts to store models and history 
trained_models = {}
history_dict = {}

for asset in assets:
    print(f"--- Training regression model for {asset} ---")

    X_train_seq, X_test_seq = X_seq_dict[asset]
    y_train_seq, y_test_seq = y_seq_dict[asset]

    tf.keras.backend.clear_session()
    
    model = transformer_regressor(
        seq_len=seq_len,
        n_features=n_features,
        embed_dim=embed_dim,
        ff_dim=ff_dim,
        num_heads=num_heads
    )
    
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mae']
    )

    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )

    history = model.fit(
        X_train_seq,
        y_train_seq,
        epochs=100,
        batch_size=32,
        validation_split=0.2, 
        callbacks=[early_stopping],
        verbose=1
    )

    trained_models[asset] = model
    history_dict[asset] = history
    
    
    print(f"\n Saving results for {asset} ")

  
    model_path = os.path.join(models_dir, f"{asset}_regressor_model.keras")
    model.save(model_path)
    print(f"Model saved to: {model_path}")

    
    history_df = pd.DataFrame(history.history)
    history_path = os.path.join(results_dir, f"{asset}_regressor_history.csv")
    history_df.to_csv(history_path)
    print(f"History saved to: {history_path}")

    y_pred = model.predict(X_test_seq)
    predictions_df = pd.DataFrame({
        'actual_return': y_test_seq.flatten(),
        'predicted_return': y_pred.flatten()
    })
    predictions_path = os.path.join(results_dir, f"{asset}_regressor_predictions.csv")
    predictions_df.to_csv(predictions_path, index=False)
    print(f"Predictions saved to: {predictions_path}")

    print(f"\n--- Evaluating {asset} on Test Data ---")
    test_loss, test_mae = model.evaluate(X_test_seq, y_test_seq, verbose=0)
    print(f"Test Loss (MSE) for {asset}: {test_loss:.6f}")
    print(f"Test Mean Absolute Error (MAE) for {asset}: {test_mae:.6f}")
    print("-" * 50)


--- Training regression model for EEM ---
Epoch 1/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 350ms/step - loss: 0.8682 - mae: 0.6176 - val_loss: 0.0051 - val_mae: 0.0547
Epoch 2/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 272ms/step - loss: 0.1316 - mae: 0.2888 - val_loss: 0.0040 - val_mae: 0.0460
Epoch 3/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 216ms/step - loss: 0.0902 - mae: 0.2267 - val_loss: 0.0035 - val_mae: 0.0402
Epoch 4/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 219ms/step - loss: 0.0959 - mae: 0.2436 - val_loss: 0.0096 - val_mae: 0.0920
Epoch 5/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 217ms/step - loss: 0.1135 - mae: 0.2562 - val_loss: 7.7715e-04 - val_mae: 0.0200
Epoch 6/100
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 222ms/step - loss: 0.0927 - mae: 0.2227 - val_loss: 8.6968e-04 - val_mae: 0.0245
Epoch 7/100
[1m69/69[0m 

In [38]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os

# Define the folder where your results are stored
results_dir = "regression_results"
print("--- Calculating Final Performance Metrics for All Assets ---")

for asset in assets:
    try:
        # Construct the path to the prediction file
        predictions_path = os.path.join(results_dir, f"{asset}_regressor_predictions.csv")
        
        # Check if the prediction file exists
        if not os.path.exists(predictions_path):
            print(f"\n--- SKIPPING: Prediction file for {asset} not found. ---")
            continue

        print(f"\n--- Performance Metrics for: {asset} ---")
        df = pd.read_csv(predictions_path)

        # Robustly clean the data to ensure it's numeric
        df['actual_return'] = pd.to_numeric(df['actual_return'], errors='coerce')
        df['predicted_return'] = pd.to_numeric(df['predicted_return'], errors='coerce')
        df.dropna(inplace=True)

        # Check if the DataFrame is empty after cleaning
        if df.empty:
            print("WARNING: No valid data after cleaning. Cannot calculate metrics.")
            continue

        # Extract the true and predicted values
        y_true = df['actual_return']
        y_pred = df['predicted_return']
        
        # Calculate and print performance metrics
        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true, y_pred)
        
        print(f"Mean Squared Error (MSE):      {mse:.6f}")
        print(f"Root Mean Squared Error (RMSE):  {rmse:.6f}")
        print(f"Mean Absolute Error (MAE):     {mae:.6f}")

    except Exception as e:
        # If any other error occurs, print it and move to the next asset
        print(f"--- FAILED TO PROCESS {asset}: An error occurred ---")
        print(f"Error message: {e}")
        continue


--- Calculating Final Performance Metrics for All Assets ---

--- Performance Metrics for: EEM ---
Mean Squared Error (MSE):      0.000151
Root Mean Squared Error (RMSE):  0.012292
Mean Absolute Error (MAE):     0.009209

--- Performance Metrics for: Gold ---
Mean Squared Error (MSE):      0.000121
Root Mean Squared Error (RMSE):  0.010981
Mean Absolute Error (MAE):     0.008661

--- Performance Metrics for: FTSE100 ---
Mean Squared Error (MSE):      0.000100
Root Mean Squared Error (RMSE):  0.010013
Mean Absolute Error (MAE):     0.007939

--- Performance Metrics for: S&P500 ---
Mean Squared Error (MSE):      0.000176
Root Mean Squared Error (RMSE):  0.013277
Mean Absolute Error (MAE):     0.010233

--- Performance Metrics for: Nikkei225 ---
Mean Squared Error (MSE):      0.000195
Root Mean Squared Error (RMSE):  0.013973
Mean Absolute Error (MAE):     0.009804

--- Performance Metrics for: UST10Y ---
Mean Squared Error (MSE):      0.000613
Root Mean Squared Error (RMSE):  0.024754
Me

### Insights
best Performing Models: The models for FTSE100 and Gold exhibit the lowest prediction error, with MAEs of 0.69% and 0.76%, respectively. These models provide the most reliable forecasts.
Highest Error Model: The S&P500 model has the highest MAE at 2.46%, indicating its predictions are currently the least reliable.
Asset Class Variation: There is significant performance variation across assets. The models appear to be more effective for assets that historically have different volatility profiles compared to major US equities.
Error Magnitude: The Root Mean Squared Error (RMSE) is consistently higher than the MAE for all models. This is expected and indicates the presence of some larger prediction errors, which are penalized more heavily by the RMSE calculation.

### Transformer Regression Model: Improved Performance Metrics (Post-Tuning)

The previous insights were with minimal tuning and after retuning hyperparameters and retraining, the regression models now provide better out-of-sample predictive performance, as measured by Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE):

| Asset      | MSE      | RMSE    | MAE    |
| :----------|--------: |-------: |------: |
| **EEM**    | 0.000151 | 0.0123  | 0.0092 |
| **Gold**   | 0.000121 | 0.0110  | 0.0087 |
| **FTSE100**| 0.000100 | 0.0100  | 0.0079 |
| **S&P500** | 0.000176 | 0.0133  | 0.0102 |
| **Nikkei225** | 0.000195 | 0.0140 | 0.0098 |
| **UST10Y** | 0.000613 | 0.0248  | 0.0192 |

**Key observations:**
- **FTSE100, Gold, and EEM models now achieve sub-1% average prediction error (MAE),** with FTSE100 being the most accurate.
- **S&P500 and Nikkei225 show improvement with MAEs around 1%,** validating impact of hyperparameter tuning.
- **UST10Y remains the hardest to predict (MAE: 1.92%),** possibly reflecting the higher unpredictability in bond yields.

**Interpretation and Next Steps:**
- The tighter range and improved accuracy should enable more meaningful, differentiated portfolio allocations—minimizing fallback to equal weighting.
- Proceed to the portfolio management backtest and verify that allocations truly vary over time in response to model signals. Compare the Sharpe ratio and drawdown to earlier baseline runs.
