In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from itertools import product


pairs = [['IEMG', 'EEM'], ['ARKK', 'ARKW'], ['TLT', 'SPTL'], ['SHY', 'VGSH'], ['SOXX', 'ITA']]

downloadable_tickers = [ticker for pair in pairs for ticker in pair]

training_data = yf.download(downloadable_tickers, start = '2015-01-01', end = '2020-01-01')['Close']
testing_data = yf.download(downloadable_tickers, start = '2020-01-02', end = '2024-12-31')['Close']

  training_data = yf.download(downloadable_tickers, start = '2015-01-01', end = '2020-01-01')['Close']
[*********************100%***********************]  10 of 10 completed
  testing_data = yf.download(downloadable_tickers, start = '2020-01-02', end = '2024-12-31')['Close']
[*********************100%***********************]  10 of 10 completed


In [63]:
def predict_mean_reversion_label(spread, window, threshold):
    labels = []
    mean = spread.mean()

    for i in range(len(spread)):
        future_spread = spread[i + 1 : i + 1 + window]
        if len(future_spread) < window:
            labels.append(np.nan)
        elif any(abs(j - mean) < threshold for j in future_spread):
            labels.append(1)
        else:
            labels.append(0)

    return pd.Series(labels, index = spread.index)

In [64]:
def zscore_calc_test(series, mean, std):
    return (series - mean) / std

def zscore_calc_train(series):
    return (series - series.mean()) / series.std(), series.mean(), series.std()

def create_features(spread, pair, window):
    df = pd.DataFrame({'spread': spread})
    df['z-score'] = (df['spread'] - df['spread'].mean()) / df['spread'].std()
    df['z-score_lag1'] = df['z-score'].shift(1)
    df['spread_lag1'] = df['spread'].shift(1)
    df['rolling_mean'] = df['spread'].rolling(window).mean()
    df['rolling_std'] = df['spread'].rolling(window).std()
    df['volatility'] = df['spread'].pct_change().rolling(window).std()
    df = df.dropna()
    return df

training_spreads = {}
testing_spreads = {}

X_train_dict = {}
y_train_dict = {}
X_test_dict = {}
y_test_dict = {}

optimal_window_threshold = {'IEMG_EEM': {'window': 3, 'threshold': 0.01},
                            'ARKK_ARKW': {'window': 7, 'threshold': 0.03},
                            'TLT_SPTL': {'window': 7, 'threshold': 0.01},
                            'SHY_VGSH': {'window': 7, 'threshold': 0.01},
                            'SOXX_ITA': {'window': 20, 'threshold': 0.1}
}

for etf1, etf2 in pairs:

    pair_name = f'{etf1}_{etf2}'

    window = optimal_window_threshold[pair_name]['window']

    threshold = optimal_window_threshold[pair_name]['threshold']

    train_z1, mean_z1, std_z1 = zscore_calc_train(training_data[etf1])
    train_z2, mean_z2, std_z2 = zscore_calc_train(training_data[etf2])

    test_z1 = zscore_calc_test(testing_data[etf1], mean_z1, std_z1)
    test_z2 = zscore_calc_test(testing_data[etf2], mean_z2, std_z2)

    training_spread, testing_spread = train_z1 - train_z2, test_z1 - test_z2
    training_spreads[pair_name], testing_spreads[pair_name] = training_spread, testing_spread

    train_features = create_features(training_spread, pair_name, window = window)
    test_features = create_features(testing_spread, pair_name, window = window)

    train_labels = predict_mean_reversion_label(training_spread, window = window, threshold = threshold)
    train_features['label'] = train_labels.loc[train_features.index]
    train_features = train_features.dropna(subset=['label'])

    test_labels = predict_mean_reversion_label(testing_spread, window = window, threshold = threshold)
    test_features['label'] = test_labels.loc[test_features.index]
    test_features = test_features.dropna(subset=['label'])

    X_train_dict[pair_name] = train_features.drop(columns=['label'])
    y_train_dict[pair_name] = train_features['label']
    X_test_dict[pair_name] = test_features.drop(columns=['label'])
    y_test_dict[pair_name] = test_features['label']


training_spreads_df = pd.DataFrame(training_spreads)
testing_spreads_df = pd.DataFrame(testing_spreads)

In [65]:
def create_sequences(X, y, window_size):
    X_seq, y_seq  = [], []

    for i in range(len(X) - window_size):
        X_seq.append(X.iloc[i : i + window_size].values)
        y_seq.append(y.iloc[i + window_size])

    return np.array(X_seq), np.array(y_seq)

In [66]:
class ClassificationLSTM(nn.Module):
    def __init__(self, input_size = 7, hidden_size = 50, num_layers = 1):
        super(ClassificationLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
        self.linear = nn.Linear(hidden_size, 1)
        # self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        out = self.linear(out)
        # out = self.sigmoid(out)
        return out.squeeze()

In [67]:
def train_model(model, train_loader, criterion, optimizer, epochs = 50, device = 'cpu'):
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}")

In [68]:
def evaluate_model(model, pair_name, test_loader, device = 'cpu'):
    model.eval()
    all_preds = []
    all_true = []
    
    optimal_thresholds = {'IEMG_EEM': 0.5,
                          'ARKK_ARKW': 0.45,
                          'TLT_SPTL': 0.5,
                          'SHY_VGSH': 0.45,
                          'SOXX_ITA': 0.105
                          }
    
    threshold = optimal_thresholds[pair_name]

    with torch.no_grad():
        for xb, yb in test_loader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            preds = torch.sigmoid(logits)
            all_preds.append(preds.cpu())
            all_true.append(yb.cpu())
    y_pred = torch.cat(all_preds).numpy()
    y_true = torch.cat(all_true).numpy()
    y_pred_label = (y_pred > threshold).astype(int)
    accuracy = accuracy_score(y_true, y_pred_label)
    print("Test Accuracy:", accuracy)
    return y_true, y_pred, y_pred_label

In [69]:
def plot_labels(X_df, y_pred_label, pair_name):
    spread = X_df['spread']
    
    aligned_spread = spread[-len(y_pred_label):]
    predicted_points = aligned_spread[y_pred_label == 1]

    plt.figure(figsize=(12, 5))
    sns.lineplot(x=aligned_spread.index, y=aligned_spread.values, label='Spread', color='black')
    sns.scatterplot(x=predicted_points.index, y=predicted_points.values, color='green', label='Predicted Reversion', s=100, marker='o')
    plt.title(f"Predicted Mean Reversions of {pair_name}")
    plt.xlabel("Date")
    plt.ylabel("Spread")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [70]:
def print_confusion_matrix(y_true, y_pred_label):
    cm = confusion_matrix(y_true, y_pred_label, labels = [0, 1])
    print("Confusion Matrix:")
    print(cm)

In [71]:
def print_f1(y_true, y_pred_label):
    f1 = f1_score(y_true, y_pred_label)
    print(f"F1 Score: {f1:.4f}")

In [72]:
def run_pair(pair_name, X_train_dict, y_train_dict, X_test_dict, y_test_dict, window_size, epochs, device='cpu'):
    
    X_df = X_train_dict[pair_name]
    y_series = y_train_dict[pair_name]

    X_seq, y_seq = create_sequences(X_df, y_series, window_size = window_size)

    X_train = torch.tensor(X_seq, dtype=torch.float32)
    y_train = torch.tensor(y_seq, dtype=torch.float32).unsqueeze(-1)

    pos_weight = torch.tensor([(len(y_train) - y_train.sum()) / y_train.sum()]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight = pos_weight)

    X_test_df = X_test_dict[pair_name]
    y_test_series = y_test_dict[pair_name]

    X_test_seq, y_test_seq = create_sequences(X_test_df, y_test_series, window_size = window_size)
    
    X_test = torch.tensor(X_test_seq, dtype=torch.float32)
    y_test = torch.tensor(y_test_seq, dtype=torch.float32).unsqueeze(-1)
    
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle = False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle = False)
    
    model = ClassificationLSTM(input_size = 7).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    print(f"\nTraining model for pair: {pair_name}")
    train_model(model, train_loader, criterion, optimizer, epochs=epochs, device=device)
    
    print(f"\nEvaluating model for pair: {pair_name}")
    y_true, y_pred, y_pred_label = evaluate_model(model, pair_name, test_loader, device=device)

    plot_labels(X_test_df, y_pred_label, pair_name)

    print_confusion_matrix(y_true, y_pred_label)

    print_f1(y_true, y_pred_label)
    
    return model, y_true, y_pred

In [73]:
# grid search

def grid_search(pair_name, params, device='cpu'):
    window_size = params['window_size']
    epochs = params['epochs']
    hidden_size = params['hidden_size']
    lr = params['learning_rate']
    num_layers = params['num_layers']

    X_df = X_train_dict[pair_name]
    y_series = y_train_dict[pair_name]

    X_seq, y_seq = create_sequences(X_df, y_series, window_size=window_size)

    X_train = torch.tensor(X_seq, dtype=torch.float32)
    y_train = torch.tensor(y_seq, dtype=torch.float32).unsqueeze(-1)

    pos_weight = torch.tensor([(len(y_train) - y_train.sum()) / y_train.sum()]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    X_test_df = X_test_dict[pair_name]
    y_test_series = y_test_dict[pair_name]

    X_test_seq, y_test_seq = create_sequences(X_test_df, y_test_series, window_size=window_size)

    X_test = torch.tensor(X_test_seq, dtype=torch.float32)
    y_test = torch.tensor(y_test_seq, dtype=torch.float32).unsqueeze(-1)

    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    model = ClassificationLSTM(input_size=X_train.shape[2], hidden_size=hidden_size, num_layers = num_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_model(model, train_loader, criterion, optimizer, epochs=epochs, device=device)

    y_true, y_pred, y_pred_label = evaluate_model(model, pair_name, test_loader, device=device)

    f1 = f1_score(y_true, y_pred_label)

    return f1, model, params

In [74]:
def grid_search_for_pair(pair_name, param_grid, device='cpu'):
    keys, values = zip(*param_grid.items())
    best_f1 = -np.inf
    best_params = None
    best_model = None

    for v in product(*values):
        params = dict(zip(keys, v))
        print(f"Testing params: {params}")
        f1, model, _ = grid_search(pair_name, params, device=device)
        print(f"F1 Score: {f1:.4f}")
        if f1 > best_f1:
            best_f1 = f1
            best_params = params
            best_model = model

    print(f"Best params for {pair_name}: {best_params}")
    print(f"Best F1 score: {best_f1:.4f}")
    return best_model, best_params, best_f1

In [75]:
results = {}

param_grid = {
    'hidden_size': [30, 50, 70],
    'learning_rate': [0.001, 0.0005, 0.0001],
    'window_size': [3, 5, 7],
    'epochs': [50],
    'batch_size': [32, 64, 128],
    'num_layers': [1, 2]
}

best_params_all = {}
best_f1_all = {}

for pair_name in training_spreads_df.columns:
    print(f"\nStarting grid search for {pair_name}")
    model, params, f1 = grid_search_for_pair(pair_name, param_grid, device='cpu')
    results[pair_name] = {'model': model}
    best_params_all[pair_name] = params
    best_f1_all[pair_name] = f1

# for pair_name in training_spreads_df.columns:
#     model, y_true, y_pred = run_pair(
#         pair_name,
#         X_train_dict,
#         y_train_dict,
#         X_test_dict,
#         y_test_dict,
#         window_size = 3,
#         epochs = 50,
#         device='cpu'
#     )
#     results[pair_name] = {'model': model, 'y_true': y_true, 'y_pred': y_pred}

# print(results)


Starting grid search for IEMG_EEM
Testing params: {'hidden_size': 30, 'learning_rate': 0.001, 'window_size': 3, 'epochs': 50, 'batch_size': 32, 'num_layers': 1}
Epoch 1/50 - Loss: 0.7742
Epoch 2/50 - Loss: 0.7633
Epoch 3/50 - Loss: 0.7541
Epoch 4/50 - Loss: 0.7439
Epoch 5/50 - Loss: 0.7321
Epoch 6/50 - Loss: 0.7185
Epoch 7/50 - Loss: 0.7031
Epoch 8/50 - Loss: 0.6859
Epoch 9/50 - Loss: 0.6677
Epoch 10/50 - Loss: 0.6499
Epoch 11/50 - Loss: 0.6341
Epoch 12/50 - Loss: 0.6209
Epoch 13/50 - Loss: 0.6099
Epoch 14/50 - Loss: 0.6007
Epoch 15/50 - Loss: 0.5929
Epoch 16/50 - Loss: 0.5862
Epoch 17/50 - Loss: 0.5803
Epoch 18/50 - Loss: 0.5752
Epoch 19/50 - Loss: 0.5707
Epoch 20/50 - Loss: 0.5666
Epoch 21/50 - Loss: 0.5630
Epoch 22/50 - Loss: 0.5597
Epoch 23/50 - Loss: 0.5567
Epoch 24/50 - Loss: 0.5539
Epoch 25/50 - Loss: 0.5514
Epoch 26/50 - Loss: 0.5490
Epoch 27/50 - Loss: 0.5468
Epoch 28/50 - Loss: 0.5447
Epoch 29/50 - Loss: 0.5428
Epoch 30/50 - Loss: 0.5411
Epoch 31/50 - Loss: 0.5395
Epoch 32/5

In [76]:
def simulate_lstm_portfolio(X_df, probs, y_pred_label, initial_cash = 100000, holding_period = 5, entry_threshold = 1, prob_threshold = 0.5):
    df = X_df.copy()
    df = df.iloc[-len(y_pred_label):].copy()

    # df['prediction'] = y_pred_label

    df['probability'] = probs
    df['position'] = 0
    
    cash = initial_cash
    equity_curve = []
    equity_dates = []

    trade_count = 0
    trade_pnls = []
    trade_dates = []
    notional = 10000

    for i in range(len(df) - holding_period):
        current_row = df.iloc[i]
        date = df.index[i]

        prob = current_row['probability']

        # current_row['prediction'] == 1

        if prob > prob_threshold and abs(current_row['z-score']) > entry_threshold:
            entry_spread = current_row['spread']
            exit_spread = df.iloc[i + holding_period]['spread']
            
            if current_row['z-score'] > 0:
                pnl = (entry_spread - exit_spread) * notional * prob
            else:
                pnl = (exit_spread - entry_spread) * notional * prob

            cash += pnl
            trade_count += 1
            trade_pnls.append(pnl)
            trade_dates.append(date)

        equity_curve.append(cash)
        equity_dates.append(date)

    equity_series = pd.Series(equity_curve, index = equity_dates)
    yearly_values = equity_series.resample('YE').last()

    stats = {
        'final_cash': cash,
        'total_trades': trade_count,
        'trade_pnls': trade_pnls,
        'trade_dates': trade_dates,
        'equity_series': equity_series,
        'yearly_values': yearly_values
    }

    return stats

In [77]:
total_cash = 0
pair_trade_counts = defaultdict(int)
pair_pnls = defaultdict(list)
combined_equity = pd.Series(dtype = float)

plt.figure(figsize = (14, 6))

optimal_thresholds = {'IEMG_EEM': 0.5,
                      'ARKK_ARKW': 0.45,
                      'TLT_SPTL': 0.5,
                      'SHY_VGSH': 0.45,
                      'SOXX_ITA': 0.105
                      }
    
for pair_name in training_spreads_df.columns:
    model_info = results[pair_name]
    model = model_info['model']
    y_true = model_info['y_true']
    y_pred = model_info['y_pred']

    threshold = optimal_thresholds[pair_name]

    y_pred_label = (torch.sigmoid(torch.tensor(y_pred)) > threshold).int().numpy()

    probs = torch.sigmoid(torch.tensor(y_pred)).squeeze().cpu().numpy()

    X_test_df = X_test_dict[pair_name].copy()
    stats = simulate_lstm_portfolio(X_test_df, probs, y_pred_label)

    total_cash += stats['final_cash']
    pair_trade_counts[pair_name] = stats['total_trades']
    pair_pnls[pair_name] = stats['trade_pnls']

    combined_equity = combined_equity.add(stats['equity_series'], fill_value = 0)

    plt.plot(stats['equity_series'], label = pair_name)

plt.title("Equity Curve by Pair")
plt.xlabel("Date")
plt.ylabel("Value")
plt.legend()
plt.tight_layout()
plt.show()

print(f"\nFinal Portfolio Value: ${total_cash:,.2f}")
print(f"\nTrade Summary: ")
for pair, count in pair_trade_counts.items():
    total_pnl = sum(pair_pnls[pair])
    avg_pnl = np.mean(pair_pnls[pair]) if pair_pnls[pair] else 0
    print(f"  {pair}: {count} trades | Total PnL: ${total_pnl:.2f} | Avg PnL: ${avg_pnl:.2f}")

yearly = combined_equity.resample('YE').last()
print("\nYear-End Portfolio Values:")
print(yearly)

plt.figure(figsize=(10, 5))
yearly.plot(marker='o')
plt.title("Year-End Portfolio Value (LSTM-based)")
plt.ylabel("Value")
plt.grid(True)
plt.tight_layout()
plt.show()

KeyError: 'y_true'

<Figure size 1400x600 with 0 Axes>