In [22]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

pairs = [('VO','IWB'), ('VO','SCHB'), ('VO','SPY'), ('VO','SCHX'), ('VO','VOO'), ('DVY','IWR'), ('IWR','DIA'), ('DVY','OEF'), ('DVY','VYM')] # closing

# pairs = [('VO', 'VOO'), ('VTI', 'VO'), ('VO','SCHB'), ('VO','SPLG'), ('DVY','IWB'), ('DVY','IWR'), ('DVY','VTI'), ('VO','ITOT'), ('VO','IWB')] # opening

downloadable_tickers = [ticker for pair in pairs for ticker in pair]

training_data = yf.download(downloadable_tickers, start = '2015-01-01', end = '2020-01-01')['Close']
testing_data = yf.download(downloadable_tickers, start = '2020-01-02', end = '2024-12-31')['Close']


  training_data = yf.download(downloadable_tickers, start = '2015-01-01', end = '2020-01-01')['Close']
[*********************100%***********************]  11 of 11 completed
  testing_data = yf.download(downloadable_tickers, start = '2020-01-02', end = '2024-12-31')['Close']
[*********************100%***********************]  11 of 11 completed


In [23]:
def predict_mean_reversion_label(spread, lower, upper):
    mean = spread.mean()
    std = spread.std()
    z_scores = (spread - mean) / std

    labels = ((z_scores <= lower) | (z_scores >= upper)).astype(int)

    return pd.Series(labels, index=spread.index)

In [24]:
def zscore_calc_test(series, mean, std):
    return (series - mean) / std

def zscore_calc_train(series):
    return (series - series.mean()) / series.std(), series.mean(), series.std()

def create_features(spread, window):
    df = pd.DataFrame({'spread': spread})

    df['z-score'] = (df['spread'] - df['spread'].mean()) / df['spread'].std()
    df['z-score_lag1'] = df['z-score'].shift(1)
    df['z-score_lag2'] = df['z-score'].shift(2)
    df['z-score_lag3'] = df['z-score'].shift(3)
    df['z-score_lag4'] = df['z-score'].shift(4)
    df['z-score_lag5'] = df['z-score'].shift(5)
    df['spread_lag1'] = df['spread'].shift(1)
    df['spread_lag2'] = df['spread'].shift(2)
    df['spread_lag3'] = df['spread'].shift(3)
    df['spread_lag4'] = df['spread'].shift(4)
    df['spread_lag5'] = df['spread'].shift(5)
    df['rolling_mean'] = df['spread'].rolling(window).mean()
    df['volatility'] = df['spread'].pct_change().rolling(window).std()
    
    df = df.dropna()
    return df

training_spreads = {}
testing_spreads = {}

X_train_dict = {}
y_train_dict = {}
X_test_dict = {}
y_test_dict = {}

for etf1, etf2 in pairs:

    pair_name = f'{etf1}_{etf2}'

    window = 5

    train_z1, mean_z1, std_z1 = zscore_calc_train(training_data[etf1])
    train_z2, mean_z2, std_z2 = zscore_calc_train(training_data[etf2])

    test_z1 = zscore_calc_test(testing_data[etf1], mean_z1, std_z1)
    test_z2 = zscore_calc_test(testing_data[etf2], mean_z2, std_z2)

    training_spread, testing_spread = train_z1 - train_z2, test_z1 - test_z2
    training_spreads[pair_name], testing_spreads[pair_name] = training_spread, testing_spread

    train_features = create_features(training_spread, window = window)
    test_features = create_features(testing_spread, window = window)

    train_labels = predict_mean_reversion_label(training_spread, lower = -1, upper = 1)

    train_features['label'] = train_labels.loc[train_features.index]
    train_features = train_features.dropna(subset=['label'])

    test_labels = predict_mean_reversion_label(testing_spread, lower = -1, upper = 1)
    test_features['label'] = test_labels.loc[test_features.index]
    test_features = test_features.dropna(subset=['label'])

    X_train_dict[pair_name] = train_features.drop(columns=['label'])
    y_train_dict[pair_name] = train_features['label']
    X_test_dict[pair_name] = test_features.drop(columns=['label'])
    y_test_dict[pair_name] = test_features['label']


training_spreads_df = pd.DataFrame(training_spreads)
testing_spreads_df = pd.DataFrame(testing_spreads)

In [25]:
scalers = {}
knn_models = {}
reports = {}

for pair_name in X_train_dict.keys():
    print(f"Training KNN for {pair_name}")
    
    # Extract train/test
    X_train, y_train = X_train_dict[pair_name], y_train_dict[pair_name]
    X_test, y_test = X_test_dict[pair_name], y_test_dict[pair_name]
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    scalers[pair_name] = scaler
    
    # Train KNN
    knn = KNeighborsClassifier(n_neighbors = 5)  # you can try k=3,5,7
    knn.fit(X_train_scaled, y_train)
    knn_models[pair_name] = knn
    
    # Evaluate
    y_pred = knn.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    
    # Save full report
    reports[pair_name] = {
        "accuracy": acc,
        "report": classification_report(y_test, y_pred, output_dict=True)
    }

Training KNN for VO_IWB
Accuracy: 0.5296
              precision    recall  f1-score   support

           0       0.81      0.37      0.51       822
           1       0.41      0.84      0.55       430

    accuracy                           0.53      1252
   macro avg       0.61      0.60      0.53      1252
weighted avg       0.68      0.53      0.52      1252

Training KNN for VO_SCHB
Accuracy: 0.6118
              precision    recall  f1-score   support

           0       0.92      0.48      0.63       860
           1       0.44      0.90      0.59       392

    accuracy                           0.61      1252
   macro avg       0.68      0.69      0.61      1252
weighted avg       0.77      0.61      0.62      1252

Training KNN for VO_SPY
Accuracy: 0.5535
              precision    recall  f1-score   support

           0       0.98      0.28      0.43       766
           1       0.46      0.99      0.63       486

    accuracy                           0.55      1252
   m

In [27]:
param_grid = {
    'n_neighbors': [3, 5, 7, 11, 21],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

# Storage dictionaries
best_params_dict = {}
best_scores_dict = {}
knn_models = {}
scalers = {}
test_reports = {}

# Fixed random seed for reproducibility
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Loop through all ETF pairs
for pair_name in X_train_dict.keys():
    print(f"\nGrid search KNN for {pair_name}")
    
    X_train, y_train = X_train_dict[pair_name], y_train_dict[pair_name]
    X_test, y_test = X_test_dict[pair_name], y_test_dict[pair_name]
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    scalers[pair_name] = scaler
    
    # Grid search with 5-fold CV
    knn = KNeighborsClassifier()
    grid_search = GridSearchCV(knn, param_grid, cv=cv, scoring='f1_macro', n_jobs=-1, verbose=1)
    grid_search.fit(X_train_scaled, y_train)
    
    # Store best parameters & CV score
    best_params_dict[pair_name] = grid_search.best_params_
    best_scores_dict[pair_name] = grid_search.best_score_
    
    # Evaluate on test set
    best_knn = grid_search.best_estimator_
    knn_models[pair_name] = best_knn
    y_pred = best_knn.predict(X_test_scaled)
    
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    print(f"Test Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    
    test_reports[pair_name] = {
        'accuracy': acc,
        'report': report
    }

print("\nGrid Search Completed for All Pairs")


Grid search KNN for VO_IWB
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Test Accuracy: 0.5280
              precision    recall  f1-score   support

           0       0.81      0.37      0.50       822
           1       0.41      0.84      0.55       430

    accuracy                           0.53      1252
   macro avg       0.61      0.60      0.53      1252
weighted avg       0.67      0.53      0.52      1252


Grid search KNN for VO_SCHB
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Test Accuracy: 0.6118
              precision    recall  f1-score   support

           0       0.92      0.48      0.63       860
           1       0.44      0.90      0.59       392

    accuracy                           0.61      1252
   macro avg       0.68      0.69      0.61      1252
weighted avg       0.77      0.61      0.62      1252


Grid search KNN for VO_SPY
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Test Accuracy: 0.5543
         