In [34]:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from itertools import product
import matplotlib.pyplot as plt
import seaborn as sns

# Read the feature matrix and the target vector from the .npy files under Data/.
# The original notes give the shapes as (700, 6) and (700,) — not verified here.
X_train_input, Y_train_input = (
    np.load("Data/X_train.npy"),
    np.load("Data/Y_train.npy"),
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the loaded samples as a test set. The fixed random_state
# makes the shuffled split reproducible from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_train_input,
    Y_train_input,
    test_size=0.2,
    shuffle=True,
    random_state=19,
)

In [36]:
from sklearn.preprocessing import StandardScaler

# Learn the standardisation statistics from the training split only, then
# apply that same fitted transform to both splits so the test split never
# influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [37]:
from sklearn.decomposition import PCA
# Reduce dimensionality with PCA, fitted on the scaled training split only.
pca = PCA(n_components=0.99)  # fractional n_components: keep enough components to explain 99% of the variance
X_train_pca = pca.fit_transform(X_train_scaled)
# Project the test split onto the components learned from the training split.
X_test_pca = pca.transform(X_test_scaled)


In [38]:
# Search state, filled in by the grid-search loop
results = []
best_params = None
best_r2 = float("-inf")

# 5-fold cross-validation; shuffling with a fixed seed keeps the folds reproducible
kf = KFold(n_splits=5, shuffle=True, random_state=100)

# Narrow candidate grids: `gamma` is passed to the RBF kernel and `alpha` is
# the regularisation strength of the kernel ridge model
alpha_list = [0.0016, 0.0017, 0.0018, 0.0019, 0.002, 0.0021, 0.0022]
gamma_list = [0.1, 0.11, 0.12, 0.13, 0.14, 0.15]

In [39]:
# Grid search over (gamma, alpha) for an RBF kernel ridge model, scored by the
# mean R2 across the K-fold splits of the PCA-transformed training data.
#
# The search state is (re)initialised here so that re-running this cell starts
# from scratch instead of appending duplicate rows to `results` and comparing
# against a stale `best_r2`.
#
# NOTE(review): the scaler and PCA were fitted on all of X_train before these
# folds are drawn, so every validation fold has already influenced the
# preprocessing and the CV score may be slightly optimistic. Fitting the
# preprocessing inside each fold would remove that — confirm whether it
# matters for this analysis.
best_r2 = -np.inf
best_params = None
results = []

for gamma, alpha in product(gamma_list, alpha_list):

    r2_scores = []

    for train_idx, val_idx in kf.split(X_train_pca):
        X_tr, X_val = X_train_pca[train_idx], X_train_pca[val_idx]
        Y_tr, Y_val = Y_train[train_idx], Y_train[val_idx]

        # Fit kernel ridge regression (RBF kernel) on the training folds
        model = KernelRidge(kernel='rbf', gamma=gamma, alpha=alpha)
        model.fit(X_tr, Y_tr)

        # Score the predictions on the held-out validation fold
        Y_val_pred = model.predict(X_val)
        r2_scores.append(r2_score(Y_val, Y_val_pred))

    mean_r2 = np.mean(r2_scores)
    results.append({'gamma': gamma, 'alpha': alpha, 'mean_r2': mean_r2})

    # Strict '>' keeps the first combination encountered when scores tie
    if mean_r2 > best_r2:
        best_r2 = mean_r2
        best_params = {'gamma': gamma, 'alpha': alpha}

print("Best Cross Validation R2:", best_r2)
print("Best Hyperparameters:", best_params)

Best Cross Validation R2: 0.9845727849694008
Best Hyperparameters: {'gamma': 0.12, 'alpha': 0.0019}


In [40]:
# Refit on the full training split using the best hyperparameters found by
# the cross-validated grid search.
gamma_best = best_params['gamma']
alpha_best = best_params['alpha']

# Train final model
final_model = KernelRidge(kernel='rbf', gamma=gamma_best, alpha=alpha_best)
final_model.fit(X_train_pca, Y_train)

# Evaluate once on the held-out test split. The sample count in the message
# is read from the data rather than hard-coded, so it stays correct whatever
# the split size is.
Y_test_pred = final_model.predict(X_test_pca)
test_r2 = r2_score(Y_test, Y_test_pred)
print(f"R2 on held-out {len(Y_test)} test samples:", test_r2)

R2 on held-out 200 test samples: 0.9899132710215086
