# Remove 1 Sensor from Data

In [22]:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from itertools import product, combinations

In [23]:
# Load data
X_train_input = np.load("Data/X_train.npy")  # shape (700, 6)
Y_train_input = np.load("Data/Y_train.npy")  # shape (700,)

# Hyperparameter grids
gamma_list = [0.01, 0.05, 0.10, 0.20, 0.30, 0.40, 0.50]
alpha_list = [0.0001, 0.0005, 0.001, 0.002, 0.003, 0.004, 0.005]

# Loop through sensors
n_sensors = X_train_input.shape[1]

In [24]:
for sensor_idx in range(n_sensors):
    print(f"\n--- Removing sensor at index {sensor_idx} ---")
    
    # Remove one sensor
    X_mod = np.delete(X_train_input, sensor_idx, axis=1)
    print("New shapes:", X_mod.shape, Y_train_input.shape)

    # Train/test split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_mod, Y_train_input, test_size=200, random_state=42, shuffle=True
    )

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # K-Fold CV
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    best_r2 = -np.inf
    best_params = None
    results = []

    for gamma, alpha in product(gamma_list, alpha_list):
        r2_scores = []
        
        for train_idx, val_idx in kf.split(X_train_scaled):
            X_tr, X_val = X_train_scaled[train_idx], X_train_scaled[val_idx]
            Y_tr, Y_val = Y_train[train_idx], Y_train[val_idx]
            
            # Train Kernel Ridge
            model = KernelRidge(kernel='rbf', gamma=gamma, alpha=alpha)
            model.fit(X_tr, Y_tr)
            
            # Predict on validation
            Y_val_pred = model.predict(X_val)
            r2_scores.append(r2_score(Y_val, Y_val_pred))
        
        mean_r2 = np.mean(r2_scores)
        results.append({'gamma': gamma, 'alpha': alpha, 'mean_r2': mean_r2})
        
        if mean_r2 > best_r2:
            best_r2 = mean_r2
            best_params = {'gamma': gamma, 'alpha': alpha}

    print("Best CV R2:", best_r2)
    print("Best Hyperparameters:", best_params)

    # Refit on all training data
    final_model = KernelRidge(kernel='rbf', **best_params)
    final_model.fit(X_train_scaled, Y_train)

    # Test evaluation
    Y_test_pred = final_model.predict(X_test_scaled)
    test_r2 = r2_score(Y_test, Y_test_pred)
    print("R2 on held-out test samples:", test_r2)


--- Removing sensor at index 0 ---
New shapes: (700, 5) (700,)
Best CV R2: 0.9859747485354768
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.0005}
R2 on held-out test samples: 0.9888728787874946

--- Removing sensor at index 1 ---
New shapes: (700, 5) (700,)
Best CV R2: 0.992780521411049
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.0005}
R2 on held-out test samples: 0.9899839168402337

--- Removing sensor at index 2 ---
New shapes: (700, 5) (700,)
Best CV R2: 0.9822508516785178
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.0005}
R2 on held-out test samples: 0.9791130574166333

--- Removing sensor at index 3 ---
New shapes: (700, 5) (700,)
Best CV R2: 0.9855875893790627
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.0005}
R2 on held-out test samples: 0.983089458663895

--- Removing sensor at index 4 ---
New shapes: (700, 5) (700,)
Best CV R2: 0.9686763846266562
Best Hyperparameters: {'gamma': 0.3, 'alpha': 0.002}
R2 on held-out test samples: 0.9719445691158765

--- Removing sens

# Remove 2 Sensors from Data

In [25]:
# Loop through all pairs of sensors
for sensor_pair in combinations(range(n_sensors), 2):
    print(f"\n--- Removing sensors at indices {sensor_pair} ---")
    
    # Remove the two sensors
    X_mod = np.delete(X_train_input, sensor_pair, axis=1)
    print("New shapes:", X_mod.shape, Y_train_input.shape)

    # Train/test split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_mod, Y_train_input, test_size=200, random_state=42, shuffle=True
    )

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # K-Fold CV
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    best_r2 = -np.inf
    best_params = None
    results = []

    for gamma, alpha in product(gamma_list, alpha_list):
        r2_scores = []
        
        for train_idx, val_idx in kf.split(X_train_scaled):
            X_tr, X_val = X_train_scaled[train_idx], X_train_scaled[val_idx]
            Y_tr, Y_val = Y_train[train_idx], Y_train[val_idx]
            
            # Train Kernel Ridge
            model = KernelRidge(kernel='rbf', gamma=gamma, alpha=alpha)
            model.fit(X_tr, Y_tr)
            
            # Predict on validation
            Y_val_pred = model.predict(X_val)
            r2_scores.append(r2_score(Y_val, Y_val_pred))
        
        mean_r2 = np.mean(r2_scores)
        results.append({'gamma': gamma, 'alpha': alpha, 'mean_r2': mean_r2})
        
        if mean_r2 > best_r2:
            best_r2 = mean_r2
            best_params = {'gamma': gamma, 'alpha': alpha}

    print("Best CV R2:", best_r2)
    print("Best Hyperparameters:", best_params)

    # Refit on all training data
    final_model = KernelRidge(kernel='rbf', **best_params)
    final_model.fit(X_train_scaled, Y_train)

    # Test evaluation
    Y_test_pred = final_model.predict(X_test_scaled)
    test_r2 = r2_score(Y_test, Y_test_pred)
    print("R2 on held-out test samples:", test_r2)


--- Removing sensors at indices (0, 1) ---
New shapes: (700, 4) (700,)
Best CV R2: 0.9898887153800786
Best Hyperparameters: {'gamma': 0.2, 'alpha': 0.005}
R2 on held-out test samples: 0.9910764631361019

--- Removing sensors at indices (0, 2) ---
New shapes: (700, 4) (700,)
Best CV R2: 0.9675967479562217
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.0005}
R2 on held-out test samples: 0.9724166867642969

--- Removing sensors at indices (0, 3) ---
New shapes: (700, 4) (700,)
Best CV R2: 0.7845379916908576
Best Hyperparameters: {'gamma': 0.1, 'alpha': 0.005}
R2 on held-out test samples: 0.7383637332384902

--- Removing sensors at indices (0, 4) ---
New shapes: (700, 4) (700,)
Best CV R2: 0.9720893327877661
Best Hyperparameters: {'gamma': 0.4, 'alpha': 0.005}
R2 on held-out test samples: 0.9756493442268288

--- Removing sensors at indices (0, 5) ---
New shapes: (700, 4) (700,)
Best CV R2: 0.9814435187062586
Best Hyperparameters: {'gamma': 0.2, 'alpha': 0.002}
R2 on held-out test samples