python 3.13.5

# reg_part_B_NN

# Data import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import torch
from sklearn.linear_model import Ridge
from tqdm.auto import tqdm
import scipy.stats as st

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
name_data_file="heart_failure_clinical_records_dataset"

data = pd.read_csv(f"../../raw_data/{name_data_file}.csv", na_values=["?"])

In [3]:
data

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.00,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.00,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.00,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.00,2.7,116,0,0,8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,0,61,1,38,1,155000.00,1.1,143,1,1,270,0
295,55.0,0,1820,0,38,0,270000.00,1.2,139,0,0,271,0
296,45.0,0,2060,1,60,0,742000.00,0.8,138,0,0,278,0
297,45.0,0,2413,0,38,0,140000.00,1.4,140,1,1,280,0


# Cross validation pipeline

In [4]:
X = data.drop(columns=['time', "DEATH_EVENT"])
y = data['time']   # pandas Series

N, M = X.shape

# X.shape, y.shape print shapes of X and y to undestand their dimensions

## Help Functions

In [5]:
# Normalize data based on training set

def get_fold_data(X, y, train_idx, val_idx):
   
    X_train = X.iloc[train_idx]
    X_val   = X.iloc[val_idx]
    y_train = y.iloc[train_idx]
    y_val   = y.iloc[val_idx]

    return X_train, X_val, y_train, y_val

def get_fold_data_normalized(X, y, train_idx, val_idx):
   
    X_train = X.iloc[train_idx]
    X_val   = X.iloc[val_idx]
    y_train = y.iloc[train_idx]
    y_val   = y.iloc[val_idx]

    mean = X_train.mean(axis=0)
    std  = X_train.std(axis=0)

    y_train_mean = y_train.mean()

    X_train_norm = (X_train - mean) / std
    X_val_norm   = (X_val   - mean) / std
    y_train = y_train - y_train_mean
    y_val   = y_val   - y_train_mean

    return X_train_norm, X_val_norm, y_train, y_val

# Tensor conversion

def torch_tensor_conversion(X_train, y_train, X_val, y_val):

    X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32).view(-1, 1)
    X_val_tensor   = torch.tensor(X_val.values, dtype=torch.float32)
    y_val_tensor   = torch.tensor(y_val.values.reshape(-1, 1), dtype=torch.float32).view(-1, 1)

    return X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor

def get_model(input_dim, hidden_dim, output_dim):
    return torch.nn.Sequential(
        torch.nn.Linear(in_features=input_dim, out_features=hidden_dim, bias=True),     # Input layer
        torch.nn.Tanh(),                                                                # Activation function
        torch.nn.Linear(in_features=hidden_dim, out_features=output_dim, bias=True),    # Output layer
    )

## 2 layer cross validation

In [6]:
# Parameters:

outer_folds_k_1 = 10
inner_folds_k_2 = 10
random_state = 42

# ANN parameters
input_dim  = M # M number of features
output_dim = 1 # regression problem
lr = 1e-3
n_epochs = 1000
momentum = 0
hyperparameters_ANN = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # hidden layer sizes to try


# Regularization parameters for linear regression
lambdas__for_linear_regression = np.logspace(-4, 3, 30)[23:28]

In [7]:
CV_outer = KFold(n_splits=outer_folds_k_1, shuffle=True, random_state=random_state) 

baseline_per_fold = {}   # Outer fold dict (key: outer fold index)
best_hyperparameters_per_fold = {}
best_lambda_per_fold = {}
fold_results = {}
outer_fold_index = 0

for outer_train_idx, outer_test_idx in CV_outer.split(X):
    outer_fold_index += 1
    X_train_outer, X_test_outer, y_train_outer, y_test_outer = get_fold_data(X, y, outer_train_idx, outer_test_idx)

    CV_inner = KFold(n_splits=inner_folds_k_2, shuffle=True, random_state=random_state)
    inner_mse_ANN = {}
    inner_mse_linear_regression = {}
    inner_fold_index = 0

    for inner_train_idx, inner_test_idx in CV_inner.split(X_train_outer):
        inner_fold_index += 1
        print(f"Outer Fold {outer_fold_index} - Inner Fold {inner_fold_index}")

        ############################# DATA Inner Fold ####################################
        X_train_inner_norm, X_test_inner_norm, y_train_inner_norm, y_test_inner_norm = get_fold_data_normalized(X_train_outer, y_train_outer, inner_train_idx, inner_test_idx)

        ############################# Linear Regression Inner Fold ####################################
        
        # Set up a dictionary to store the results for each lambda setting
        results_inner_linear_regression = {lam: {'train': [], 'test': []} for lam in lambdas__for_linear_regression}

        for lam in lambdas__for_linear_regression:

            model = Ridge(alpha=lam, random_state=42)
            model.fit(X_train_inner_norm, y_train_inner_norm)

            y_test_pred_inner = model.predict(X_test_inner_norm)
            mse_test = mean_squared_error(y_test_inner_norm, y_test_pred_inner)

            results_inner_linear_regression[lam]['test'].append(mse_test)
            inner_mse_linear_regression[inner_fold_index] = results_inner_linear_regression

        ############################# ANN Inner Fold ########################################
        X_train_inner_tensor, y_train_inner_tensor, X_test_inner_tensor, y_test_inner_tensor = torch_tensor_conversion(X_train_inner_norm, y_train_inner_norm, X_test_inner_norm, y_test_inner_norm) 
    
        # Set up a dictionary to store the results for each hyperparameter setting
        results_inner_ANN = {hidden_dim: {'train': [], 'test': []} for hidden_dim in hyperparameters_ANN}

        for hidden_dim in hyperparameters_ANN:
            # Define a model instance with a specific number of hidden units
            model = get_model(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)

            # Define loss criterion
            criterion = torch.nn.MSELoss()

            # Define the optimizer as the Adam optimizer (not needed to know the details)
            optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)

            for epoch in range(n_epochs):

                # Set the model to training mode
                model.train()

                # Make a forward pass through the model to compute the outputs
                outputs = model(X_train_inner_tensor)
                # Compute the loss
                loss = criterion(outputs, y_train_inner_tensor)

                # Make sure that the gradients are zero before you use backpropagation
                optimizer.zero_grad()
                # Do a backward pass to compute the gradients wrt. model parameters using backpropagation.
                loss.backward()
                # Update the model parameters by making the optimizer take a gradient descent step
                optimizer.step()
                
                # Store the training loss for this epoch in the dictionary
                #results_inner_ANN[hidden_dim]['train'].append(loss.item())

            # Compute the final test loss on the test set
            with torch.no_grad(): # No need to compute gradients for the validation set
                model.eval()
                val_outputs = model(X_test_inner_tensor)
                val_loss = criterion(val_outputs, y_test_inner_tensor)
                results_inner_ANN[hidden_dim]['test'].append(val_loss.item())
                #print(f'  Hidden units: {hidden_dim}, Validation set MSE: {val_loss.item():.4f}')
                inner_mse_ANN[inner_fold_index] = results_inner_ANN 

        ############################# BASELINE Inner Fold ####################################

        #----

    ############################ OUTER FOLD ##########################################################

    ############################ Data ##########################################################

    X_train_outer_norm, X_test_outer_norm, y_train_outer_norm, y_test_outer_norm = get_fold_data_normalized(X, y, outer_train_idx, outer_test_idx)

    X_train_outer_tensor, y_train_outer_tensor, X_test_outer_tensor, y_test_outer_tensor = torch_tensor_conversion(X_train_outer_norm, y_train_outer_norm, X_test_outer_norm, y_test_outer_norm)

    ############################ Linear Regression Outer Fold ####################################

    avg_mse_per_lambda = {}
    for lam in lambdas__for_linear_regression:
        mse_values = []
        for inner_fold in inner_mse_linear_regression.keys():
            mse_values.append(inner_mse_linear_regression[inner_fold][lam]['test'][0])  # We only have one value of test per fold 
        avg_mse = np.mean(mse_values)
        avg_mse_per_lambda[lam] = avg_mse
    
    best_lambda = min(avg_mse_per_lambda, key=avg_mse_per_lambda.get)
    best_lambda_per_fold[outer_fold_index] = best_lambda

    model = Ridge(alpha=best_lambda, random_state=42)
    model.fit(X_train_outer_norm, y_train_outer_norm)
    y_test_pred_outer = model.predict(X_test_outer_norm)
    mse_test_outer = mean_squared_error(y_test_outer_norm, y_test_pred_outer)
    print(f"For outer fold {outer_fold_index} Best λ (alpha): {best_lambda}, Test MSE: {mse_test_outer}")

    ############################ ANN Outer Fold ####################################
    # Find the best hyperparameter based on inner folds
    avg_mse_per_hyperparam = {}
    for hidden_dim in hyperparameters_ANN:
        mse_values = []
        for inner_fold in inner_mse_ANN.keys():
            mse_values.append(inner_mse_ANN[inner_fold][hidden_dim]['test'][0])  # We only have one value of test per fold 
        avg_mse = np.mean(mse_values)
        avg_mse_per_hyperparam[hidden_dim] = avg_mse
        
    
    best_hyperparam = min(avg_mse_per_hyperparam, key=avg_mse_per_hyperparam.get)
    best_hyperparameters_per_fold[outer_fold_index] = best_hyperparam


    model = get_model(input_dim=input_dim, hidden_dim=best_hyperparam, output_dim=output_dim)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)
    
    for epoch in range(n_epochs):
        
        model.train()
        outputs = model(X_train_outer_tensor)
        loss = criterion(outputs, y_train_outer_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    with torch.no_grad(): # No need to compute gradients for the validation set
            model.eval()
            val_outputs = model(X_test_outer_tensor)
            val_loss = criterion(val_outputs, y_test_outer_tensor)
            print(f'For outer fold {outer_fold_index} Best hidden units: {best_hyperparam}, Test MSE: {val_loss.item():.4f}')
    
    ############################ BASELINE Outer Fold ###############################

    y_train_mean = y_train_outer_norm.mean()
    y_test_pred_outer = pd.Series(y_train_mean, index=y_test_outer_norm.index)
    outer_mse_baseline = mean_squared_error(y_test_outer_norm, y_test_pred_outer)
    baseline_per_fold[outer_fold_index] = outer_mse_baseline 
    print(f"For outer fold {outer_fold_index} Mean Inner fold MSE for Baseline:", outer_mse_baseline)

    ############################# STORE RESULTS ####################################

    fold_results[outer_fold_index] = {
        "linear_regression_best_lambda": best_lambda,
        "linear_regression_mse": mse_test_outer,
        "ANN_best_hidden_units": best_hyperparam,
        "ANN_mse": val_loss.item(),
        "baseline_outer_mse": outer_mse_baseline
    }



Outer Fold 1 - Inner Fold 1
Outer Fold 1 - Inner Fold 2
Outer Fold 1 - Inner Fold 3
Outer Fold 1 - Inner Fold 4
Outer Fold 1 - Inner Fold 5
Outer Fold 1 - Inner Fold 6
Outer Fold 1 - Inner Fold 7
Outer Fold 1 - Inner Fold 8
Outer Fold 1 - Inner Fold 9
Outer Fold 1 - Inner Fold 10
For outer fold 1 Best λ (alpha): 188.73918221350996, Test MSE: 5157.651809924352
For outer fold 1 Best hidden units: 1, Test MSE: 5830.5918
For outer fold 1 Mean Inner fold MSE for Baseline: 5543.878799813896
Outer Fold 2 - Inner Fold 1
Outer Fold 2 - Inner Fold 2
Outer Fold 2 - Inner Fold 3
Outer Fold 2 - Inner Fold 4
Outer Fold 2 - Inner Fold 5
Outer Fold 2 - Inner Fold 6
Outer Fold 2 - Inner Fold 7
Outer Fold 2 - Inner Fold 8
Outer Fold 2 - Inner Fold 9
Outer Fold 2 - Inner Fold 10
For outer fold 2 Best λ (alpha): 108.2636733874054, Test MSE: 5539.999850689881
For outer fold 2 Best hidden units: 10, Test MSE: 5868.3828
For outer fold 2 Mean Inner fold MSE for Baseline: 5599.172962415298
Outer Fold 3 - Inner

### functions

In [8]:
def count_best_parameters(dict_parameters_for_each_outer_fold):
    count_parameter_dict = {}
    for outer_fold_index in dict_parameters_for_each_outer_fold.keys():
        if dict_parameters_for_each_outer_fold.get(outer_fold_index) not in count_parameter_dict.keys() :
            count_parameter_dict[dict_parameters_for_each_outer_fold.get(outer_fold_index)] = 1

        else:
            count_parameter_dict[dict_parameters_for_each_outer_fold.get(outer_fold_index)] += 1

    return count_parameter_dict

def best_parameter(count_parameter_dict):
    best_param = max(count_parameter_dict, key=count_parameter_dict.get)
    return best_param

best_lambda = best_parameter(count_best_parameters(best_lambda_per_fold))
best_hyperparameter = best_parameter(count_best_parameters(best_hyperparameters_per_fold))

### learning_rate = 0.001, momentum = 0, n_epochs = 1000

In [9]:
outer_results_df = pd.DataFrame.from_dict(fold_results, orient='index')
print("\n=== Summary of Outer Fold Results ===")
print(f"learning_rate = {lr}, momentum = {momentum}, n_epochs = {n_epochs}")
mean_ANN_mse = outer_results_df['ANN_mse'].mean()
difference_ANN_mse_baseline_mse = (outer_results_df['baseline_outer_mse'] - outer_results_df['ANN_mse']).mean()
print(f"Mean ANN MSE across outer folds: {mean_ANN_mse}")
print(f"Mean difference between baseline and ANN MSE across outer folds: {difference_ANN_mse_baseline_mse}")
outer_results_df


=== Summary of Outer Fold Results ===
learning_rate = 0.001, momentum = 0, n_epochs = 1000
Mean ANN MSE across outer folds: 5915.052783203125
Mean difference between baseline and ANN MSE across outer folds: 112.37105704244668


Unnamed: 0,linear_regression_best_lambda,linear_regression_mse,ANN_best_hidden_units,ANN_mse,baseline_outer_mse
1,188.739182,5157.65181,1,5830.591797,5543.8788
2,108.263673,5539.999851,10,5868.382812,5599.172962
3,188.739182,5635.661464,1,5872.599121,6384.788366
4,188.739182,3895.681959,1,3992.008789,5103.847464
5,188.739182,4070.970269,2,5254.224609,4707.124051
6,108.263673,6070.225766,5,6002.25293,6811.592484
7,188.739182,6354.27928,8,6825.538574,6991.245497
8,108.263673,8347.406782,7,7928.255859,7346.281996
9,188.739182,6645.943883,1,6153.07373,5970.658638
10,108.263673,5220.648247,1,5423.599609,5815.648144


In [10]:
print("Best λ (alpha) for Linear Regression across all outer folds:", best_lambda)
print("Best hidden units for ANN across all outer folds:", best_hyperparameter)

Best λ (alpha) for Linear Regression across all outer folds: 188.73918221350996
Best hidden units for ANN across all outer folds: 1


### learning_rate = 0.001, momentum = 0.5, n_epochs = 1000

In [91]:
outer_results_df = pd.DataFrame.from_dict(fold_results, orient='index')
print("\n=== Summary of Outer Fold Results ===")
print(f"learning_rate = {lr}, momentum = {momentum}, n_epochs = {n_epochs}")
mean_ANN_mse = outer_results_df['ANN_mse'].mean()
difference_ANN_mse_baseline_mse = (outer_results_df['baseline_outer_mse'] - outer_results_df['ANN_mse']).mean()
print(f"Mean ANN MSE across outer folds: {mean_ANN_mse}")
print(f"Mean difference between baseline and ANN MSE across outer folds: {difference_ANN_mse_baseline_mse}")
outer_results_df


=== Summary of Outer Fold Results ===
learning_rate = 0.001, momentum = 0.5, n_epochs = 1000
Mean ANN MSE across outer folds: 6160.3056640625
Mean difference between baseline and ANN MSE across outer folds: -132.88182381692832


Unnamed: 0,linear_regression_best_lambda,linear_regression_mse,ANN_best_hidden_units,ANN_mse,baseline_outer_mse
1,188.739182,5157.65181,3,5940.164062,5543.8788
2,108.263673,5539.999851,5,6087.937012,5599.172962
3,188.739182,5635.661464,2,5815.179199,6384.788366
4,188.739182,3895.681959,1,3975.016602,5103.847464
5,188.739182,4070.970269,1,5228.65332,4707.124051
6,108.263673,6070.225766,3,6504.273926,6811.592484
7,188.739182,6354.27928,4,6552.671387,6991.245497
8,108.263673,8347.406782,1,9074.388672,7346.281996
9,188.739182,6645.943883,1,6105.773438,5970.658638
10,108.263673,5220.648247,3,6318.999023,5815.648144


In [92]:
print("Best λ (alpha) for Linear Regression across all outer folds:", best_lambda)
print("Best hidden units for ANN across all outer folds:", best_hyperparameter)

Best λ (alpha) for Linear Regression across all outer folds: 188.73918221350996
Best hidden units for ANN across all outer folds: 1


### learning_rate = 0.001, momentum = 0.9, n_epochs = 1000

In [95]:
outer_results_df = pd.DataFrame.from_dict(fold_results, orient='index')
print("\n=== Summary of Outer Fold Results ===")
print(f"learning_rate = {lr}, momentum = {momentum}, n_epochs = {n_epochs}")
mean_ANN_mse = outer_results_df['ANN_mse'].mean()
difference_ANN_mse_baseline_mse = (outer_results_df['baseline_outer_mse'] - outer_results_df['ANN_mse']).mean()
print(f"Mean ANN MSE across outer folds: {mean_ANN_mse}")
print(f"Mean difference between baseline and ANN MSE across outer folds: {difference_ANN_mse_baseline_mse}")
outer_results_df


=== Summary of Outer Fold Results ===
learning_rate = 0.001, momentum = 0.9, n_epochs = 1000
Mean ANN MSE across outer folds: 6191.557177734375
Mean difference between baseline and ANN MSE across outer folds: -164.1333374888033


Unnamed: 0,linear_regression_best_lambda,linear_regression_mse,ANN_best_hidden_units,ANN_mse,baseline_outer_mse
1,188.739182,5157.65181,2,6731.850098,5543.8788
2,108.263673,5539.999851,2,6945.989258,5599.172962
3,188.739182,5635.661464,1,5956.526855,6384.788366
4,188.739182,3895.681959,1,4013.535156,5103.847464
5,188.739182,4070.970269,1,5132.63916,4707.124051
6,108.263673,6070.225766,3,6418.666504,6811.592484
7,188.739182,6354.27928,1,6034.097168,6991.245497
8,108.263673,8347.406782,2,8870.155273,7346.281996
9,188.739182,6645.943883,1,6152.852051,5970.658638
10,108.263673,5220.648247,1,5659.260254,5815.648144


In [96]:
print("Best λ (alpha) for Linear Regression across all outer folds:", best_lambda)
print("Best hidden units for ANN across all outer folds:", best_hyperparameter)

Best λ (alpha) for Linear Regression across all outer folds: 108.2636733874054
Best hidden units for ANN across all outer folds: 1


### learning_rate = 0.001, momentum = 0.9, n_epochs = 10000

In [82]:
outer_results_df = pd.DataFrame.from_dict(fold_results, orient='index')
print("\n=== Summary of Outer Fold Results ===")
print(f"learning_rate = {lr}, momentum = {momentum}, n_epochs = {n_epochs}")
mean_ANN_mse = outer_results_df['ANN_mse'].mean()
difference_ANN_mse_baseline_mse = (outer_results_df['baseline_outer_mse'] - outer_results_df['ANN_mse']).mean()
print(f"Mean ANN MSE across outer folds: {mean_ANN_mse}")
print(f"Mean difference between baseline and ANN MSE across outer folds: {difference_ANN_mse_baseline_mse}")
outer_results_df


=== Summary of Outer Fold Results ===
learning_rate = 0.001, momentum = 0.9, n_epochs = 10000
Mean ANN MSE across outer folds: 6448.2427734375
Mean difference between baseline and ANN MSE across outer folds: -420.8189331919283


Unnamed: 0,linear_regression_best_lambda,linear_regression_mse,ANN_best_hidden_units,ANN_mse,baseline_outer_mse
1,188.739182,5157.65181,1,6556.806152,5543.8788
2,108.263673,5539.999851,4,5517.922852,5599.172962
3,188.739182,5635.661464,1,6256.265137,6384.788366
4,188.739182,3895.681959,1,3596.608887,5103.847464
5,188.739182,4070.970269,4,5874.966309,4707.124051
6,108.263673,6070.225766,3,6468.523926,6811.592484
7,188.739182,6354.27928,1,7647.459473,6991.245497
8,108.263673,8347.406782,2,9454.005859,7346.281996
9,188.739182,6645.943883,3,7646.244629,5970.658638
10,108.263673,5220.648247,1,5463.624512,5815.648144


In [None]:
print("Best λ (alpha) for Linear Regression across all outer folds:", best_lambda)
print("Best hidden units for ANN across all outer folds:", best_hyperparameter)

Best λ (alpha) for Linear Regression across all outer folds: 188.73918221350996
Best hidden units for ANN across all outer folds: 1


# Statistical Test

## Help functions

In [11]:
def correlated_ttest(r, rho, alpha=0.05):
    """
    Perform a correlated t-test to compare two models under Setup II.

    Parameters:
    - r (array-like): Array of performance differences across folds (e.g. r_j = error_A - error_B)
    - rho (float): Correlation coefficient between folds (typically 1/K for K-fold CV)
    - alpha (float, optional): Significance level (default: 0.05)

    Returns:
    - p (float): p-value of the test
    - CI (tuple): Confidence interval for the mean difference
    """

    r = np.array(r)
    r_hat = np.mean(r)
    s_hat = np.std(r, ddof=1)
    J = len(r)

    # Adjusted standard deviation accounting for correlation
    sigma_tilde = s_hat * np.sqrt((1 / J) + (rho / (1 - rho)))

    # Confidence interval
    CI = st.t.interval(1 - alpha, df=J - 1, loc=r_hat, scale=sigma_tilde)

    # Two-sided p-value
    p = 2 * st.t.cdf(-np.abs(r_hat) / sigma_tilde, df=J - 1)

    return r_hat, CI, p

def get_fold_data_normalized(X, y, train_idx, val_idx):
   
    X_train = X.iloc[train_idx]
    X_val   = X.iloc[val_idx]
    y_train = y.iloc[train_idx]
    y_val   = y.iloc[val_idx]

    mean = X_train.mean(axis=0)
    std  = X_train.std(axis=0)

    y_train_mean = y_train.mean()

    X_train_norm = (X_train - mean) / std
    X_val_norm   = (X_val   - mean) / std
    y_train = y_train - y_train_mean
    y_val   = y_val   - y_train_mean

    return X_train_norm, X_val_norm, y_train, y_val

def torch_tensor_conversion(X_train, y_train, X_val, y_val):

    X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32).view(-1, 1)
    X_val_tensor   = torch.tensor(X_val.values, dtype=torch.float32)
    y_val_tensor   = torch.tensor(y_val.values.reshape(-1, 1), dtype=torch.float32).view(-1, 1)

    return X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor

## code

In [12]:
m = 10 # Repetitions
K = 10 # Folds
rho = 1 / K # Correlation heuristic
alpha = 0.05 # Significance level

# ANN parameters

input_dim  = M # M number of features
output_dim = 1 # regression problem
lr = 1e-3
n_epochs = 1000
momentum = 0.9

#Loss Function 
l2_loss = lambda y, y_pred: (y - y_pred)**2
loss_func = l2_loss # Loss function

# Parameters used

best_lambda_statistic_test = best_lambda
best_hyperparameter_statistic_test = best_hyperparameter

In [13]:
print(best_lambda_statistic_test)
print(best_hyperparameter_statistic_test)

188.73918221350996
1


### ANN vs Linear Reg

In [14]:
r = []

for repeat_idx in range(m):
    print(f"Repetition {repeat_idx+1}/{m}")

    # 5.2) Initialize KFold cross-validation, set the seed to repeat_idx
    ### BEGIN SOLUTION
    CV_kfold = KFold(n_splits=K, shuffle=True, random_state=repeat_idx)
    ### END SOLUTION

    for fold, (train_index, test_index) in tqdm(enumerate(CV_kfold.split(X)), total=CV_kfold.get_n_splits(X),desc="Cross-validation fold"):
        # Split data into training and test sets

        ############################################# DATA #################################################

        X_train_norm, X_test_norm, y_train_norm, y_test_norm= get_fold_data_normalized(X, y, train_index, test_index)

        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = torch_tensor_conversion(X_train_norm, y_train_norm, X_test_norm, y_test_norm)

        ############################################# LINEAR REGRESSION #################################################

        model = Ridge(alpha = best_lambda_statistic_test, random_state=repeat_idx)
        model.fit(X_train_norm, y_train_norm)

        y_test_linear_reg = model.predict(X_test_norm)
        loss_func_linear_reg = loss_func(y_test_norm, y_test_linear_reg).values.flatten()  # Get individual squared errors as a 1D array

        ##################################################### ANN MODEL #################################################

        model = get_model(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)

        # Define loss criterion - set reduction to 'none' to get individual errors
        criterion = torch.nn.MSELoss(reduction='none')

        # Define the optimizer as the Adam optimizer (not needed to know the details)
        optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)

        for epoch in range(n_epochs):

            # Set the model to training mode
            model.train()

            # Make a forward pass through the model to compute the outputs
            outputs = model(X_train_tensor)
            # Compute the loss (this will still be a tensor of individual losses, so take mean for backward)
            loss = criterion(outputs, y_train_tensor).mean()  # mean needed for backward

            # Make sure that the gradients are zero before you use backpropagation
            optimizer.zero_grad()
            # Do a backward pass to compute the gradients wrt. model parameters using backpropagation.
            loss.backward()
            # Update the model parameters by making the optimizer take a gradient descent step
            optimizer.step()

        with torch.no_grad():
            model.eval()
            val_outputs = model(X_test_tensor)
            val_losses = criterion(val_outputs, y_test_tensor)  # Tensor of individual squared errors
            loss_func_ANN = val_losses.detach().cpu().numpy().flatten()  # Convert to numpy array for all individual errors
        ######################################################### MODELS COMPARISON #######################################

        r_j = np.mean(loss_func_linear_reg - loss_func_ANN)

        r.append(r_j)

# Calculate p-value and confidence interval using correlated t-test
r_hat, CI, p_value = correlated_ttest(r, rho, alpha=alpha)

print(f"\nSetup II results:")
print(f"r_hat: {r_hat:.4f}")
print(f"95% CI: [{CI[0]:.4f}, {CI[1]:.4f}]")
print(f"p-value: {p_value}")

Repetition 1/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.71it/s]


Repetition 2/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.90it/s]


Repetition 3/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.82it/s]


Repetition 4/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.36it/s]


Repetition 5/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.45it/s]


Repetition 6/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.61it/s]


Repetition 7/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.85it/s]


Repetition 8/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.84it/s]


Repetition 9/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.71it/s]


Repetition 10/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.85it/s]


Setup II results:
r_hat: -1923.9072
95% CI: [-2940.7422, -907.0723]
p-value: 0.00029348441260063687





### ANN vs Baseline

In [15]:
r = []

for repeat_idx in range(m):
    print(f"Repetition {repeat_idx+1}/{m}")

    # 5.2) Initialize KFold cross-validation, set the seed to repeat_idx
    ### BEGIN SOLUTION
    CV_kfold = KFold(n_splits=K, shuffle=True, random_state=repeat_idx)
    ### END SOLUTION

    for fold, (train_index, test_index) in tqdm(enumerate(CV_kfold.split(X)), total=CV_kfold.get_n_splits(X),desc="Cross-validation fold"):
        # Split data into training and test sets

        ############################################# DATA #################################################

        X_train_norm, X_test_norm, y_train_norm, y_test_norm= get_fold_data_normalized(X, y, train_index, test_index)

        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = torch_tensor_conversion(X_train_norm, y_train_norm, X_test_norm, y_test_norm)

        ############################################# BASELINE #################################################

        y_train_mean_baseline = y_train_norm.mean()
        y_test_pred_baseline = pd.Series(y_train_mean_baseline, index=y_test_norm.index)
        loss_funcion_baseline = loss_func(y_test_norm, y_test_pred_baseline).values.flatten()

        ##################################################### ANN MODEL #################################################

        model = get_model(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)

        # Define loss criterion - set reduction to 'none' to get individual errors
        criterion = torch.nn.MSELoss(reduction='none')

        # Define the optimizer as the Adam optimizer (not needed to know the details)
        optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)

        for epoch in range(n_epochs):

            # Set the model to training mode
            model.train()

            # Make a forward pass through the model to compute the outputs
            outputs = model(X_train_tensor)
            # Compute the loss (this will still be a tensor of individual losses, so take mean for backward)
            loss = criterion(outputs, y_train_tensor).mean()  # mean needed for backward

            # Make sure that the gradients are zero before you use backpropagation
            optimizer.zero_grad()
            # Do a backward pass to compute the gradients wrt. model parameters using backpropagation.
            loss.backward()
            # Update the model parameters by making the optimizer take a gradient descent step
            optimizer.step()

        with torch.no_grad():
            model.eval()
            val_outputs = model(X_test_tensor)
            val_losses = criterion(val_outputs, y_test_tensor)  # Tensor of individual squared errors
            loss_func_ANN = val_losses.detach().cpu().numpy().flatten()  # Convert to numpy array for all individual errors
        ######################################################### MODELS COMPARISON #######################################

        r_j = np.mean(loss_funcion_baseline - loss_func_ANN)
        r.append(r_j)

# Calculate p-value and confidence interval using correlated t-test
r_hat, CI, p_value = correlated_ttest(r, rho, alpha=alpha)

print(f"\nSetup II results:")
print(f"r_hat: {r_hat:.4f}")
print(f"95% CI: [{CI[0]:.4f}, {CI[1]:.4f}]")
print(f"p-value: {p_value}")

Repetition 1/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.83it/s]


Repetition 2/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.64it/s]


Repetition 3/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.61it/s]


Repetition 4/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.79it/s]


Repetition 5/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.41it/s]


Repetition 6/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.56it/s]


Repetition 7/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.81it/s]


Repetition 8/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.50it/s]


Repetition 9/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.81it/s]


Repetition 10/10


Cross-validation fold: 100%|██████████| 10/10 [00:02<00:00,  4.79it/s]


Setup II results:
r_hat: -1377.2755
95% CI: [-2502.2910, -252.2600]
p-value: 0.016937890092791296





### Linear Reg vs Baseline

In [16]:
r = []

for repeat_idx in range(m):
    print(f"Repetition {repeat_idx+1}/{m}")

    # 5.2) Initialize KFold cross-validation, set the seed to repeat_idx
    ### BEGIN SOLUTION
    CV_kfold = KFold(n_splits=K, shuffle=True, random_state=repeat_idx)
    ### END SOLUTION

    for fold, (train_index, test_index) in tqdm(enumerate(CV_kfold.split(X)), total=CV_kfold.get_n_splits(X),desc="Cross-validation fold"):
        # Split data into training and test sets

        ############################################# DATA #################################################

        X_train_norm, X_test_norm, y_train_norm, y_test_norm= get_fold_data_normalized(X, y, train_index, test_index)

        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = torch_tensor_conversion(X_train_norm, y_train_norm, X_test_norm, y_test_norm)

        ############################################# LINEAR REGRESSION #################################################

        model = Ridge(alpha = best_lambda_statistic_test, random_state=repeat_idx)
        model.fit(X_train_norm, y_train_norm)

        y_test_linear_reg = model.predict(X_test_norm)
        loss_func_linear_reg = loss_func(y_test_norm, y_test_linear_reg)

        ############################################# BASELINE #################################################

        y_train_mean_baseline = y_train_norm.mean()
        y_test_pred_baseline = pd.Series(y_train_mean_baseline, index=y_test_norm.index)
        loss_funcion_baseline = loss_func(y_test_norm, y_test_pred_baseline)

        ######################################################### MODELS COMPARISON #######################################
        
        r_j = np.mean(loss_func_linear_reg - loss_funcion_baseline)

        r.append(r_j)

# Calculate p-value and confidence interval using correlated t-test
r_hat, CI, p_value = correlated_ttest(r, rho, alpha=alpha)

print(f"\nSetup II results:")
print(f"r_hat: {r_hat:.4f}")
print(f"95% CI: [{CI[0]:.4f}, {CI[1]:.4f}]")
print(f"p-value: {p_value}")

Repetition 1/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 390.72it/s]


Repetition 2/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 437.98it/s]


Repetition 3/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 466.26it/s]


Repetition 4/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 456.77it/s]


Repetition 5/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 460.57it/s]


Repetition 6/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 455.80it/s]


Repetition 7/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 466.83it/s]


Repetition 8/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 457.19it/s]


Repetition 9/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 462.25it/s]


Repetition 10/10


Cross-validation fold: 100%|██████████| 10/10 [00:00<00:00, 451.53it/s]


Setup II results:
r_hat: -346.5908
95% CI: [-665.1416, -28.0399]
p-value: 0.03327646979684049



