## Leave-One-Out Cross Validation

### Leave-One-Out Manually

In [7]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Toy regression data: four samples with a single feature each
X = np.array([[1], [2], [3], [4]])
y = np.array([2, 3, 2.5, 5])

# Method 1: brute-force LOOCV -- refit the regression once per held-out sample
mse_list = []

for i, y_test in enumerate(y):
    # Boolean mask that selects every sample except the i-th
    keep = np.arange(len(X)) != i
    X_train, y_train = X[keep], y[keep]
    X_test = X[i:i + 1]  # held-out row, kept 2-D for predict()

    # Fit on the n - 1 remaining samples
    model = LinearRegression().fit(X_train, y_train)

    # Squared prediction error on the single held-out sample
    y_pred = model.predict(X_test)
    mse_list.append((y_test - y_pred[0]) ** 2)

# The LOOCV estimate is the mean of the per-sample squared errors
loocv_mse_manual = np.mean(mse_list)
print(loocv_mse_manual)


1.670918367346939


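### Using the Hat Matrix

For ordinary least squares, the leave-one-out error can be obtained from a single fit on the full data: $\mathrm{CV}_{(n)} = \frac{1}{n}\sum_{i=1}^{n}\left(\frac{y_i - \hat{y}_i}{1 - h_{ii}}\right)^2$, where $h_{ii}$ is the $i$-th diagonal entry (leverage) of the hat matrix $H = X(X^\top X)^{-1}X^\top$.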

In [8]:
# Method 2: closed-form LOOCV MSE from one full-data fit, via hat-matrix leverages

# Design matrix: a leading column of ones supplies the intercept term
X_design = np.column_stack([np.ones(len(X)), X])

# Hat matrix H = X (X'X)^-1 X'
XtX_inv = np.linalg.inv(X_design.T @ X_design)
H = X_design @ XtX_inv @ X_design.T

# Least-squares coefficients, fitted values and residuals on the full dataset
beta = XtX_inv @ X_design.T @ y
y_hat = X_design @ beta
residuals = y - y_hat

# Dividing each residual by (1 - h_ii) gives the leave-one-out prediction error,
# where h_ii is the i-th diagonal entry (leverage) of H
h_diag = H.diagonal()
adjusted_residuals = residuals / (1 - h_diag)
squared_adjusted_residuals = adjusted_residuals ** 2
loocv_mse_hat_matrix = np.mean(squared_adjusted_residuals)

print(loocv_mse_hat_matrix)

1.6709183673469423
