## K-Fold Cross-Validation

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import numpy as np

# Toy regression data: one feature column holding 1..8 and one target per row.
X = np.arange(1, 9).reshape(-1, 1)
y = np.array([1.5, 2.1, 2.9, 3.6, 4.0, 5.1, 5.8, 6.3])

# Split into k folds without shuffling, so the partition is deterministic.
k = 4
kf = KFold(n_splits=k, shuffle=False)

# One validation MSE is collected per fold.
mse_list = []

for train_idx, val_idx in kf.split(X):
    # Fit a fresh model on every row outside the current validation fold.
    model = LinearRegression().fit(X[train_idx], y[train_idx])

    # Score the held-out rows and record this fold's error.
    y_pred = model.predict(X[val_idx])
    mse_list.append(mean_squared_error(y[val_idx], y_pred))

# Summarize with the mean of the per-fold errors.
average_mse = np.mean(mse_list)

# Bare tuple expression: displayed as the cell result when run in a notebook.
mse_list, average_mse


([0.0028321995464852655,
  0.003442410064653929,
  0.04702950874026019,
  0.013489795918367378],
 0.01669847856744169)