# In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error

# Load the diabetes regression dataset as a (features, targets) pair
X, y = datasets.load_diabetes(return_X_y=True)

# Hold out 20% of the samples as a test set that is not used during model
# selection; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Candidate hypothesis classes, stored as estimator classes (not instances)
# so that a fresh model can be constructed for every fold
hypothesis_classes = {
    "Model1": linear_model.Lasso,
    "Model2": linear_model.Ridge,
}

# Maps each candidate's name to its mean validation MSE across the folds
hypothesis_performance = {}

# 5-fold splitter applied to the training portion only
kf = KFold(n_splits=5)

for name, model_cls in hypothesis_classes.items():
    fold_mses = []
    for train_idx, val_idx in kf.split(X_train):
        # Fit a new default-parameter estimator on the K-1 training folds
        model = model_cls()
        model.fit(X_train[train_idx, :], y_train[train_idx])

        # Score it on the remaining K-th fold with mean squared error (MSE)
        val_pred = model.predict(X_train[val_idx, :])
        fold_mses.append(mean_squared_error(y_train[val_idx], val_pred))

    # Cross-validated score for this candidate: average MSE over the folds
    hypothesis_performance[name] = np.mean(fold_mses)

# Pick the candidate with the lowest average validation MSE, retrain it on
# the full training set, and report its MSE on the hold-out test set
best_model_key = min(hypothesis_performance, key=hypothesis_performance.get)
best_regressor = hypothesis_classes[best_model_key]()
best_regressor.fit(X_train, y_train)
y_test_pred = best_regressor.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_test_pred)}")