# In [None]:
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error

# Load the diabetes regression dataset bundled with scikit-learn.
data = load_diabetes()
X, y = data.data, data.target

# Hold out 20% of the samples for the final evaluation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base Lasso estimator; its alpha is overridden by each grid-search candidate.
lasso_model = Lasso()

# Candidate regularisation strengths: 100 log-spaced values from 1e-4 to 1e2.
alphas = np.logspace(-4, 2, 100)

# 5-fold cross-validated grid search over alpha, run on the training split
# only so the test split stays unseen. The scorer is the negated MSE, so the
# candidate with the highest score is the one with the lowest MSE.
param_grid = {'alpha': alphas}
grid_search = GridSearchCV(lasso_model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# refitted on all of X_train with the winning alpha. Fitting it again here
# would only repeat that work, so no extra fit call is made.
best_alpha = grid_search.best_params_['alpha']
best_model = grid_search.best_estimator_

# Evaluate the selected model on the held-out test data.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Optimal alpha: {best_alpha}")
print(f"Mean Squared Error on test data: {mse:.2f}")
