In [3]:
from sklearn.linear_model import LassoCV
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Step 1: Load the diabetes dataset and pull out the feature matrix and target
data = load_diabetes()
X, y = data.data, data.target

# Step 2: Hold out 20% of the samples for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Configure LassoCV
# The estimator fits every candidate alpha (L1 penalty strength) and keeps the one
# with the best 5-fold cross-validated error.
candidate_alphas = [0.1, 0.5, 1.0, 5, 10]
lasso_cv = LassoCV(alphas=candidate_alphas, cv=5, random_state=42)

# Step 4: Fit on the training portion only
lasso_cv.fit(X_train, y_train)

# Step 5: Report what cross-validation selected
# Averaging mse_path_ over axis 1 collapses the folds, leaving one mean MSE per
# candidate alpha; the smallest of those belongs to the selected alpha.
mean_mse_per_alpha = lasso_cv.mse_path_.mean(axis=1)
print("Best alpha (penalty):", lasso_cv.alpha_)
print("Lasso coefficients:", lasso_cv.coef_)
print("Mean cross-validated score:", mean_mse_per_alpha.min())

# Step 6: Measure generalisation error on the held-out test set
y_pred = lasso_cv.predict(X_test)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Test Mean Squared Error:", test_mse)


Best alpha (penalty): 0.1
Lasso coefficients: [   0.         -152.66477923  552.69777529  303.36515791  -81.36500664
   -0.         -229.25577639    0.          447.91952518   29.64261704]
Mean cross-validated score: 3128.822109870582
Test Mean Squared Error: 2798.1934851697188


In [4]:
import pandas as pd

In [11]:
# Put the fitted Lasso coefficients in a one-column table: one row per
# coefficient, default 0..n-1 index, column labelled 0.
dt = pd.DataFrame(lasso_cv.coef_)
# Show the coefficients ordered from largest to smallest value.
dt.sort_values(by=0, ascending=False)

Unnamed: 0,0
2,552.697775
8,447.919525
3,303.365158
9,29.642617
0,0.0
5,-0.0
7,0.0
4,-81.365007
1,-152.664779
6,-229.255776
