In [3]:
# Import necessary libraries
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the diabetes regression data and pull out the feature matrix and target
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit an L1-regularised linear model on the training portion
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# Show the full coefficient vector (one entry per feature, zeros included)
coef = lasso.coef_
print("Selected Features Coefficients: ", coef)

# Features with a non-zero coefficient are the ones treated as selected
selected_features = np.flatnonzero(coef)
print("Selected feature indices:", selected_features)

# Score the fitted model on the held-out samples
y_pred = lasso.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")


Selected Features Coefficients:  [   0.         -173.27234183  558.93537748  339.35670724  -58.72262052
   -0.         -274.11401212    0.          372.84221354   25.58549157]
Selected feature indices: [1 2 3 4 6 8 9]
Mean Squared Error: 2775.1600
