In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


In [2]:
# Load the California housing data as pandas objects: X holds the feature
# columns and y the regression target.
X, y = fetch_california_housing(as_frame=True, return_X_y=True)


In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Ridge regression behind a scaler. Because the scaler lives inside the
# pipeline, it is fit on the training rows only and then reused unchanged
# when predicting on the test rows.
ridge = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', Ridge(alpha=1.0)),
])

# fit() hands back the fitted pipeline, so predict() can be chained onto it.
y_pred = ridge.fit(X_train, y_train).predict(X_test)
print("Ridge R2:", r2_score(y_test, y_pred))


Ridge R2: 0.5758157428913684


In [5]:
# Lasso regression behind the same scaling step as the ridge model. The
# step name 'model' is looked up later to read the fitted coefficients.
lasso = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', Lasso(alpha=0.05)),
])

# NOTE: y_pred is overwritten here; it now holds the lasso predictions.
y_pred = lasso.fit(X_train, y_train).predict(X_test)
print("Lasso R2:", r2_score(y_test, y_pred))


Lasso R2: 0.5305222464262259


In [6]:
# Read the fitted coefficients off the pipeline's 'model' step and keep only
# the feature columns whose coefficient is non-zero.
lasso_coef = lasso['model'].coef_
selected_features = X.columns[lasso_coef != 0]
print(selected_features)


Index(['MedInc', 'HouseAge', 'Latitude', 'Longitude'], dtype='object')
