In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error

In [4]:
# Read the housing CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="California_housing.csv")
# Show the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200


In [5]:
# Drop every row that contains at least one missing value.
# The result is rebound to `df` rather than using inplace=True: this keeps the
# step chainable and avoids mutating the frame object in place.
df = df.dropna()

In [6]:
# One-hot encode the categorical 'ocean_proximity' column.
# drop_first=True keeps k-1 indicator columns by omitting the first category.
df = pd.get_dummies(
    data=df,
    columns=['ocean_proximity'],
    drop_first=True,
)

In [7]:
# Name of the column the model predicts.
target = 'median_house_value'
# Every remaining column of the encoded frame is used as a predictor.
features = list(df.columns)
features.remove(target)

In [8]:
# Separate the predictor matrix from the target column.
X, y = df[features], df[target]
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so test-set statistics never enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Fit an L1-regularised linear model on the scaled training features.
# NOTE(review): in the recorded run this alpha zeroed out no coefficients;
# if sparse feature selection is the goal, a larger alpha may be needed — confirm.
lasso_model = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
# fit() returns the estimator itself, so this displays the fitted model.
lasso_model

In [11]:
# Pair each feature name with its fitted Lasso coefficient, one row per feature.
lasso_coefficients = pd.DataFrame(
    data={'Feature': features, 'Lasso Coefficients': lasso_model.coef_},
)


In [12]:
# Keep only the rows whose coefficient is non-zero, i.e. features Lasso retained.
selected_features = lasso_coefficients.loc[
    lasso_coefficients['Lasso Coefficients'].ne(0)
]
# Print the heading and the table on separate lines.
print("Selected Features after Lasso:", selected_features, sep="\n")

Selected Features after Lasso:
                       Feature  Lasso Coefficients
0                    longitude       -54372.796266
1                     latitude       -54805.113995
2           housing_median_age        13600.028871
3                  total_rooms       -13611.067017
4               total_bedrooms        42997.632036
5                   population       -41118.374387
6                   households        16306.930485
7                median_income        74538.261852
8       ocean_proximity_INLAND       -18235.260706
9       ocean_proximity_ISLAND         2894.093041
10    ocean_proximity_NEAR BAY        -1970.444239
11  ocean_proximity_NEAR OCEAN         1062.656826


In [13]:
# Predict target values for the held-out, scaled test features.
y_pred_lasso = lasso_model.predict(X=X_test_scaled)

In [14]:
# Score the held-out predictions: mean squared error and R² against the true values.
mse_lasso = mean_squared_error(y_true=y_test, y_pred=y_pred_lasso)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred_lasso)
print(f"Lasso Regression -> R²: {r2_lasso}, MSE: {mse_lasso}")

Lasso Regression -> R²: 0.6488401950304886, MSE: 4802173817.751669


In [15]:
# Count the coefficients that are exactly zero, i.e. the features Lasso removed.
num_eliminated = lasso_coefficients['Lasso Coefficients'].eq(0).sum()
print(f"Lasso eliminated {num_eliminated} features by setting their coefficients to zero.")


Lasso eliminated 0 features by setting their coefficients to zero.
