In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error, r2_score

In [5]:
df = pd.read_csv("California_housing.csv")
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200


In [6]:
df.dropna(inplace=True)

In [7]:
df['ocean_proximity'] = LabelEncoder().fit_transform(df['ocean_proximity'])

In [8]:
features = df.columns.tolist()
features.remove('median_house_value')
target = 'median_house_value'

In [9]:
X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
param_grid = {'alpha': [0.01, 0.1, 1, 10]}

ridge = Ridge()
lasso = Lasso()

ridge_search = GridSearchCV(ridge, param_grid, cv=5, scoring='r2')
lasso_search = GridSearchCV(lasso, param_grid, cv=5, scoring='r2')

ridge_search.fit(X_train_scaled, y_train)
lasso_search.fit(X_train_scaled, y_train)

best_ridge_alpha = ridge_search.best_params_['alpha']
best_lasso_alpha = lasso_search.best_params_['alpha']

ridge_best = Ridge(alpha=best_ridge_alpha)
lasso_best = Lasso(alpha=best_lasso_alpha)

ridge_best.fit(X_train_scaled, y_train)
lasso_best.fit(X_train_scaled, y_train)

ridge_pred = ridge_best.predict(X_test_scaled)
lasso_pred = lasso_best.predict(X_test_scaled)

ridge_r2 = r2_score(y_test, ridge_pred)
lasso_r2 = r2_score(y_test, lasso_pred)

ridge_mse = mean_squared_error(y_test, ridge_pred)
lasso_mse = mean_squared_error(y_test, lasso_pred)

In [12]:
print(f"Best Ridge Alpha: {best_ridge_alpha}")
print(f"Best Lasso Alpha: {best_lasso_alpha}")
print(f"Ridge R²: {ridge_r2}, MSE: {ridge_mse}")
print(f"Lasso R²: {lasso_r2}, MSE: {lasso_mse}")


Best Ridge Alpha: 10
Best Lasso Alpha: 10
Ridge R²: 0.6398841016702572, MSE: 4924650013.589562
Lasso R²: 0.6399057946784747, MSE: 4924353357.7803955
