# California Housing – Baseline Linear Regression, with Ridge and Lasso Comparisons


In [2]:
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import Ridge


((16512, 8), (4128, 8))

In [2]:
# Read the saved feature matrices and targets for both splits from ../data.
X_train_scaled, X_test_scaled = (
    np.load("../data/X_train_scaled.npy"),
    np.load("../data/X_test_scaled.npy"),
)
y_train, y_test = (
    np.load("../data/y_train.npy"),
    np.load("../data/y_test.npy"),
)

# Cell output: (train shape, test shape) of the feature matrices.
(X_train_scaled.shape, X_test_scaled.shape)


((16512, 8), (4128, 8))

In [3]:
# Baseline: LinearRegression with default settings, fit on the training split.
# fit() returns the estimator itself, so the trailing `lr` displays the same repr.
lr = LinearRegression().fit(X_train_scaled, y_train)
lr


0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [4]:
# Baseline predictions on the test features; these are scored in the next cell.
y_pred = lr.predict(X_test_scaled)


In [5]:
# Score the baseline predictions against the test targets.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
# RMSE is the square root of the MSE computed above.
rmse = np.sqrt(mse)

# Cell output: (RMSE, MAE, R²).
(rmse, mae, r2)



(np.float64(0.7455813830127763), 0.5332001304956566, 0.575787706032451)

In [3]:
# Ridge regression with alpha = 1.0, fit on the training split.
ridge = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_test_scaled)

# Same three test-set metrics as the baseline, kept under *_ridge names.
mae_ridge, r2_ridge = (
    metric(y_test, y_pred_ridge) for metric in (mean_absolute_error, r2_score)
)
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

# Cell output: (RMSE, MAE, R²).
(rmse_ridge, mae_ridge, r2_ridge)


(np.float64(0.745556744281478), 0.5331931195789734, 0.5758157428913682)

In [5]:
alphas = [0.01, 0.1, 1.0, 10.0, 100.0]


def report_ridge(alpha):
    """Fit Ridge at `alpha` on the training split and print its test-set scores.

    Prints RMSE, MAE and R² for the test split. The model, predictions and
    metrics are local to this function, so the sweep does not overwrite the
    notebook-level `ridge`, `y_pred`, `rmse`, `mae` or `r2` set by earlier cells.

    Parameters
    ----------
    alpha : float
        Regularization strength passed to `Ridge(alpha=...)`.
    """
    model = Ridge(alpha=alpha).fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    print(f"Alpha = {alpha}")
    print(f"  RMSE: {rmse}")
    print(f"  MAE : {mae}")
    print(f"  R²  : {r2}")
    print("-" * 40)


# NOTE(review): these are test-set scores. Choosing alpha from them would leak
# test information; use a validation split or cross-validation for selection.
for a in alphas:
    report_ridge(a)


Alpha = 0.01
  RMSE: 0.7455811358288168
  MAE : 0.5332000589081536
  R²  : 0.5757879873121596
----------------------------------------
Alpha = 0.1
  RMSE: 0.7455789118982767
  MAE : 0.5331994146968932
  R²  : 0.5757905180002312
----------------------------------------
Alpha = 1.0
  RMSE: 0.745556744281478
  MAE : 0.5331931195789734
  R²  : 0.5758157428913682
----------------------------------------
Alpha = 10.0
  RMSE: 0.7453421422218557
  MAE : 0.5331379811394743
  R²  : 0.576059903284837
----------------------------------------
Alpha = 100.0
  RMSE: 0.7438185949703784
  MAE : 0.5330142193095552
  R²  : 0.5777912763033604
----------------------------------------


In [6]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np


In [7]:
# Lasso with alpha = 0.01 (max_iter raised to 10000), fit on the training split.
lasso = Lasso(alpha=0.01, max_iter=10000).fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_test_scaled)

# Same three test-set metrics as the other models, kept under *_lasso names.
mae_lasso, r2_lasso = (
    metric(y_test, y_pred_lasso) for metric in (mean_absolute_error, r2_score)
)
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))

# Cell output: (RMSE, MAE, R²).
(rmse_lasso, mae_lasso, r2_lasso)


(np.float64(0.7404423656125414), 0.5353261423609051, 0.5816154300698727)

In [8]:
# Count how many coefficients of the fitted Lasso are not exactly zero.
total = len(lasso.coef_)
non_zero = (lasso.coef_ != 0).sum()

print(f"Non-zero coefficients: {non_zero}/{total}")


Non-zero coefficients: 7/8


In [9]:
alphas = [0.001, 0.01, 0.1, 1.0]


def report_lasso(alpha):
    """Fit Lasso at `alpha` on the training split and print its test-set scores.

    Prints RMSE, MAE, R² and the number of coefficients that are not exactly
    zero. The model, predictions and metrics are local to this function, so
    the sweep does not overwrite the notebook-level `lasso` or `non_zero`
    (the alpha = 0.01 fit inspected earlier), nor `y_pred`, `rmse`, `mae`
    or `r2`.

    Parameters
    ----------
    alpha : float
        Regularization strength passed to `Lasso(alpha=...)`.
    """
    model = Lasso(alpha=alpha, max_iter=10000).fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    kept = np.sum(model.coef_ != 0)

    print(f"Alpha = {alpha}")
    print(f"  RMSE: {rmse}")
    print(f"  MAE : {mae}")
    print(f"  R²  : {r2}")
    print(f"  Non-zero features: {kept}")
    print("-" * 40)


# NOTE(review): these are test-set scores. Choosing alpha from them would leak
# test information; use a validation split or cross-validation for selection.
for a in alphas:
    report_lasso(a)


Alpha = 0.001
  RMSE: 0.7446417662764214
  MAE : 0.5331447750392391
  R²  : 0.5768562568705682
  Non-zero features: 8
----------------------------------------
Alpha = 0.01
  RMSE: 0.7404423656125414
  MAE : 0.5353261423609051
  R²  : 0.5816154300698727
  Non-zero features: 7
----------------------------------------
Alpha = 0.1
  RMSE: 0.8243961598848472
  MAE : 0.6222011605619466
  R²  : 0.48136113250290735
  Non-zero features: 3
----------------------------------------
Alpha = 1.0
  RMSE: 1.1448563543099792
  MAE : 0.9060685490007149
  R²  : -0.00021908714592466794
  Non-zero features: 0
----------------------------------------
