In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

# 1) Load data
# Fetch the California housing dataset, then report its feature names and
# the shapes of the feature and target arrays.
data = fetch_california_housing()
print(f"Columns: {data.feature_names}")
print(f"Shape of X: {data.data.shape}")
print(f"Shape of y: {data.target.shape}")


Columns: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Shape of X: (20640, 8)
Shape of y: (20640,)


In [6]:
# Pull the feature array and the target array out of the loaded dataset.
X = data.data
y = data.target
print(X)


[[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]]


In [7]:
# Show the target values taken from the dataset.
print(y)

[4.526 3.585 3.521 ... 0.923 0.847 0.894]


In [8]:

# 2) Train-test split
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(X_train)


[[   3.2596       33.            5.0176565  ...    3.6918138
    32.71       -117.03      ]
 [   3.8125       49.            4.47354497 ...    1.73809524
    33.77       -118.16      ]
 [   4.1563        4.            5.64583333 ...    2.72321429
    34.66       -120.48      ]
 ...
 [   2.9344       36.            3.98671727 ...    3.33206831
    34.03       -118.38      ]
 [   5.7192       15.            6.39534884 ...    3.17889088
    37.58       -121.96      ]
 [   2.5755       52.            3.40257649 ...    2.10869565
    37.77       -122.42      ]]


In [None]:
# Show the held-out test features produced by the split.
print(X_test)


In [None]:
# Show the training targets produced by the split.
print(y_train)


In [None]:
# Show the held-out test targets produced by the split.
print(y_test)

In [9]:


# 3) Feature scaling
# Fit the scaler on the training split only, then apply that fitted
# transformation to the same split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
print(X_train_scaled)


[[-0.326196    0.34849025 -0.17491646 ...  0.05137609 -1.3728112
   1.27258656]
 [-0.03584338  1.61811813 -0.40283542 ... -0.11736222 -0.87669601
   0.70916212]
 [ 0.14470145 -1.95271028  0.08821601 ... -0.03227969 -0.46014647
  -0.44760309]
 ...
 [-0.49697313  0.58654547 -0.60675918 ...  0.02030568 -0.75500738
   0.59946887]
 [ 0.96545045 -1.07984112  0.40217517 ...  0.00707608  0.90651045
  -1.18553953]
 [-0.68544764  1.85617335 -0.85144571 ... -0.08535429  0.99543676
  -1.41489815]]


In [None]:
# Scale the test features with the existing `scaler` object: only
# transform() is called here, so the scaler is not refitted on test data.
X_test_scaled = scaler.transform(X_test)
print(X_test_scaled)


In [10]:

# 4) Models
# Display name -> unfitted estimator. Insertion order is the order in which
# the models are trained and reported below.
models = {
    "Linear": LinearRegression(),
    "Ridge": Ridge(alpha=1.0),
    "Lasso": Lasso(alpha=0.1),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.5),
}

# 5) Train and evaluate
# Every model is fitted on the scaled training split and scored on the
# scaled test split with mean squared error and R^2.
for name, model in models.items():
    # Header: the display name followed by the estimator's repr.
    print(name, model, sep="\n")

    # fit() hands back the estimator itself, so predict() can be chained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    print(y_pred)

    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    print(f"{name} -> MSE: {mse:.3f}, R2: {r2:.3f}")


Linear
LinearRegression()
[0.71912284 1.76401657 2.70965883 ... 4.46877017 1.18751119 2.00940251]
Linear -> MSE: 0.556, R2: 0.576
Ridge
Ridge()
[0.71947224 1.76384666 2.709309   ... 4.46847645 1.18797174 2.00922052]
Ridge -> MSE: 0.556, R2: 0.576
Lasso
Lasso(alpha=0.1)
[1.21842882 1.58283222 2.10845347 ... 4.02860631 1.71943859 1.85888548]
Lasso -> MSE: 0.680, R2: 0.481
ElasticNet
ElasticNet(alpha=0.1)
[1.1358095  1.61451454 2.18905062 ... 4.06462223 1.66084332 1.85879854]
ElasticNet -> MSE: 0.636, R2: 0.515
