In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

print(df.head())
print(df.columns)


   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  target  
0    -122.23   4.526  
1    -122.22   3.585  
2    -122.24   3.521  
3    -122.25   3.413  
4    -122.25   3.422  
Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude', 'target'],
      dtype='object')


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
X = df.drop('target', axis=1)  # all columns except target
y = df['target']               # only target column


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)


In [None]:
model = LinearRegression()
model.fit(X_train, y_train)

print("Intercept:", model.intercept_) # intercept
print("Coefficients:", model.coef_)  # coefficients

for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature} : {coef}")


equation = "y = " + str(model.intercept_)
for feature, coef in zip(X.columns, model.coef_):
    equation += f" + ({coef})*{feature}"
print(equation)



y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)



print("Mean Squared Error:", mse)
print("R2 Score:", r2)






Intercept: -37.0957939359066
Coefficients: [ 4.49244604e-01  9.50481954e-03 -1.28102920e-01  8.20334578e-01
 -1.29461684e-06 -3.21412657e-03 -4.18425852e-01 -4.33787875e-01]
MedInc : 0.4492446035012598
HouseAge : 0.009504819540916566
AveRooms : -0.12810292031600842
AveBedrms : 0.8203345777220473
Population : -1.2946168386241372e-06
AveOccup : -0.0032141265676856262
Latitude : -0.41842585205860156
Longitude : -0.4337878754343774
y = -37.0957939359066 + (0.4492446035012598)*MedInc + (0.009504819540916566)*HouseAge + (-0.12810292031600842)*AveRooms + (0.8203345777220473)*AveBedrms + (-1.2946168386241372e-06)*Population + (-0.0032141265676856262)*AveOccup + (-0.41842585205860156)*Latitude + (-0.4337878754343774)*Longitude
Mean Squared Error: 0.5435945247974563
R2 Score: 0.5939400468776985


In [19]:
from sklearn.linear_model import Ridge
# ✅ Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ✅ Create and train Ridge Regression model
ridge_model = Ridge(alpha=10.0)  # alpha is the regularization strength
ridge_model.fit(X_train, y_train)

# ✅ Make predictions
y_pred = ridge_model.predict(X_test)

# ✅ Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R2 Score:", r2)

# ✅ Print regression equation
equation = f"y = {ridge_model.intercept_:.4f}"
for feature, coef in zip(X.columns, ridge_model.coef_):
    equation += f" + ({coef:.4f})*{feature}"
print("\nRidge Regression Equation:")
print(equation)

Mean Squared Error: 0.5550405537343013
R2 Score: 0.5764371559180014

Ridge Regression Equation:
y = -36.9838 + (0.4471)*MedInc + (0.0097)*HouseAge + (-0.1203)*AveRooms + (0.7662)*AveBedrms + (-0.0000)*Population + (-0.0035)*AveOccup + (-0.4197)*Latitude + (-0.4334)*Longitude


In [20]:
from sklearn.linear_model import Lasso
# Create and train Lasso Regression model
lasso_model = Lasso(alpha=0.034222561573497685)  # alpha = regularization strength
lasso_model.fit(X_train, y_train)

# Make predictions
y_pred = lasso_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)

r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)

print("R2 Score:", r2)

# Print regression equation
equation = f"y = {lasso_model.intercept_:.4f}"
for feature, coef in zip(X.columns, lasso_model.coef_):
    equation += f" + ({coef:.4f})*{feature}"
print("\nLasso Regression Equation:")
print(equation)

Mean Squared Error: 0.5555752649052168
R2 Score: 0.5760291068434038

Lasso Regression Equation:
y = -27.5008 + (0.3843)*MedInc + (0.0115)*HouseAge + (0.0021)*AveRooms + (0.0000)*AveBedrms + (0.0000)*Population + (-0.0032)*AveOccup + (-0.3310)*Latitude + (-0.3307)*Longitude


In [None]:
# Lasso with cross-validation to find best alpha
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
lasso_cv = LassoCV(alphas=None, cv=5, max_iter=10000, random_state=42)
lasso_cv.fit(X_train, y_train)

# Ridge with cross-validation
ridge_alphas = [0.1, 1.0, 10.0, 50.0, 100.0]  # specify alpha values
ridge_cv = RidgeCV(alphas=ridge_alphas, cv=5)
ridge_cv.fit(X_train, y_train)
print("Best alpha for Ridge:", ridge_cv.alpha_)

print("Best alpha found for LASSO :", lasso_cv.alpha_)



Best alpha for Ridge: 10.0
Best alpha found: 0.034222561573497685
