## Assignment 6 — Ridge, Lasso & Polynomial Regression Comparison

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

import warnings
warnings.filterwarnings("ignore")


In [2]:
df = pd.read_csv("E:/Engineering/Machine Learning/ADS-VAC/notebooks/datasets/housing.csv")
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


### Preprocess Data

Remove id and date.
Zipcode → One-hot encoding.

In [3]:
df = df.drop(["id", "date"], axis=1)

df = pd.get_dummies(df, columns=["zipcode"], drop_first=True)

X = df.drop("price", axis=1)
y = df["price"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


### RMSE & R² Function

In [4]:
def evaluate(model, X_test, y_test):
    preds = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    r2 = r2_score(y_test, preds)
    return rmse, r2


### Ridge Regression

In [5]:
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)

ridge_rmse, ridge_r2 = evaluate(ridge, X_test_scaled, y_test)
ridge_rmse, ridge_r2


(170911.74231136753, 0.8067769604365042)

### Lasso Regression

In [6]:
lasso = Lasso(alpha=0.001)   # small alpha = less aggressive shrinking
lasso.fit(X_train_scaled, y_train)

lasso_rmse, lasso_r2 = evaluate(lasso, X_test_scaled, y_test)
lasso_rmse, lasso_r2


(170911.74296128686, 0.806776958966981)

### Polynomial Regression (degree=2)

In [7]:
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)


Train Linear Regression on Polynomial Features

In [8]:
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)

poly_rmse, poly_r2 = evaluate(poly_reg, X_test_poly, y_test)
poly_rmse, poly_r2


(241043845175906.38, -3.8433257883997126e+17)

### Results

In [9]:
results = pd.DataFrame({
    "Model": ["Ridge", "Lasso", "Polynomial Regression (deg 2)"],
    "RMSE": [ridge_rmse, lasso_rmse, poly_rmse],
    "R² Score": [ridge_r2, lasso_r2, poly_r2]
})

results


Unnamed: 0,Model,RMSE,R² Score
0,Ridge,170911.7,0.806777
1,Lasso,170911.7,0.806777
2,Polynomial Regression (deg 2),241043800000000.0,-3.843326e+17


### Interpretation of Results

#### Ridge Regression

Penalizes large coefficients but keeps all variables

Typically performs best among penalized linear models

Good stability & generalization

#### Lasso Regression

Performs feature selection by shrinking some coefficients to zero

May underperform if features are highly correlated

Useful for reducing dimensionality

#### Polynomial Regression

Captures non-linear patterns

Degree 2 usually improves R²

But can cause overfitting → higher RMSE

#### ✔ What to Expect in Results

Ridge usually > Lasso

Polynomial may give high R² but sometimes worse RMSE if overfitting

Lasso removes weak predictors → may reduce accuracy

#### ✔ Example Insights

Housing prices have non-linear relationships (sqft_living², grade interactions)

Regularization is important due to many features (especially after one-hot encoding)