# Linear, Lasso, Ridge Regression for Housing Data

## Boston Housing Dataset

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

### Load Dataset

In [None]:
# Download the Boston housing data as whitespace-separated text, skipping the
# first 22 lines of the file and reading without a header row.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string: "\s" is not a valid Python string escape, so the non-raw form
# triggers an invalid-escape warning on modern Python versions.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Rows are consumed in pairs: every even row contributes all of its columns,
# and the following odd row contributes its first two columns as extra
# features and its third column as the regression target.
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

X = data
y = target

### Split and Scale Data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Linear Regression

In [None]:
# Fit an unregularized linear model on the scaled training features.
lr = LinearRegression().fit(X_train_scaled, y_train)
y_pred = lr.predict(X_test_scaled)

# Report held-out error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Boston - Linear Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

### Lasso Regression

In [None]:
# L1-regularized linear model (alpha=0.1) on the scaled training features.
lasso = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)

# Report held-out metrics, then the fitted coefficient vector.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nBoston - Lasso Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")
print("Coefficients:", lasso.coef_)

### Ridge Regression

In [None]:
# L2-regularized linear model (alpha=1.0) on the scaled training features.
ridge = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred = ridge.predict(X_test_scaled)

# Report held-out metrics, then the fitted coefficient vector.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nBoston - Ridge Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")
print("Coefficients:", ridge.coef_)

## OpenML Housing Dataset

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

### Load Dataset

In [None]:
# Fetch the "house_prices" dataset from OpenML as pandas objects and unpack
# the feature table and the target.
housing = fetch_openml(
    name="house_prices",
    as_frame=True,
    parser='auto',
)
X, y = housing.data, housing.target

### Preprocess and Split Data

In [None]:
from sklearn.impute import SimpleImputer

# Route columns to a transformer by dtype. 'category' is accepted alongside
# 'object' because, depending on the ARFF parser, string-valued columns may be
# loaded as pandas categoricals; without it they would match neither selector
# and be dropped by the ColumnTransformer.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns

# Impute before scaling: the linear estimators fitted on this preprocessor's
# output reject NaN inputs.
# NOTE(review): this dataset is expected to contain missing numeric values
# (e.g. LotFrontage) - confirm against the fetched frame.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categories unseen during fit are encoded as all-zero rows instead of raising.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Hold out 20% of the rows for evaluation with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Linear Regression


In [None]:
# Chain the shared preprocessor with an unregularized linear model so the raw
# DataFrame can be passed straight to fit/predict.
lr_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
lr_pipe = Pipeline(steps=lr_steps).fit(X_train, y_train)
y_pred = lr_pipe.predict(X_test)

# Report held-out error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nOpenML House Prices - Linear Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

### Lasso Regression

In [None]:
# Chain the shared preprocessor with an L1-regularized model (alpha=0.1).
lasso_steps = [
    ('preprocessor', preprocessor),
    ('regressor', Lasso(alpha=0.1)),
]
lasso_pipe = Pipeline(steps=lasso_steps).fit(X_train, y_train)
y_pred = lasso_pipe.predict(X_test)

# Report held-out error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nOpenML House Prices - Lasso Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

### Ridge Regression

In [None]:
# Chain the shared preprocessor with an L2-regularized model (alpha=1.0).
ridge_steps = [
    ('preprocessor', preprocessor),
    ('regressor', Ridge(alpha=1.0)),
]
ridge_pipe = Pipeline(steps=ridge_steps).fit(X_train, y_train)
y_pred = ridge_pipe.predict(X_test)

# Report held-out error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nOpenML House Prices - Ridge Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

## California Housing Dataset

### Load Dataset

In [None]:
from sklearn.datasets import fetch_california_housing

# Load the California housing data through scikit-learn's dataset fetcher and
# pull out the feature matrix and the regression target.
housing = fetch_california_housing()
X = housing.data
# The attribute is `target`; the previous spelling `targe` raised
# AttributeError before any model could be trained.
y = housing.target

### Split and Scale Data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Linear Regression

In [None]:
# Fit an unregularized linear model on the scaled training features.
lr = LinearRegression().fit(X_train_scaled, y_train)
y_pred = lr.predict(X_test_scaled)

# Report held-out error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nCalifornia Housing - Linear Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

### Lasso Regression

In [None]:
# L1-regularized linear model (alpha=0.1) on the scaled training features.
lasso = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)

# Report held-out metrics, then the fitted coefficient vector.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nCalifornia Housing - Lasso Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")
print("Coefficients:", lasso.coef_)

### Ridge Regression

In [None]:
# L2-regularized linear model (alpha=1.0) on the scaled training features.
ridge = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred = ridge.predict(X_test_scaled)

# Report held-out metrics, then the fitted coefficient vector.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nCalifornia Housing - Ridge Regression:")
print(f"MSE: {mse:.2f}, R2: {r2:.2f}")
print("Coefficients:", ridge.coef_)