In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the California Housing dataset
cal_housing = fetch_california_housing()

# Convert the dataset into a pandas DataFrame
df = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names)
df['target'] = cal_housing.target

# Handle missing values
print(df.isnull().sum())  # Check for missing values

# Perform necessary feature scaling (e.g., standardization)
scaler = StandardScaler()
df[['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']] = scaler.fit_transform(df[['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']])

# Split the dataset into training and testing sets
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Regression Algorithm Implementation

## Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_y_pred = lr_model.predict(X_test)

## Decision Tree Regressor
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train, y_train)
dt_y_pred = dt_model.predict(X_test)

## Random Forest Regressor
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

## Gradient Boosting Regressor
gb_model = GradientBoostingRegressor()
gb_model.fit(X_train, y_train)
gb_y_pred = gb_model.predict(X_test)

## Support Vector Regressor (SVR)
svr_model = SVR()
svr_model.fit(X_train, y_train)
svr_y_pred = svr_model.predict(X_test)

# Model Evaluation and Comparison

## Mean Squared Error (MSE)
lr_mse = mean_squared_error(y_test, lr_y_pred)
dt_mse = mean_squared_error(y_test, dt_y_pred)
rf_mse = mean_squared_error(y_test, rf_y_pred)
gb_mse = mean_squared_error(y_test, gb_y_pred)
svr_mse = mean_squared_error(y_test, svr_y_pred)

## Mean Absolute Error (MAE)
lr_mae = mean_absolute_error(y_test, lr_y_pred)
dt_mae = mean_absolute_error(y_test, dt_y_pred)
rf_mae = mean_absolute_error(y_test, rf_y_pred)
gb_mae = mean_absolute_error(y_test, gb_y_pred)
svr_mae = mean_absolute_error(y_test, svr_y_pred)

## R-squared Score (R²)
lr_r2 = r2_score(y_test, lr_y_pred)
dt_r2 = r2_score(y_test, dt_y_pred)
rf_r2 = r2_score(y_test, rf_y_pred)
gb_r2 = r2_score(y_test, gb_y_pred)
svr_r2 = r2_score(y_test, svr_y_pred)

# Compare the results of all models
print("Mean Squared Error (MSE):")
print(f"Linear Regression: {lr_mse}")
print(f"Decision Tree Regressor: {dt_mse}")
print(f"Random Forest Regressor: {rf_mse}")
print(f"Gradient Boosting Regressor: {gb_mse}")
print(f"Support Vector Regressor (SVR): {svr_mse}")

print("\nMean Absolute Error (MAE):")
print(f"Linear Regression: {lr_mae}")
print(f"Decision Tree Regressor: {dt_mae}")
print(f"Random Forest Regressor: {rf_mae}")
print(f"Gradient Boosting Regressor: {gb_mae}")
print(f"Support Vector Regressor (SVR): {svr_mae}")

print("\nR-squared Score (R²):")
print(f"Linear Regression: {lr_r2}")
print(f"Decision Tree Regressor: {dt_r2}")
print(f"Random Forest Regressor: {rf_r2}")
print(f"Gradient Boosting Regressor: {gb_r2}")
print(f"Support Vector Regressor (SVR): {svr_r2}")

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
target        0
dtype: int64
Mean Squared Error (MSE):
Linear Regression: 0.5558915986952442
Decision Tree Regressor: 0.4940733357986192
Random Forest Regressor: 0.2540398984746482
Gradient Boosting Regressor: 0.2940804571354899
Support Vector Regressor (SVR): 0.35519846199894217

Mean Absolute Error (MAE):
Linear Regression: 0.5332001304956566
Decision Tree Regressor: 0.4506069937015504
Random Forest Regressor: 0.3269714810077521
Gradient Boosting Regressor: 0.3717234163505605
Support Vector Regressor (SVR): 0.39776309634378626

R-squared Score (R²):
Linear Regression: 0.575787706032451
Decision Tree Regressor: 0.622962491861231
Random Forest Regressor: 0.806136936870142
Gradient Boosting Regressor: 0.7755811643398038
Support Vector Regressor (SVR): 0.7289407597956459
