In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
from statsmodels.tools.eval_measures import aic, bic

In [5]:
# Load the Boston Housing dataset
#LOAD
df = pd.read_csv(r"C:\Users\ElavarasiChinnadurai\Downloads\boston.csv")

In [6]:
# Simple Linear Regression: MEDV ~ RM

X_simple = df[['RM']]
y = df['MEDV']
X_simple = sm.add_constant(X_simple)

model_simple = sm.OLS(y, X_simple).fit()
print(model_simple.summary())

                            OLS Regression Results                            
Dep. Variable:                   MEDV   R-squared:                       0.484
Model:                            OLS   Adj. R-squared:                  0.483
Method:                 Least Squares   F-statistic:                     471.8
Date:                Fri, 26 Sep 2025   Prob (F-statistic):           2.49e-74
Time:                        12:49:22   Log-Likelihood:                -1673.1
No. Observations:                 506   AIC:                             3350.
Df Residuals:                     504   BIC:                             3359.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -34.6706      2.650    -13.084      0.0

In [8]:
#Multiple Linear Regression: MEDV ~ RM + LSTAT + CRIM
X_multi = df[['RM', 'LSTAT', 'CRIM']]
X_multi = sm.add_constant(X_multi)

model_multi = sm.OLS(y, X_multi).fit()
print(model_multi.summary())

                            OLS Regression Results                            
Dep. Variable:                   MEDV   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.644
Method:                 Least Squares   F-statistic:                     305.2
Date:                Fri, 26 Sep 2025   Prob (F-statistic):          1.01e-112
Time:                        12:50:20   Log-Likelihood:                -1577.6
No. Observations:                 506   AIC:                             3163.
Df Residuals:                     502   BIC:                             3180.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5623      3.166     -0.809      0.4

In [9]:
#MSE, RMSE, R², Adjusted R²

#Predictions
y_pred_simple = model_simple.predict(X_simple)
y_pred_multi = model_multi.predict(X_multi)

#Metrics
mse_simple = mean_squared_error(y, y_pred_simple)
rmse_simple = mse_simple ** 0.5

mse_multi = mean_squared_error(y, y_pred_multi)
rmse_multi = mse_multi ** 0.5

print("Simple Model: MSE =", mse_simple, "RMSE =", rmse_simple)
print("Multiple Model: MSE =", mse_multi, "RMSE =", rmse_multi)

Simple Model: MSE = 43.60055177116956 RMSE = 6.603071389222561
Multiple Model: MSE = 29.8970126193923 RMSE = 5.46781607402739


In [10]:
#5-Fold Cross-Validation (for Multiple Model)
X = df[['RM', 'LSTAT', 'CRIM']]
y = df['MEDV']

lr = LinearRegression()
scores = cross_val_score(lr, X, y, scoring='r2', cv=5)

print("5-Fold Cross-Validation R² Scores:", scores)
print("Average R²:", scores.mean())

5-Fold Cross-Validation R² Scores: [ 0.65218907  0.69317906  0.41882483  0.17386096 -0.27841169]
Average R²: 0.33192844374030106


In [11]:
#Compare AIC & BIC
print("Simple Model AIC:", model_simple.aic, "BIC:", model_simple.bic)
print("Multiple Model AIC:", model_multi.aic, "BIC:", model_multi.bic)

Simple Model AIC: 3350.151117225073 BIC: 3358.604190563648
Multiple Model AIC: 3163.231628486269 BIC: 3180.137775163419
