In [1]:
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from rdatasets import data as rdata

In [2]:
# Load the dataset
boston = rdata("MASS", "Boston")

In [3]:
# Inspect data
print(boston.head())
print(boston.info())
print(boston.describe())

   rownames     crim    zn  indus  chas    nox     rm   age     dis  rad  tax  \
0         1  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296   
1         2  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242   
2         3  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242   
3         4  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222   
4         5  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222   

   ptratio   black  lstat  medv  
0     15.3  396.90   4.98  24.0  
1     17.8  396.90   9.14  21.6  
2     17.8  392.83   4.03  34.7  
3     18.7  394.63   2.94  33.4  
4     18.7  396.90   5.33  36.2  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 15 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   rownames  506 non-null    int64  
 1   crim      506 non-null    float64
 2   zn        506 non-null    float64
 3   indu

In [4]:
# Define response and predictor variables
Y = boston['medv']  # Response variable
X = boston.drop(columns=['medv'])  # All other predictors
X = sm.add_constant(X)  # Add intercept

# Fit the full model
full_model = sm.OLS(Y, X).fit()

# Print AIC and BIC
print(f"Full Model AIC: {full_model.aic:.3f}")
print(f"Full Model BIC: {full_model.bic:.3f}")

Full Model AIC: 3026.090
Full Model BIC: 3089.488


In [10]:
# Subset models
X1 = sm.add_constant(boston[['rm', 'lstat']])  # Model with 'rm' and 'lstat'
X2 = sm.add_constant(boston[['rm', 'lstat', 'crim']])  # Add 'crim'

# Fit models
model1 = sm.OLS(Y, X1).fit()
model2 = sm.OLS(Y, X2).fit()

# Compare AIC and BIC
print(f"Model 1 AIC: {model1.aic:.3f}, BIC: {model1.bic:.3f}")
print(f"Model 2 AIC: {model2.aic:.3f}, BIC: {model2.bic:.3f}")
print(f"Full Model AIC: {full_model.aic:.3f}, BIC: {full_model.bic:.3f}")

Model 1 AIC: 3171.542, BIC: 3184.222
Model 2 AIC: 3163.232, BIC: 3180.138
Full Model AIC: 3026.090, BIC: 3089.488


In [9]:
boston

Unnamed: 0,rownames,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,502,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,503,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,504,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,505,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0
