In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score
#Binary: GaussianNB, Multi: MultinomialNB

ML C64 - Model Selection session 2 (Python demo)
The session is divided into the following parts:

 

1. Introduction and Context Setting - 

2. Data Acquisition and Understanding -

3. Data Preprocessing - 

4. Exploratory Data Analysis (EDA) - 

5. Feature Selection and Dimensionality Reduction -

6. Model Selection - 

7. Model Evaluation and Tuning - 

8. Final Model Selection and Validation - 

9. Deployment Strategy - 

10. Summary, Q&A, and Closing Remarks

In [2]:
df=pd.read_csv("model_selection_data.csv")

In [3]:
df.head(2)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6


# The Boston Housing Dataset 
# The Boston Housing Dataset is a derived from information collected by the U.S. Census Service concerning housing in the area of Boston MA. The following describes the dataset columns:
# CRIM - per capita crime rate by town 
# ZN - proportion of residential land zoned for lots over 25,000 sq.ft. 
# INDUS - proportion of non-retail business acres per town. 
# CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise) 
# NOX - nitric oxides concentration (parts per 10 million) 
# RM - average number of rooms per dwelling 
# AGE - proportion of owner-occupied units built prior to 1940 
# DIS - weighted distances to five Boston employment centres 
# RAD - index of accessibility to radial highways 
# TAX - full-value property-tax rate per $10,000
# PTRATIO - pupil-teacher ratio by town 
# B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
# LSTAT - % lower status of the population
# Target - Median value of owner-occupied homes in $1000's

In [4]:
# Missing Value

In [5]:
df.isnull().mean()

CRIM       0.0
ZN         0.0
INDUS      0.0
CHAS       0.0
NOX        0.0
RM         0.0
AGE        0.0
DIS        0.0
RAD        0.0
TAX        0.0
PTRATIO    0.0
B          0.0
LSTAT      0.0
Target     0.0
dtype: float64

In [6]:
#EDA

In [7]:
# skipping this problem statement considering the majar focus 

In [8]:
X=df.drop(["Target"], axis=1)
y=df["Target"]

In [9]:
X_train, X_test, y_train, y_test=train_test_split(X,y, test_size=0.2,random_state=42)

In [10]:
X_train.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
477,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91
15,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47
332,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83
423,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29
19,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28


In [11]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(404, 13)
(102, 13)
(404,)
(102,)


In [12]:
#Base Model

In [13]:
#Data--.scalling-->polynomialfeature-->model
#y=m1x1^2+m2x1+m3x2^2

In [14]:
#Pipeline creation and fitting
linear_reg_pipeline=make_pipeline(StandardScaler(),PolynomialFeatures(degree=2),LinearRegression())
linear_reg_pipeline.fit(X_train, y_train)

In [15]:
# pridicatiion 
y_pred_linear_reg_train=linear_reg_pipeline.predict(X_train)
y_pred_linear_reg_test=linear_reg_pipeline.predict(X_test)

In [16]:
r2_linear_reg_train=r2_score(y_train, y_pred_linear_reg_train)
r2_linear_reg_test=r2_score(y_test, y_pred_linear_reg_test)

In [17]:
print("Training R-square:")
print(f"linear R-gression:{r2_linear_reg_train}")
print("\n testing R-square:")
print(f"linear R-gression:{r2_linear_reg_test}")


Training R-square:
linear R-gression:0.9409335229605889

 testing R-square:
linear R-gression:0.8056742630273065


In [18]:
#Riage Regression 

In [127]:
ridge_reg_pipeline=make_pipeline(StandardScaler(),PolynomialFeatures(degree=2),Ridge(alpha=.2))
ridge_reg_pipeline.fit(X_train, y_train)

In [133]:
# pridicatiion 
y_pred_ridge_reg_train=linear_reg_pipeline.predict(X_train)
y_pred_ridge_reg_test=linear_reg_pipeline.predict(X_test)

In [134]:
r2_linear_reg_train=r2_score(y_train, y_pred_ridge_reg_train)
r2_linear_reg_test=r2_score(y_test, y_pred_ridge_reg_test)

In [135]:
print("Training R-square:")
print(f"linear R-gression:{r2_linear_reg_train:.2f}")
print("\n testing R-square:")
print(f"linear R-gression:{r2_linear_reg_test:.2f}")


Training R-square:
linear R-gression:0.94

 testing R-square:
linear R-gression:0.81


In [23]:
# Mean MAX scaler

In [124]:
ridge_reg_pipeline=make_pipeline(MinMaxScaler(),PolynomialFeatures(degree=2),Ridge(alpha=.8))
ridge_reg_pipeline.fit(X_train, y_train)

In [125]:
# pridicatiion 
y_pred_ridge_reg_train=ridge_reg_pipeline.predict(X_train)
y_pred_ridge_reg_test=ridge_reg_pipeline.predict(X_test)

In [122]:
r2_linear_reg_train=r2_score(y_train, y_pred_ridge_reg_train)
r2_linear_reg_test=r2_score(y_test, y_pred_ridge_reg_test)

In [126]:
print("Training R-square:")
print(f"linear R-gression:{r2_linear_reg_train:.2f}")
print("\n testing R-square:")
print(f"linear R-gression:{r2_linear_reg_test:.2f}")


Training R-square:
linear R-gression:0.89

 testing R-square:
linear R-gression:0.84


In [28]:
#Lasso Regression

In [81]:
lasso_reg_pipeline=make_pipeline(StandardScaler(),PolynomialFeatures(degree=2),Lasso(alpha=.45))
lasso_reg_pipeline.fit(X_train, y_train)

In [116]:
lasso_reg_pipeline=make_pipeline(MinMaxScaler(),PolynomialFeatures(degree=2),Lasso(alpha=.021))
lasso_reg_pipeline.fit(X_train, y_train)

  model = cd_fast.enet_coordinate_descent(


In [117]:
# pridicatiion 
y_pred_lasso_reg_train=lasso_reg_pipeline.predict(X_train)
y_pred_lasso_reg_test=lasso_reg_pipeline.predict(X_test)

In [118]:
r2_lasso_reg_train=r2_score(y_train, y_pred_lasso_reg_train)
r2_lasso_reg_test=r2_score(y_test, y_pred_lasso_reg_test)


In [119]:
print("Training R-square:")
print(f"linear R-gression:{r2_lasso_reg_train:.2f}")
print("\n testing R-square:")
print(f"linear R-gression:{r2_lasso_reg_test:.2f}")


Training R-square:
linear R-gression:0.85

 testing R-square:
linear R-gression:0.82


In [114]:
# Model Selections:

- Linear Regression (BaseLine Model) :Train R2:94%, Test R2:81% (BaseLine Model) 13% rejected
- Ridge Model (alpha=85, Standard Scaler) :Train:89%, Test:82% -7 % rejected
- Ridge Model (alpha=250, Standard Scaler) :Train:84%, Test:78% -6 % rejected
- Ridge Model (alpha=0.2, MinMax Scaler) :Train:90%, Test:85% -5 % rejected
- Ridge Model (alpha=0.8, MinMax Scaler) :Train:87%, Test:84% -3 % selected
- Lasso Model (alpha=0.45, Standard Scaler):Train:83%, Test:78%-5 % rejected 
- Lasso Model (alpha=0.021, MinMax Scaler) :Train:85%, Test:82%-3 % rejected 