# Objective

To review model selection and model assessment in a classification context


## Preliminaries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

**Model selection** involves estimating $E_{val}$ for a bouqet of models in order to arrive at a final model

**Model assessment** involves estimating $E_{test}$ for the *final model* before it is rolled into production

Model selection requires a validation set

Model assessment required a test set

# Example: **Airplane accidents**

## Data

In [2]:
airplanes_df = pd.read_csv("/content/drive/MyDrive/AI-ML/supervised-learning-revision/Day2/data/airplane-accidents.csv")

In [3]:
airplanes_df.shape

(10000, 12)

In [4]:
airplanes_df.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352,7570
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235,12128
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364,2181
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728,5946
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883,9054


In [5]:
airplanes_X, airplanes_y = (airplanes_df.drop(["Severity", "Accident_ID"], axis=1),
                            airplanes_df.Severity)

In [6]:
airplanes_X_train, airplanes_Xtest, airplanes_y_train, airplanes_ytest = train_test_split(airplanes_X,
                                                                                          airplanes_y,
                                                                                          test_size=0.2,
                                                                                          random_state=20130810)

We will not touch testing data till the very end.

## Model 1: **Softmax**

In [7]:
airplanes_Xtrain, airplanes_Xvalid, airplanes_ytrain, airplanes_yvalid = train_test_split(airplanes_X_train,
                                                                                          airplanes_y_train,
                                                                                          test_size=0.2,
                                                                                          random_state=20130810)

### Tuning *C*


In [8]:
sc = StandardScaler()

In [9]:
airplanes_scaledXtrain = sc.fit_transform(airplanes_Xtrain)

airplanes_scaledXvalid = sc.transform(airplanes_Xvalid)

In [10]:
C_values = [0.001, 0.1, 1, 5, 10]

In [11]:
training_accuracy, validation_accuracy = [], []
training_f1, validation_f1 = [], []

In [12]:
%%time

for c in C_values:
  learner_softmax = LogisticRegression(C=c,
                                       multi_class='multinomial')
  learner_softmax.fit(airplanes_scaledXtrain, 
                      airplanes_ytrain)
  
  training_accuracy.append(learner_softmax.score(airplanes_scaledXtrain,
                                                 airplanes_ytrain))
  
  validation_accuracy.append(learner_softmax.score(airplanes_scaledXvalid,
                                                   airplanes_yvalid))
  
  training_f1.append(f1_score(airplanes_ytrain,
                              learner_softmax.predict(airplanes_scaledXtrain),
                              average='macro'))
  
  validation_f1.append(f1_score(airplanes_yvalid,
                                learner_softmax.predict(airplanes_scaledXvalid),
                                average='macro'))

CPU times: user 636 ms, sys: 0 ns, total: 636 ms
Wall time: 657 ms


In [13]:
pd.DataFrame({'C': C_values,
              'training_accuracy': training_accuracy,
              'validation_accuracy': validation_accuracy,
              'training_f1': training_f1,
              'validation_f1': validation_f1})

Unnamed: 0,C,training_accuracy,validation_accuracy,training_f1,validation_f1
0,0.001,0.505469,0.49625,0.43713,0.423211
1,0.1,0.640469,0.6275,0.595043,0.574621
2,1.0,0.644687,0.63125,0.599743,0.579201
3,5.0,0.644844,0.63125,0.599886,0.579116
4,10.0,0.644844,0.63125,0.599886,0.579116


The best model has $C=1$

## Model 2: **KNN**

In [14]:
airplanes_Xtrain, airplanes_Xvalid, airplanes_ytrain, airplanes_yvalid = train_test_split(airplanes_X_train,
                                                                                          airplanes_y_train,
                                                                                          test_size=0.2,
                                                                                          random_state=20130810)

In [15]:
sc = StandardScaler()

In [16]:
airplanes_scaledXtrain = sc.fit_transform(airplanes_Xtrain)

airplanes_scaledXvalid = sc.transform(airplanes_Xvalid)

### Tuning *$K$*

In [17]:
k_values = [2, 3, 4, 5, 6, 7, 8, 9, 10]

training_accuracy, validation_accuracy = [], []
training_f1, validation_f1 = [], []

In [18]:
%%time

for k in k_values:
  learner_knn = KNeighborsClassifier(n_neighbors=k)
  learner_knn.fit(airplanes_scaledXtrain, airplanes_ytrain)

  training_accuracy.append(learner_knn.score(airplanes_scaledXtrain,
                                             airplanes_ytrain))

  validation_accuracy.append(learner_knn.score(airplanes_scaledXvalid, 
                                               airplanes_yvalid))
  
  training_f1.append(f1_score(airplanes_yvalid,
                              learner_knn.predict(airplanes_scaledXvalid),
                              average='macro'))
  
  validation_f1.append(f1_score(airplanes_yvalid,
                                learner_knn.predict(airplanes_scaledXvalid),
                                average='macro')) 

CPU times: user 9.93 s, sys: 36.5 ms, total: 9.96 s
Wall time: 9.91 s


In [19]:
pd.DataFrame({'k': k_values,
              'training_accuracy': training_accuracy,
              'validation_accuracy': validation_accuracy,
              'training_f1': training_f1,
              'validation_f1': validation_f1})

Unnamed: 0,k,training_accuracy,validation_accuracy,training_f1,validation_f1
0,2,0.848437,0.64875,0.633018,0.633018
1,3,0.8475,0.67875,0.676753,0.676753
2,4,0.825,0.7,0.695395,0.695395
3,5,0.80375,0.686875,0.683171,0.683171
4,6,0.798281,0.705,0.698091,0.698091
5,7,0.786563,0.6925,0.687447,0.687447
6,8,0.781563,0.70625,0.699183,0.699183
7,9,0.772344,0.69625,0.692693,0.692693
8,10,0.770469,0.691875,0.684567,0.684567


The best model has $k=8$.

## Model 3: **Naive Bayes**

In [20]:
airplanes_Xtrain, airplanes_Xvalid, airplanes_ytrain, airplanes_yvalid = train_test_split(airplanes_X_train,
                                                                                          airplanes_y_train,
                                                                                          test_size=0.2,
                                                                                          random_state=20130810)

In [21]:
sc = StandardScaler()

In [22]:
airplanes_scaledXtrain = sc.fit_transform(airplanes_Xtrain)

airplanes_scaledXvalid = sc.transform(airplanes_Xvalid)

In [23]:
learner_multinomialnb = GaussianNB()

In [24]:
learner_multinomialnb.fit(airplanes_scaledXtrain,
                          airplanes_ytrain)

GaussianNB(priors=None, var_smoothing=1e-09)

Validation accuracy

In [25]:
learner_multinomialnb.score(airplanes_scaledXvalid,
                            airplanes_yvalid)

0.458125

Validation F1

In [26]:
f1_score(airplanes_yvalid,
         learner_multinomialnb.predict(airplanes_scaledXvalid),
         average='macro')

0.43115730298558425

## Comparing between best learners within each algorithm

In [27]:
sc = StandardScaler()

Go back to the original train data

In [28]:
airplanes_scaledXtrain = sc.fit_transform(airplanes_X_train)

Usually a cross validation approach is computationally more approachable at this stage.

In [29]:
learner_best_softmax = LogisticRegression(C=1, multi_class='multinomial')
learner_best_knn = KNeighborsClassifier(n_neighbors=8)
learner_best_gaussiannb = GaussianNB()

In [30]:
cross_val_score(learner_best_softmax,
                airplanes_scaledXtrain,
                y=airplanes_y_train,
                cv=10,
                scoring='f1_macro')

array([0.58706611, 0.57749395, 0.59441124, 0.60192975, 0.57857361,
       0.63338168, 0.57958805, 0.62135056, 0.58784564, 0.56713612])

In [31]:
cross_val_score(learner_best_knn,
                airplanes_scaledXtrain,
                y=airplanes_y_train,
                cv=10,
                scoring='f1_macro')

array([0.69426131, 0.67489772, 0.69893028, 0.68736816, 0.71431292,
       0.72674767, 0.68706051, 0.70839449, 0.69800033, 0.71457742])

In [32]:
cross_val_score(learner_best_gaussiannb,
                airplanes_scaledXtrain,
                y=airplanes_y_train,
                cv=10,
                scoring='f1_macro')

array([0.4239052 , 0.43661353, 0.41520808, 0.45365884, 0.42886841,
       0.47908403, 0.43798537, 0.47997675, 0.41757438, 0.42611304])

## Final model

In [33]:
sc = StandardScaler()

airplanes_scaledXtrain = sc.fit_transform(airplanes_X_train)
airplanes_scaledXtest = sc.transform(airplanes_Xtest)

In [34]:
learner_best_knn.fit(airplanes_scaledXtrain,
                     airplanes_y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=8, p=2,
                     weights='uniform')

In [35]:
f1_score(airplanes_ytest,
         learner_best_knn.predict(airplanes_scaledXtest),
         average='macro')

0.6916705981893833