# **Classification Metrics: Tools (Aside from Accuracy) for Assessing ML Models**




In [None]:
# Import the following packages
import pandas as pd
import numpy as np

# Fix NumPy's global random seed so every run draws the same random numbers.
# (Calls that pass their own random_state are unaffected; this matters for
# calls without one, e.g. DataFrame.sample.)
np.random.seed(123)

# Print arrays with 4 decimals and without scientific notation
np.set_printoptions(suppress=True, precision=4)

# To model normal distribution
from scipy.stats import norm

# To make data
from sklearn.datasets import make_blobs

## Let's begin by making synthetic data with 2 features (to be used for classification as a 1 or 0)

In [None]:
# Number of observations in the synthetic data set
n = 3_000

In [None]:
# Blob centres as (feature1, feature2): the two classes differ only along feature1
centers = [[9.5, 0], [10.5, 0]]
X, y = make_blobs(n_samples=n, centers=centers, cluster_std=0.4, random_state=7)

# Put the features in a DataFrame with named columns and attach the labels
# as strings (string labels are convenient for plotting)
data = pd.DataFrame(X, columns=['feature1', 'feature2']).assign(target=y.astype('str'))

data.head()  # preview the first few rows

Unnamed: 0,feature1,feature2,target
0,9.607937,-0.002729,0
1,10.229965,0.054297,1
2,10.269156,0.044818,0
3,10.418941,-0.042682,1
4,9.422457,0.278918,0


In [None]:
# Dimensions of the synthetic data, followed by a blank line
print('Shape:', data.shape, '\n')

# Proportion (not count) of observations in each class
print('Class Frequencies:', data['target'].value_counts(normalize=True), sep='\n')

Shape: (3000, 3) 

Class Frequencies:
1    0.5
0    0.5
Name: target, dtype: float64


As you can see, the "class frequencies" of 0 and 1 observations depict a 50-50 split, meaning that half of our data is 1's and half of our data is 0's

**Below is a pre-made classifier (common classifiers we have learned or may learn include logistic regression, Decision Trees, K Nearest Neighbors, etc.). This classifier will make the predictions of 0's and 1's based upon training and testing data.**

In [None]:
class BoundaryClassifier():
    """Toy binary classifier that thresholds ``feature1`` at a fixed boundary.

    Nothing is learned from the training data: ``fit`` only stores the
    boundary it is given. An observation is predicted as class 1 when its
    ``feature1`` value is strictly greater than the boundary, else class 0.

    Params:
        std: Scale used by ``predict_proba`` to turn the distance from the
            boundary into a probability (default = 0.4).
    """
    def __init__(self, std=0.4):
        self.name = 'Classify observations on 1D boundary'
        self.std = std
        self.boundary = None  # set by fit()

    def fit(self, X_train, y_train, x_boundary=None):
        """Store ``x_boundary``; ``X_train`` and ``y_train`` are accepted but not used."""
        self.boundary = x_boundary

    def _require_boundary(self):
        """Return the stored boundary, or raise if fit() has not supplied one."""
        b = getattr(self, 'boundary', None)
        if b is None:
            raise ValueError('No boundary set: call fit(X_train, y_train, x_boundary=...) first')
        return b

    def predict(self, X_test):
        """Return a Series of 0/1 predictions, one per row of ``X_test``."""
        b = self._require_boundary()
        # Built-in int replaces the np.int alias, which newer NumPy no longer provides
        return (X_test['feature1'] > b).astype(int)

    def predict_proba(self, X_test):
        """Return a Series with the predicted probability of class 1 for each row.

        The probability is the standard normal CDF of the distance from the
        boundary divided by ``std``, so it is 0.5 exactly at the boundary.
        """
        b = self._require_boundary()
        x = X_test['feature1']

        # Use the normal distribution to model probabilities (one vectorised
        # call instead of one call per row); keep the original index and name
        z = ((x - b) / self.std).to_numpy()
        return pd.Series(norm.cdf(z), index=x.index, name=x.name)

**1. As learned, split your data into training and testing data**

In [None]:
from sklearn.model_selection import train_test_split

# Features: every column except the label. Labels: back to integers.
X = data.drop('target', axis=1)
y = data['target'].astype('int')

# Hold out 40% of the rows for testing; random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

**2. Employ the classifier `BoundaryClassifier()` to fit the model to the data and predict the 0 and 1 classes. Hint: an extra input is needed in `clf.fit()`, called `x_boundary`. Set this boundary/threshold to 10, which is the threshold we can use to determine if a point is a 0 or 1 (threshold determined for this specific synthetic dataset).**

In [None]:
# Create the model and hand it the decision boundary (feature1 = 10)
clf = BoundaryClassifier()
clf.fit(X_train, y_train, x_boundary=10)

# Predicted classes (0/1) for the test set
y_pred = clf.predict(X_test)
# Predicted probability of falling into class 1 for the test set
y_pred_proba = clf.predict_proba(X_test)

**3. Create a data frame to view the actual class, predicted class (from model), and predicted probability ('y_pred_proba'), from BoundaryClassifier()**

In [None]:
# One row per test observation: true label next to the model's outputs
test_results = pd.DataFrame({
    'Actual Class': y_test,
    'Predicted Class': y_pred,
    'Predicted Probabilty': y_pred_proba,
})
test_results.sample(n=5)
# Note: the predicted class is 1 exactly when the predicted probability is > 0.5,
# so actual class != predicted class means the probability landed on the wrong
# side of 0.5 for that observation.

Unnamed: 0,Actual Class,Predicted Class,Predicted Probabilty
57,1,1,0.73788
2659,1,0,0.196321
691,0,0,0.024315
102,1,1,0.7131
1569,1,1,0.992458


## **Classification Metrics**

1. Compute the accuracy of the model

In [None]:
##ACCURACY SCORE
from sklearn.metrics import accuracy_score

# Fraction of test observations whose predicted class equals the actual class
acc = accuracy_score(y_test, y_pred)

# Built-in round() works whether accuracy_score returns a NumPy scalar or a
# plain Python float (which has no .round() method), depending on the
# scikit-learn version
round(acc, 4)

0.9008

2. Create a confusion matrix to model the true positives, true negatives, false positives, and false negatives

In [None]:
##CONFUSION MATRIX
from sklearn.metrics import confusion_matrix

# Rows are the actual classes, columns are the predicted classes
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[538  63]
 [ 56 543]]


In [None]:
#code to turn outputted matrix into a dataframe
def custom_confusion_matrix(y_test_, y_pred_proba_, alpha=0.5, output='dataframe'):
    """
    Build a 2x2 confusion matrix for a binary (0/1) problem from predicted
    probabilities, classifying an observation as 1 when its probability is
    strictly greater than alpha.

    Usage:
        cm = custom_confusion_matrix(y_test, y_pred_proba, output = 'dataframe')
        tn, fp, fn, tp = custom_confusion_matrix(y_test, y_pred_proba, output = 'rates')

    Params:
        y_test_: Actual classes (0 or 1)
        y_pred_proba_: Predicted probabilities of class 1 (pandas Series,
            NumPy array, or list), in the same order as y_test_
        alpha: Threshold probability for classification (default = 0.5)
        output: One of 'dataframe', 'rates', or 'array'

    Returns:
        'dataframe': DataFrame with rows 'Actual 0/1' and columns 'Predicted 0/1'
        'rates': flat array (tn, fp, fn, tp); note these are counts, not rates
        anything else: the 2x2 array itself
    """
    # np.asarray lets the threshold work on lists and arrays, not only Series
    y_pred_ = (np.asarray(y_pred_proba_) > alpha).astype(int)
    # labels=[0, 1] keeps the matrix 2x2 even if one class never occurs,
    # so the DataFrame labels and the 4-way unpack below always fit
    cf_mat_ = confusion_matrix(y_test_, y_pred_, labels=[0, 1])
    if output == 'dataframe':
        return pd.DataFrame(cf_mat_, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    elif output == 'rates':
        return cf_mat_.ravel()
    else:
        return cf_mat_

In [None]:
# Labelled confusion matrix at the default 0.5 probability threshold
cm = custom_confusion_matrix(y_test, y_pred_proba, alpha=0.5, output='dataframe')
cm

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,538,63
Actual 1,56,543


In [None]:
# Unpack the flattened matrix into true negatives, false positives,
# false negatives and true positives (these are counts)
tn, fp, fn, tp = custom_confusion_matrix(y_test, y_pred_proba, alpha=0.5, output='rates')
(tn, fp, fn, tp)

(538, 63, 56, 543)

3. Compute the Sensitivity, Specificity, Precision, and F-1 Scores.

In [None]:
##SENSITIVITY
# True positive rate: share of the actual 1's that were predicted as 1
tpr = tp / (fn + tp)
np.round(tpr, 4)
# equivalently: 543 / (56 + 543)

0.9065

In [None]:
##SPECIFICITY
# True negative rate: share of the actual 0's that were predicted as 0
tnr = tn / (fp + tn)
np.round(tnr, 4)
# equivalently: 538 / (538 + 63)

0.8952

In [None]:
##PRECISION
# Share of the predicted 1's that are actually 1
precision = tp / (fp + tp)
np.round(precision, 4)

0.896

In [None]:
##F-1 SCORE
# Harmonic mean of sensitivity and precision:
# 2 * sensitivity * precision / (sensitivity + precision)
f_1 = (2 * tpr * precision) / (tpr + precision)
np.round(f_1, 4)

0.9012

# What does each of these metrics mean in context of the classes in this model?
1. Accuracy: how often the classification (1's or 0's overall irrespective of specific class) is made correctly
2. Sensitivity: "true positive rate"; out of all the actual 1's, how many did the model correctly classify as 1's?
3. Specificity: "true negative rate"; out of all the actual 0's, how many did the model correctly classify as 0's?
4. Precision: out of all the model-predicted 1's, how often was it correct?
5. F-1 Score: the harmonic mean of precision and sensitivity; a single number for how well the model can predict 1's ('positive values') and discriminate against 0's ('negative values')

You can now start to see how important these other classification metrics can be in real-life scenarios. In healthcare, for example, if 1 (positive values) corresponds to the presence of a condition, the *Sensitivity* metric is especially important, because you'd want to maximize the true positive rate of predictions! ("sensitivity"=out of all actual disease cases present, how many the model correctly predicted as being present)