# Lesson 4.03 - Classification Metrics I

In [1]:
#import all required libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load data and drop columns that are not required
df = pd.read_csv("data/dataset.csv", header = 1, sep = ";")
df = df.drop(columns=['Green frogs','Brown frogs', 'Common toad', 'Common newt', 'Great crested newt','Tree frog'])
df = df.drop(columns=['ID', 'TR', 'VR', 'SUR1', 'SUR2', 'SUR3', 'UR', 'FR', 'RR', 'BR','MR', 'CR'])

# Display the breakdown
df['Fire-bellied toad'].value_counts()

0    131
1     58
Name: Fire-bellied toad, dtype: int64

In [3]:
# Prepare Data
X = df.iloc[:, 1:-1].values
y = df.iloc[:, 4].values

In [4]:
# Train/Test Split and Scale Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.40)
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Instantiate KNN
classifier = KNeighborsClassifier(n_neighbors = 5)
classifier.fit(X_train, y_train)
classifier_y_pred = classifier.predict(X_test)

#### Confusion Matrix output is displayed in the following sequence --> tn, fp, fn, tp
* TN = `confusion_matrix[0][0]`
* FP = `confusion_matrix[0][1]`
* FN = `confusion_matrix[1][0]`
* TP = `confusion_matrix[1][1]`

In [6]:
# Display Classification Metrics
print(confusion_matrix(y_test, classifier_y_pred))
print(classification_report(y_test, classifier_y_pred))

[[41 11]
 [21  3]]
              precision    recall  f1-score   support

           0       0.66      0.79      0.72        52
           1       0.21      0.12      0.16        24

    accuracy                           0.58        76
   macro avg       0.44      0.46      0.44        76
weighted avg       0.52      0.58      0.54        76



#### Recap on Classification Metrics Formula
* Accuracy = True Predictions / Total Predictions
* Precision = True Positives / (True Positives + False Positives)
* Recall = True Positives / (True Positives + False Negatives)
* F1-Score  = Weighted Average of Precision and Recall
    * Offers a better overall measure of performance
* Support = True Positives (or True Negatives) that lie in that class