# Evaluation metrics (multiclass classification)


## Setting up
- Iris data (features only; the labels are replaced by random integers in {0, 1, 2})
- 3 classes
- 2 features
- Support vector classifier (`SVC`)

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score

# Load the iris dataset bundle.
dataObj = load_iris()

# Feature matrix: keep only columns 1 and 2 of the iris measurements.
X = dataObj.data[:, [1, 2]]

# Labels: the real targets (dataObj.target) are not used here.
# Instead, 150 labels are drawn at random from {0, 1, 2} with a fixed seed.
np.random.seed(0)
y = np.random.randint(3, size=150)

print(np.unique(y))

# Hold out 30% of the samples for testing, stratifying the split on y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y
)

# Standardize the features; the scaler is fitted on the training split only
# and then applied unchanged to the test split.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Build the support vector classifier and fit it on the standardized training data.
svc = SVC(random_state=0, C=10)
svc.fit(X_train_std, y_train)

## Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Predict labels for the held-out samples, then tabulate them against the true
# labels (rows = true class, columns = predicted class).
y_pred = svc.predict(X_test_std)
confusion_matrix(y_test, y_pred)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Plot the confusion matrix of `svc` on the test split, using the iris target
# names as the tick labels.
ConfusionMatrixDisplay.from_estimator(
    svc,
    X_test_std,
    y_test,
    display_labels=dataObj.target_names,
)
plt.show()

`multilabel_confusion_matrix` computes one 2×2 confusion matrix per class (class-wise, the default) or per sample; each class-wise matrix treats that class as positive and all other classes as negative.

Note that `np.flip` is used to reverse the order of the elements in each matrix (placing the true positives in the top-left corner), so that the output is consistent with the layout we used previously.

In [None]:
import seaborn as sns
from sklearn.metrics import multilabel_confusion_matrix

# One 2x2 (one-vs-rest) confusion matrix per class, laid out by scikit-learn as
# [[TN, FP], [FN, TP]] with rows = true condition and columns = predicted condition.
matrices = multilabel_confusion_matrix(y_true=y_test, y_pred=y_pred)

# Size the figure from the matrices actually returned. squeeze=False keeps the
# axes in an array even when there is a single subplot; ravel() restores the
# 1-D shape so `axs[idx]` indexing keeps working.
nc = matrices.shape[0]
fig, axs = plt.subplots(nc, 1, figsize=(5, nc * 4), squeeze=False)
axs = axs.ravel()

# np.flip reverses both axes, giving [[TP, FN], [FP, TN]]. Name the ticks
# explicitly so the default 0/1 tick labels do not mislead after the flip.
tick_labels = ['Positive', 'Negative']
for idx, m in enumerate(matrices):
    sns.heatmap(
        np.flip(m),
        annot=True,
        cmap='Blues',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        ax=axs[idx],
    )
    axs[idx].set_title(f'Class {idx}')
    axs[idx].set_xlabel('Predicted')
    axs[idx].set_ylabel('True')


## Accuracy, Precision, Recall, F1

#### Accuracy

In [None]:
# Accuracy as reported by scikit-learn.
acc = accuracy_score(y_test, y_pred)
print(f'Accuracy = {acc:6.5f}')

# The same value by hand: the number of correct predictions (the diagonal of
# the confusion matrix) divided by the number of test samples. The counts are
# taken from the arrays instead of being typed in, so this check stays in sync
# with the data, the split and the model.
n_correct = np.sum(y_test == y_pred)
acc = n_correct / len(y_test)
print(f'Accuracy = {acc:6.5f}')

In [None]:
# Balanced accuracy (defined as the macro average of recall obtained on each class)

# Per-class recall by hand: among the test samples whose true label is c, the
# fraction that were predicted as c. Computing it from the arrays (instead of
# copying counts out of the confusion matrix) keeps it valid if the seed,
# split or model changes.
recalls = [np.mean(y_pred[y_test == c] == c) for c in np.unique(y_test)]
REC0, REC1, REC2 = recalls  # this notebook has exactly three classes: 0, 1, 2

bal_acc = np.mean(recalls)
print(f'Balanced Accuracy = {bal_acc:6.5f}')

# Same quantity via macro-averaged recall.
bal_acc = recall_score(y_test, y_pred, average='macro')
print(f'Balanced Accuracy = {bal_acc:6.5f}')

# Same quantity via the dedicated scikit-learn helper.
bal_acc = balanced_accuracy_score(y_test, y_pred)
print(f'Balanced Accuracy = {bal_acc:6.5f}')

#### Summary using `classification_report`

`Support` is the number of true instances for each label.

In [None]:
from sklearn.metrics import classification_report
# Text report of the classification metrics for the test-set predictions,
# formatted with 4 decimal places.
report = classification_report(y_true=y_test, y_pred=y_pred, digits=4)
print(report)

#### Precision, Recall, F1

Note that for single-label multiclass problems like this one, the micro-averaged precision, recall, and F1 are all equal to the accuracy.

In [None]:
# Precision, recall and F1 under each averaging strategy: one table row per
# strategy, one column per metric.
scorers = {'precision': precision_score, 'recall': recall_score, 'f1': f1_score}
datas = [
    {
        "average": avg,
        **{
            name: score(y_true=y_test, y_pred=y_pred, average=avg)
            for name, score in scorers.items()
        },
    }
    for avg in ['macro', 'weighted', 'micro']
]

df = pd.DataFrame(datas).set_index('average')
display(df)

# Plain accuracy, printed for comparison with the rows above.
print(f"Accuracy: {accuracy_score(y_test, y_pred):6.5f}")

#### Summary using `classification_report`

`Support` is the number of true instances for each label.

In [None]:
from sklearn.metrics import classification_report
# Print the classification report for the test-set predictions (4 decimal places).
print(classification_report(y_true=y_test, y_pred=y_pred, digits=4))

## Extra

Calculate classwise values

In [None]:
# Per-class scores: average=None yields one value per class instead of a
# single averaged number.
PRE = precision_score(y_test, y_pred, average=None)
REC = recall_score(y_test, y_pred, average=None)
F1 = f1_score(y_test, y_pred, average=None)
print(PRE, REC, F1)

# One row per class, one column per metric.
dft = pd.DataFrame({'precision': PRE, 'recall': REC, 'f1': F1}).rename_axis(index='class')
dft

Using the `precision_recall_fscore_support` function

Calculate average

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Averaged precision / recall / F1 obtained from a single call per averaging
# strategy.
metric_names = ('precision', 'recall', 'f1')
arr = []
for average in ['macro', 'weighted', 'micro']:
    prfs = precision_recall_fscore_support(y_test, y_pred, average=average)
    print(prfs)
    # zip stops after the three named metrics, so the trailing support entry
    # of the returned tuple is not tabulated.
    data = {'average': average, **dict(zip(metric_names, prfs))}
    arr.append(data)

dft = pd.DataFrame(arr).set_index('average')
display(dft)

Calculate classwise values

In [None]:
# With average=None the function returns four per-class arrays:
# (precision, recall, fscore, support).
prfs = precision_recall_fscore_support(y_test, y_pred, average=None)

# Build the table column by column so that every metric keeps its own dtype.
# Stacking the four arrays as rows and transposing would upcast the integer
# support counts to float (rendered with a trailing ".0").
dft = pd.DataFrame(dict(zip(['precision', 'recall', 'f1', 'support'], prfs)))
dft.index.names = ['class']
display(dft)