# Evaluation metrics (binary classification)


## Setting up

- Breast cancer data
- 2 classes
- 30 features
- SVC

Classes:

- `0` = Malignant - Tumor grows rapidly, invades and destroys nearby normal tissues, and spreads throughout the body.
- `1` = Benign - Tumor grows slowly and does not spread.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import seaborn as sns

# Breast cancer data
from sklearn.datasets import load_breast_cancer

# Fetch the bundled breast cancer dataset and pull out features / labels
dataObj = load_breast_cancer()
X, y = dataObj.data, dataObj.target
print(np.unique(y))  # distinct class labels
print(X.shape)  # (number of samples, number of features)

# Tabular view: a readable "class" column first, then one column per feature
class_names = {0: dataObj.target_names[0], 1: dataObj.target_names[1]}
df = pd.DataFrame(X, columns=dataObj.feature_names)
df.insert(loc=0, column="class", value=pd.Series(y).map(class_names))
display(df)

In [None]:
# Number of samples per class name
class_counts = df["class"].value_counts()
class_counts

In [None]:
# Hold out 20% of the samples for testing; stratify on y to keep the class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
    stratify=y,
)

# Fit the scaler on the training split only, then apply it to both splits
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

# Classifier (created here, trained later)
svc = SVC(random_state=1)

## Confusion matrix


In [None]:
from sklearn.metrics import confusion_matrix

# Train on the standardized training split, then predict the standardized test split
y_pred = svc.fit(X_train_std, y_train).predict(X_test_std)

In [None]:
# Plain-array confusion matrix with class 0 listed first (functional, not pretty)
label_order = [0, 1]
confmat = confusion_matrix(y_test, y_pred, labels=label_order)
print(confmat)

In [None]:
# Manual plot of the confusion matrix held in `confmat`
ax = sns.heatmap(
    confmat,
    annot=True,
    fmt="d",  # integer counts; the default ".2g" would render counts >= 100 as 1e+02
    cmap="Blues",
)
# scikit-learn puts the true classes on the rows and the predictions on the columns
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()

In [None]:
# Confusion matrix (more beautiful)
from sklearn.metrics import ConfusionMatrixDisplay

# Class order (class 0 first) and the matching names shown on the axes
plot_options = {"labels": [0, 1], "display_labels": ["malignant", "benign"]}
ConfusionMatrixDisplay.from_estimator(svc, X_test_std, y_test, **plot_options)
plt.show()

### Note

- The class 0 samples that are correctly predicted as class 0 are now in the upper left corner of the matrix.
- In order to change the ordering, we can use the "labels" argument.


In [None]:
# Same matrix with the label order flipped: class 1 now comes first
label_order = [1, 0]
confmat = confusion_matrix(y_test, y_pred, labels=label_order)
print(confmat)

In [None]:
# Flipped class order (class 1 first); display names follow the same order
plot_options = {"labels": [1, 0], "display_labels": ["benign", "malignant"]}
ConfusionMatrixDisplay.from_estimator(svc, X_test_std, y_test, **plot_options)
plt.show()

## Accuracy, Precision, Recall, and F1

- Be careful with the definition of the "positive" label. In this case, we want `0` (malignant) to be positive (เป็นโรค, i.e. "has the disease").
- Therefore, we need to set `pos_label=0` when calculating precision, recall and F1.


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Accuracy does not depend on which class is treated as positive
ACC = accuracy_score(y_true=y_test, y_pred=y_pred)

# One [precision, recall, F1] row per choice of positive label.
# Label 1 is scikit-learn's default pos_label; it is passed explicitly here.
datas = []
for positive in (0, 1):
    PRE = precision_score(y_test, y_pred, pos_label=positive)
    REC = recall_score(y_test, y_pred, pos_label=positive)
    F1 = f1_score(y_test, y_pred, pos_label=positive)
    datas.append([PRE, REC, F1])

In [None]:
# Per-label metrics table: rows L0 / L1, one column per metric
metric_names = ["Precision", "Recall", "F1"]
row_labels = ["L0", "L1"]
df = pd.DataFrame.from_records(
    datas, columns=metric_names, index=row_labels
).rename_axis("Label")
display(df)

print(f"Accuracy: {ACC:6.5f}")

## Classification report


In [None]:
from sklearn.metrics import classification_report

# Text summary of the per-class and averaged metrics, printed with 5 decimals
report = classification_report(y_true=y_test, y_pred=y_pred, digits=5)
print(report)

In [None]:
# Per-label precision values read back from the metrics table
PRE0 = df.at["L0", "Precision"]
PRE1 = df.at["L1", "Precision"]

# Macro average: plain mean of the two values, ignoring class sizes
mac_ave = 0.5 * (PRE0 + PRE1)
print(f"Macro average precision: {mac_ave:6.5f}")

In [None]:
# Weighted average precision: each label's precision is weighted by that
# label's share of the true test labels (its support). The supports are
# counted from y_test rather than hard-coded, so the result stays correct if
# the split, random seed, or dataset changes.
support = np.bincount(y_test, minlength=2)  # samples per label: index 0 -> label 0, index 1 -> label 1
n_test = support.sum()
weighted_ave = PRE0 * (support[0] / n_test) + PRE1 * (support[1] / n_test)
print(f"Weighted average precision: {weighted_ave:6.5f}")