##### Authors: Rafael Dousse, Eva Ray, Massimo Stefani

# Exercise 2 - System evaluation

## Imports

In [1]:
import pandas as pd
import numpy as np

## Load data

Define the path of the data file

In [2]:
# Relative path to the CSV file holding system A's outputs (read with sep=";" in the next cell).
path = "ex2-system-a.csv"

Read the CSV file using `read_csv`

In [3]:
# `names=` is supplied without `header=`, so the first line of the file is read as data.
# Columns "0".."9" hold one output score per class; the last column is the true label.
dataset_a = pd.read_csv(
    path,
    sep=";",
    index_col=False,
    names=[*(str(digit) for digit in range(10)), "y_true"],
)

Display first rows

In [4]:
# Preview the first five samples to check that the columns were parsed as expected.
dataset_a.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y_true
0,5.34845e-08,7.49348e-10,8.08347e-07,2.08229e-05,5.22236e-10,2.33026e-08,5.24127e-12,0.999965,4.80859e-07,1.3e-05,7
1,0.00133427,3.20296e-05,0.850428,0.00166909,1.54646e-07,0.000241294,0.144828,1.12281e-11,0.00145633,1.1e-05,2
2,3.64305e-06,0.996276,0.00204591,0.000421053,2.19402e-05,1.64413e-05,0.000283816,0.000372296,0.000515012,4.4e-05,1
3,0.99982,2.55039e-10,1.11201e-05,1.6532e-05,5.37573e-10,8.99975e-05,9.38092e-06,4.46447e-05,2.41844e-06,6e-06,0
4,2.09246e-08,7.46422e-08,3.56082e-05,5.4962e-07,0.998896,3.07092e-08,0.000234615,9.74801e-07,1.07161e-06,0.000831,4


Store some useful statistics (class names + number of classes)

In [5]:
# One label per score column of the dataset: the strings "0" through "9".
class_names = [str(digit) for digit in range(10)]
nb_classes = len(class_names)

## Exercise's steps

a) Write a function to take classification decisions on such outputs according to Bayes' rule.

In [28]:
def bayes_classification(df):
    """
    Pick, for every sample, the class with the highest output score.

    Parameters
    ----------
    df : Pandas DataFrame of shape (n_samples, n_classes + 1)
         One score column per class, followed by one trailing column that
         is ignored here (the ground truth in this notebook).

    Returns
    -------
    preds : Numpy array of shape (n_samples,)
            Zero-based position of the highest-scoring column for each
            sample. On ties the first (left-most) column wins.
    """
    # Everything except the trailing column is treated as a per-class score.
    scores = df.iloc[:, :-1].to_numpy(dtype=float)
    # argmax along each row yields the column position of the largest score.
    return scores.argmax(axis=1)

b) What is the overall error rate of the system?

In [10]:
# Predicted label for every sample of system A.
classification = bayes_classification(dataset_a)

# The error rate is computed two equivalent ways as a sanity check:
# as the complement of the accuracy, and directly as the share of mismatches.
accuracy = (classification == dataset_a["y_true"].to_numpy()).mean()
error_rate_1 = 1 - accuracy
error_rate_2 = (classification != dataset_a["y_true"].to_numpy()).mean()

for report_line in (
    f"Accuracy: {accuracy:.4f}",
    f"Error rate 1: {error_rate_1:.4f}",
    f"Error rate 2: {error_rate_2:.4f}",
):
    print(report_line)


Accuracy: 0.8927
Error rate 1: 0.1073
Error rate 2: 0.1073


c) Compute and report the confusion matrix of the system.

In [12]:
def confusion_matrix(y_true, y_pred, n_classes):
    """
    Compute the confusion matrix.

    Rows are indexed by the true class and columns by the predicted class,
    so ``cm[i, j]`` counts the samples of class ``i`` predicted as ``j``.

    Parameters
    ----------
    y_true : Numpy array of shape (n_samples,)
             Ground truth, as integer labels in ``[0, n_classes)``.
    y_pred : Numpy array of shape (n_samples,)
             Predictions, as integer labels in ``[0, n_classes)``.
    n_classes : Integer
                Number of classes.

    Returns
    -------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    IndexError
        If any label lies outside ``[0, n_classes)``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # A length mismatch would otherwise be hidden: pairing the two sequences
    # element by element silently drops the surplus samples.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    cm = np.zeros((n_classes, n_classes), dtype=int)
    if y_true.size == 0:
        return cm

    # Negative labels are valid numpy indices (they count from the end), so
    # they must be rejected explicitly or they corrupt the last rows/columns.
    for arg_name, labels in (("y_true", y_true), ("y_pred", y_pred)):
        if labels.min() < 0 or labels.max() >= n_classes:
            raise IndexError(
                f"{arg_name} contains labels outside [0, {n_classes})"
            )

    # Unbuffered in-place add: repeated (true, pred) pairs are each counted,
    # unlike `cm[y_true, y_pred] += 1`, which would count duplicates once.
    np.add.at(cm, (y_true, y_pred), 1)
    return cm

In [None]:
# Confusion matrix of system A: rows are true classes, columns are predicted classes.
cm = confusion_matrix(
    y_true=dataset_a["y_true"].to_numpy(),
    y_pred=classification,
    n_classes=nb_classes,
)
print("Confusion Matrix:", cm, sep="\n")

Confusion Matrix:
[[ 944    0   11    0    0    2   10    7    5    1]
 [   0 1112    2    3    1    4    3    1    9    0]
 [  10    6  921   12   15    3   19   15   26    5]
 [   1    1   31  862    2   72    5   14   12   10]
 [   2    3    6    2  910    1   12    6    4   36]
 [  12    3    6   29   19  768   19    9   21    6]
 [  14    3   21    2   22   28  865    0    3    0]
 [   0   14   30    9    7    2    1  929    3   33]
 [  12   16   18   26   24   46   22   19  772   19]
 [  10    4    6   22   53   18    0   48    4  844]]


d) What are the worst and best classes in terms of precision and recall?

In [17]:
def precision_per_class(cm):
    """
    Compute the precision per class.

    Precision of class ``j`` is the diagonal entry ``cm[j, j]`` divided by
    the total of column ``j``.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    precisions : Numpy array of shape (n_classes,)
                 Precision per class. A class whose column total is zero
                 gets a precision of 0.0 instead of NaN.
    """
    cm = np.asarray(cm)
    true_positives = np.diag(cm).astype(float)
    # Summing along axis 0 collapses the rows and leaves one total per column.
    column_totals = cm.sum(axis=0)

    # Divide only where the denominator is non-zero; the other entries keep
    # the 0.0 they were initialised with (no NaN, no RuntimeWarning).
    precisions = np.zeros_like(true_positives)
    np.divide(true_positives, column_totals, out=precisions, where=column_totals != 0)
    return precisions

In [16]:
def recall_per_class(cm):
    """
    Compute the recall per class.

    Recall of class ``i`` is the diagonal entry ``cm[i, i]`` divided by the
    total of row ``i``.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    recalls : Numpy array of shape (n_classes,)
              Recall per class. A class whose row total is zero gets a
              recall of 0.0 instead of NaN.
    """
    cm = np.asarray(cm)
    true_positives = np.diag(cm).astype(float)
    # Summing along axis 1 collapses the columns and leaves one total per row.
    row_totals = cm.sum(axis=1)

    # Divide only where the denominator is non-zero; the other entries keep
    # the 0.0 they were initialised with (no NaN, no RuntimeWarning).
    recalls = np.zeros_like(true_positives)
    np.divide(true_positives, row_totals, out=recalls, where=row_totals != 0)
    return recalls

In [20]:
# Per-class precision computed from `cm`, followed by the best and worst class.
precisions = precision_per_class(cm)
for class_idx in range(len(precisions)):
    print(f"Class {class_idx}: Precision = {precisions[class_idx]:.4f}")

best_class = precisions.argmax()
worst_class = precisions.argmin()
print(f"Best class: {best_class} with precision {precisions[best_class]:.4f}")
print(f"Worst class: {worst_class} with precision {precisions[worst_class]:.4f}")

Class 0: Precision = 0.9393
Class 1: Precision = 0.9570
Class 2: Precision = 0.8755
Class 3: Precision = 0.8914
Class 4: Precision = 0.8642
Class 5: Precision = 0.8136
Class 6: Precision = 0.9048
Class 7: Precision = 0.8865
Class 8: Precision = 0.8987
Class 9: Precision = 0.8847
Best class: 1 with precision 0.9570
Worst class: 5 with precision 0.8136


In [21]:
# Per-class recall computed from `cm`, followed by the best and worst class.
recall = recall_per_class(cm)
for class_idx in range(len(recall)):
    print(f"Class {class_idx}: Recall = {recall[class_idx]:.4f}")

best_class = recall.argmax()
worst_class = recall.argmin()
print(f"Best class: {best_class} with recall {recall[best_class]:.4f}")
print(f"Worst class: {worst_class} with recall {recall[worst_class]:.4f}")


Class 0: Recall = 0.9633
Class 1: Recall = 0.9797
Class 2: Recall = 0.8924
Class 3: Recall = 0.8535
Class 4: Recall = 0.9267
Class 5: Recall = 0.8610
Class 6: Recall = 0.9029
Class 7: Recall = 0.9037
Class 8: Recall = 0.7926
Class 9: Recall = 0.8365
Best class: 1 with recall 0.9797
Worst class: 8 with recall 0.7926


e) In file `ex2-system-b.csv` you will find the output of a second system B. Which of the systems (a) and (b) is better in terms of error rate and F1?

In [22]:
# System B's file is read with the same column layout as system A's:
# ten score columns named "0".."9", then the true label.
path = "ex2-system-b.csv"
dataset_b = pd.read_csv(
    path,
    sep=";",
    index_col=False,
    names=[*(str(digit) for digit in range(10)), "y_true"],
)

# Classify system B's outputs and tabulate them against the ground truth.
confusion_sb = confusion_matrix(
    y_true=dataset_b["y_true"].to_numpy(),
    y_pred=bayes_classification(dataset_b),
    n_classes=nb_classes,
)
print("Confusion Matrix for system B:", confusion_sb, sep="\n")

Confusion Matrix for system B:
[[ 963    0    4    1    0    2    6    2    2    0]
 [   0 1123    3    1    0    0    5    1    2    0]
 [   5    0  996    7    4    0    4    8    6    2]
 [   0    0   13  985    0    3    1    3    3    2]
 [   1    2    4    0  938    1    9    2    5   20]
 [   8    1    0   27    1  830   12    1    8    4]
 [   6    3    2    1    7    4  930    1    4    0]
 [   0    7   18   10    3    1    0  981    0    8]
 [   7    1    7   19    5    7    4    7  912    5]
 [   3    4    1   14   11    6    2    8    5  955]]


In [25]:
def system_accuracy(cm):
    """
    Compute the overall accuracy from a confusion matrix.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    accuracy : Float
               Sum of the diagonal entries divided by the sum of all entries.
    """
    # The diagonal holds the samples whose prediction matches the true class.
    n_correct = np.diagonal(cm).sum()
    n_samples = np.sum(cm)
    return n_correct / n_samples

In [24]:
def system_f1_score(cm):
    """
    Compute the system F1 score (unweighted mean of the per-class F1).

    The per-class F1 is computed straight from the confusion matrix as
    ``2 * TP / (column total + row total)``. This is algebraically the same
    as ``2 * P * R / (P + R)`` but stays defined when a class has no true
    positives, where the precision/recall form evaluates to 0/0.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    f1_score : Float
               Mean of the per-class F1 scores. A class with neither
               predictions nor true samples contributes 0.0.
    """
    cm = np.asarray(cm)
    true_positives = np.diag(cm).astype(float)
    # Column total = TP + FP and row total = TP + FN, so their sum is
    # 2*TP + FP + FN, the denominator of the F1 score.
    denominators = cm.sum(axis=0) + cm.sum(axis=1)

    # Divide only where the denominator is non-zero; the other entries keep
    # the 0.0 they were initialised with instead of turning the mean into NaN.
    f1_per_class = np.zeros_like(true_positives)
    np.divide(2 * true_positives, denominators, out=f1_per_class, where=denominators != 0)
    return f1_per_class.mean()

In [26]:
# Summary metrics for system A, both derived from its confusion matrix `cm`.
accuracy_a, f1_score_a = system_accuracy(cm), system_f1_score(cm)
print(f"System A - Accuracy: {accuracy_a:.4f}, F1 Score: {f1_score_a:.4f}")



System A - Accuracy: 0.8927, F1 Score: 0.8907


In [27]:
# Summary metrics for system B, both derived from its confusion matrix `confusion_sb`.
accuracy_b, f1_score_b = system_accuracy(confusion_sb), system_f1_score(confusion_sb)
print(f"System B - Accuracy: {accuracy_b:.4f}, F1 Score: {f1_score_b:.4f}")

System B - Accuracy: 0.9613, F1 Score: 0.9609
