This is one of the deliverables produced from this project: https://library.hkust.edu.hk/ds/project/p002/

> This notebook was created by Holly CHAN (HKUST Library) to generate the figures presented in our article in IFLA's TILT (Jan 2025 issue).

# Create Confusion Matrix & Calculate Accuracy, F1 Score, Precision, Recall

In [None]:
!pip install scikit-learn

In [41]:
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

## Result of `deepface`

In [60]:
# Load the `deepface_excludeFail` sheet of the results workbook into `df`.
file_path = 'face_verification_results.xlsx'
df = pd.read_excel(io=file_path, sheet_name='deepface_excludeFail')

In [61]:
# One prediction column per person; 'Correct answer' holds the true identity.
columns = ['chang', 'chung', 'woo']

for column in columns:
    # Ground truth for this person: 1 when the correct answer is this person, else 0.
    df[f'{column}_binary'] = (df['Correct answer'] == column).astype(int)
    # Prediction for this person: convert TRUE/FALSE to 1/0.
    df[column] = df[column].astype(int)

# Per-row outcome indicators (1 = the row falls in that cell of the confusion matrix).
for column in columns:
    predicted_yes = df[column] == 1
    predicted_no = df[column] == 0
    actually_yes = df[f'{column}_binary'] == 1
    actually_no = df[f'{column}_binary'] == 0

    df[f'TP_{column}'] = (predicted_yes & actually_yes).astype(int)
    df[f'TN_{column}'] = (predicted_no & actually_no).astype(int)
    # False positive: predicted a match although the true answer is someone else.
    df[f'FP_{column}'] = (predicted_yes & actually_no).astype(int)
    # False negative: predicted no match although the true answer is this person.
    df[f'FN_{column}'] = (predicted_no & actually_yes).astype(int)

# Total count of each outcome per person, keyed as '<metric>_<person>'.
summary = {f'{metric}_{column}': df[f'{metric}_{column}'].sum()
           for column in columns
           for metric in ['TP', 'TN', 'FP', 'FN']}

In [62]:
# Compute accuracy / precision / recall / F1 and a labelled confusion matrix
# for each person, treating every person as a separate binary problem.
columns = ['chang', 'chung', 'woo']
results = {}

for column in columns:
    y_true = df[f'{column}_binary'].values  # true labels (1 = this person)
    y_pred = df[column].values              # predicted labels (1 = match)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class occurs in
    # the data; otherwise the 2x2 row/column labels below would not fit.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Rows are actual classes, columns are predicted classes.
    conf_matrix_df = pd.DataFrame(conf_matrix,
                                  index=['Actual Negative', 'Actual Positive'],
                                  columns=['Predicted Negative', 'Predicted Positive'])

    # zero_division=0 keeps the score at 0 (without a warning) when a
    # denominator is empty, e.g. no positive predictions for this person.
    results[column] = {
        'accuracy': round(accuracy_score(y_true, y_pred), 4),
        'precision': round(precision_score(y_true, y_pred, zero_division=0), 4),
        'recall': round(recall_score(y_true, y_pred, zero_division=0), 4),
        'f1_score': round(f1_score(y_true, y_pred, zero_division=0), 4),
        'confusion_matrix': conf_matrix_df
    }

# Display the results
for column, metrics in results.items():
    print(f"=============== {column} =================")
    print(f"Accuracy = {metrics['accuracy']}, F1 Score = {metrics['f1_score']}, Precision = {metrics['precision']}, Recall = {metrics['recall']}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}\n")

Accuracy = 0.9598, F1 Score = 0.7568, Precision = 0.6087, Recall = 1.0
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 201                   9
Actual Positive                   0                  14

Accuracy = 0.8929, F1 Score = 0.4286, Precision = 0.2812, Recall = 0.9
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 191                  23
Actual Positive                   1                   9

Accuracy = 0.8705, F1 Score = 0.5397, Precision = 0.425, Recall = 0.7391
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 178                  23
Actual Positive                   6                  17



## Result of `face_recognition`

In [63]:
# Load the `face_recognition_excludeFail` sheet of the results workbook into `df`.
file_path = 'face_verification_results.xlsx'
df = pd.read_excel(io=file_path, sheet_name='face_recognition_excludeFail')

In [64]:
# One prediction column per person; 'Correct answer' holds the true identity.
columns = ['chang', 'chung', 'woo']

for column in columns:
    # Ground truth for this person: 1 when the correct answer is this person, else 0.
    df[f'{column}_binary'] = (df['Correct answer'] == column).astype(int)
    # Prediction for this person: convert TRUE/FALSE to 1/0.
    df[column] = df[column].astype(int)

# Per-row outcome indicators (1 = the row falls in that cell of the confusion matrix).
for column in columns:
    predicted_yes = df[column] == 1
    predicted_no = df[column] == 0
    actually_yes = df[f'{column}_binary'] == 1
    actually_no = df[f'{column}_binary'] == 0

    df[f'TP_{column}'] = (predicted_yes & actually_yes).astype(int)
    df[f'TN_{column}'] = (predicted_no & actually_no).astype(int)
    # False positive: predicted a match although the true answer is someone else.
    df[f'FP_{column}'] = (predicted_yes & actually_no).astype(int)
    # False negative: predicted no match although the true answer is this person.
    df[f'FN_{column}'] = (predicted_no & actually_yes).astype(int)

# Total count of each outcome per person, keyed as '<metric>_<person>'.
summary = {f'{metric}_{column}': df[f'{metric}_{column}'].sum()
           for column in columns
           for metric in ['TP', 'TN', 'FP', 'FN']}

In [65]:
# Compute accuracy / precision / recall / F1 and a labelled confusion matrix
# for each person, treating every person as a separate binary problem.
columns = ['chang', 'chung', 'woo']
results = {}

for column in columns:
    y_true = df[f'{column}_binary'].values  # true labels (1 = this person)
    y_pred = df[column].values              # predicted labels (1 = match)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class occurs in
    # the data; otherwise the 2x2 row/column labels below would not fit.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Rows are actual classes, columns are predicted classes.
    conf_matrix_df = pd.DataFrame(conf_matrix,
                                  index=['Actual Negative', 'Actual Positive'],
                                  columns=['Predicted Negative', 'Predicted Positive'])

    # zero_division=0 keeps the score at 0 (without a warning) when a
    # denominator is empty, e.g. no positive predictions for this person.
    results[column] = {
        'accuracy': round(accuracy_score(y_true, y_pred), 4),
        'precision': round(precision_score(y_true, y_pred, zero_division=0), 4),
        'recall': round(recall_score(y_true, y_pred, zero_division=0), 4),
        'f1_score': round(f1_score(y_true, y_pred, zero_division=0), 4),
        'confusion_matrix': conf_matrix_df
    }

# Display the results
for column, metrics in results.items():
    print(f"=============== {column} =================")
    print(f"Accuracy = {metrics['accuracy']}, F1 Score = {metrics['f1_score']}, Precision = {metrics['precision']}, Recall = {metrics['recall']}")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}\n")

Accuracy = 0.9118, F1 Score = 0.5714, Precision = 0.4, Recall = 1.0
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 232                  24
Actual Positive                   0                  16

Accuracy = 0.6507, F1 Score = 0.2149, Precision = 0.1226, Recall = 0.8667
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 164                  93
Actual Positive                   2                  13

Accuracy = 0.6066, F1 Score = 0.327, Precision = 0.197, Recall = 0.963
Confusion Matrix:
                 Predicted Negative  Predicted Positive
Actual Negative                 139                 106
Actual Positive                   1                  26

