# Evaluation scores
Using the file ../eval/scores_resnet_spec_eval.csv I evaluate the model performance of the evaluation process, performed on the ASVSpoof2021 dataset.


## Computing the % of correct predictions

In [35]:
import csv
import pandas as pd
from src.utils import *
from sklearn.metrics import balanced_accuracy_score

In [16]:
config_path = '../config/residualnet_train_config.yaml'
config = read_yaml(config_path)
df_eval = pd.read_csv(os.path.join('..', config['df_eval_path']))
df_eval[0:3]

Unnamed: 0.1,Unnamed: 0,path,label
0,0,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1
1,1,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1
2,2,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1


In [14]:
csv_score_file = '../eval/scores_resnet_spec_eval.csv'  
scores_eval = pd.read_csv(csv_score_file, delimiter=' ', header=None, engine='python')
scores_eval[:3]

Unnamed: 0,0,1
0,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,0.048788
1,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,-25.825459
2,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,-39.20491


In [15]:
print(f'The length of the evaluation file list is {len(df_eval)}\nThe length of the evaluation score list is {len(scores_eval)}')

The length of the evaluation file list is 71237
The length of the evaluation score list is 71237


example: <br>
higher log probability ==> predicted class
if [score 0]=-2 and [score 1]=-3 then it means that the predicted class is class 0 <br>
[score] = -2+3 = 1 > 0 --> class 0
<br>
###########
<br>
if [score 0]=-3 and [score 1]=-2 then it means that the predicted class is class 1 <br>
[score] = -3+2 = -1 < 0 --> class 1

In [17]:
# the score values .csv uses a space as a delimiter between the file path and the score

def convert_column_to_binary(csv_file):
    binary_list = []
    
    with open(csv_file, 'r') as file:
        for line in file:
            # Split each line based on space 
            parts = line.strip().split()
            value = float(parts[1])
            
            try:
                if value > 0:
                    binary_list.append(0)
                else:
                    binary_list.append(1)
            except ValueError:
                pass
        
    return binary_list

In [29]:
pred_labels = convert_column_to_binary(csv_score_file)
pred_labels[:3]

[0, 1, 1]

## Unbalanced accuracy

In [40]:
# column at which I have the GT labels
csv_column_index = 2
csv_GT_file = '../data/df_eval_19.csv'

# extract the GT labels 
with open(csv_GT_file, 'r') as file:
    reader = csv.reader(file)
    next(reader)
    csv_GT_values = [int(row[csv_column_index]) for row in reader]

true_labels = csv_GT_values
total_values = len(pred_labels)
identical_count = sum(1 for x,y in zip(pred_labels, csv_GT_values) if x==y)
unbalanced_acc = (identical_count/total_values)*100
print(f'The unbalanced accuracy is {unbalanced_acc:.2f}%')


The unbalanced accuracy is 77.15%


## Balanced accuracy

In [39]:
balanced_acc = balanced_accuracy_score(y_true=true_labels, y_pred=pred_labels)
print(f'The balanced accuracy is {balanced_acc*100:.2f}%')

The balanced accuracy is 84.21%


## Computing the ROC and EER

In [41]:
from sklearn.metrics import roc_curve
import numpy as np

In [42]:
def compute_eer(y_true, y_score):
    # compute the ROC curve
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    
    # find threshold
    eer_threshold_idx = np.argmin(np.abs(fpr-(1-tpr)))
    eer_threshold = thresholds[eer_threshold_idx]
    
    # Calculate EER (FPR or TPR at the EER threshold)
    eer_fpr = fpr[eer_threshold_idx]
    eer_tpr = tpr[eer_threshold_idx]
    eer = (eer_fpr + (1 - eer_tpr)) / 2.0
    
    return eer, eer_threshold

In [43]:
eer, eer_threshold = compute_eer(true_labels, pred_labels)

In [44]:
eer

0.15788922284503523

## Computing the confusion matrix
* TN true negative, actual class was 0 (BF) and predicted as 0
* FP false positive, actual class was 0, but predicted as 1 (deep fake)
* FN false negative, actual class was 1, but predicted as 0
* TP true positive, actual class was 1 and predicted as 1

In [None]:
from sklearn.metrics import confusion_matrix

In [45]:
cm = confusion_matrix(true_labels, pred_labels)
print(cm)

# [ TN    FP
#   FN    TP ]

[[ 6848   507]
 [15769 48113]]


## Computing the indices of wrong predictions

In [47]:
def find_different_indices(list1, list2):
    if len(list1) != len(list2):
        raise ValueError
    diff_idx = [i for i, (x,y) in enumerate(zip(list1, list2)) if x!=y]
    return diff_idx

wrong_preds_idx = find_different_indices(true_labels, pred_labels)

In [48]:
wrong_preds_idx[:10]

[0, 9, 10, 12, 14, 20, 22, 24, 25, 26]