# Evaluation scores
Using the file ../eval/scores_resnet_spec_eval.csv I evaluate the model performance of the evaluation process, performed on the ASVSpoof2021 dataset.


## Computing the % of correct predictions

In [2]:
import csv

In [3]:
# the score values .csv uses a space as a delimiter between the file path and the score

def convert_column_to_binary(csv_file):
    binary_list = []
    
    with open(csv_file, 'r') as file:
        for line in file:
            # Split each line based on space 
            parts = line.strip().split()
            value = float(parts[1])
            
            try:
                if value > 0:
                    binary_list.append(0)
                else:
                    binary_list.append(1)
            except ValueError:
                pass
        
    return binary_list

In [4]:
csv_score_file = '../eval/scores_resnet_spec_eval.csv'
scores_binary_list = convert_column_to_binary(csv_score_file)

In [5]:
len(scores_binary_list)

611829

In [6]:
# column at which I have the GT labels
csv_column_index = 2
csv_GT_file = '../data/df_eval.csv'

with open(csv_GT_file, 'r') as file:
    reader = csv.reader(file)
    next(reader)
    csv_GT_values = [int(row[csv_column_index]) for row in reader]
    
total_values = len(scores_binary_list)
identical_count = sum(1 for x,y in zip(scores_binary_list, csv_GT_values) if x==y)
percentage_identical = (identical_count/total_values)*100


In [7]:
percentage_identical

89.25647525697539

## Computingh the ROC and EER

In [8]:
from sklearn.metrics import roc_curve
import numpy as np

In [9]:
def compute_eer(y_true, y_score):
    # compute the ROC curve
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    
    # find threshold
    eer_threshold_idx = np.argmin(np.abs(fpr-(1-tpr)))
    eer_threshold = thresholds[eer_threshold_idx]
    
    # Calculate EER (FPR or TPR at the EER threshold)
    eer_fpr = fpr[eer_threshold_idx]
    eer_tpr = tpr[eer_threshold_idx]
    eer = (eer_fpr + (1 - eer_tpr)) / 2.0
    
    return eer, eer_threshold

In [10]:
eer, eer_threshold = compute_eer(csv_GT_values, scores_binary_list)

In [11]:
eer

0.2700244945842659

## Computing the confusion matrix
* TN true negative, actual class was 0 (BF) and predicted as 0
* FP false positive, actual class was 0, but predicted as 1 (deep fake)
* FN false negative, actual class was 1, but predicted as 0
* TP true positive, actual class was 1 and predicted as 1

In [12]:
from sklearn.metrics import confusion_matrix

In [13]:
cm = confusion_matrix(csv_GT_values, scores_binary_list)
print(cm)

# [ TN    FP
#   FN    TP ]

[[ 12539  10078]
 [ 55654 533558]]


## Computing the indices of wrong predictions

In [14]:
def find_different_indices(list1, list2):
    if len(list1) != len(list2):
        raise ValueError
    diff_idx = [i for i, (x,y) in enumerate(zip(list1, list2)) if x!=y]
    return diff_idx

wrong_preds_idx = find_different_indices(csv_GT_values, scores_binary_list)

In [15]:
wrong_preds_idx[:10]

[1, 7, 13, 18, 20, 59, 60, 61, 64, 72]