# Load the scores

In [1]:
import csv
import re
import sys

import matplotlib.pyplot as plt
import pandas as pd
from src.utils import *
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_curve, auc

In [2]:
probs_RawNet_SSA_5dot0_csv = '../eval/prob_rawnet_eval_SSA_5dot0.csv'
probs_RawNet_FGSM_5dot0_csv = '../eval/prob_rawnet_eval_FGSM_5dot0.csv'
probs_ResNet_SSA_5dot0_csv = '../eval/prob_resnet_eval_SSA_5dot0.csv'
probs_ResNet_FGSM_5dot0_csv = '../eval/prob_resnet_eval_FGSM_5dot0.csv'

probs_RawNet_SSA_5dot0 = pd.read_csv(probs_RawNet_SSA_5dot0_csv, header=0, engine='python')
probs_RawNet_FGSM_5dot0 = pd.read_csv(probs_RawNet_FGSM_5dot0_csv, header=0, engine='python')

probs_ResNet_SSA_5dot0 = pd.read_csv(probs_ResNet_SSA_5dot0_csv, header=0, engine='python')
probs_ResNet_FGSM_5dot0 = pd.read_csv(probs_ResNet_FGSM_5dot0_csv, header=0, engine='python')

In [3]:
if len(probs_RawNet_SSA_5dot0) == len(probs_RawNet_FGSM_5dot0) == len(probs_ResNet_SSA_5dot0) == len(probs_ResNet_FGSM_5dot0):
    print(f'Lengths are okay: {len(probs_RawNet_FGSM_5dot0)-1}')
else:
    sys.exit('Lengths are not okay')

Lengths are okay: 6024


In [4]:
config_path = '../config/residualnet_train_config.yaml'
config = read_yaml(config_path)
df_eval = pd.read_csv(os.path.join('..', config['df_eval_path']))
df_eval[:3]

Unnamed: 0.1,Unnamed: 0,path,label
0,0,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1
1,1,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1
2,2,/nas/public/dataset/asvspoof2019/LA/ASVspoof20...,1


***
## Extract PREDICTED labels in the same order given by df_eval_19!

In [5]:
def extract_id(file_path):
    match = re.search(r'LA_E_(\d+)', file_path)
    if match:
        return match.group(1)
    return None
    
    
def pred_probabilities(file2_path):
    # read df_eval_19
    file1_path = '../data/df_eval_19.csv'
    
    file1_ids = []
    with open(file1_path, 'r') as file1:
        csv_reader = csv.reader(file1)
        for row in csv_reader:
            file_id = extract_id(row[1])
            if file_id:
                file1_ids.append(file_id)
    
    # read second file and store data in a dictionary
    file2_data = {}
    with open(file2_path, 'r') as file2:
        csv_reader = csv.reader(file2)
        for row in csv_reader:
            file_id = extract_id(row[0])
            if file_id:
                file2_data[file_id] = (float(row[1]), float(row[2]))
                
    output_array = []
    for file_id in file1_ids:
        if file_id in file2_data:
            col2, col3 = file2_data[file_id]
            output_array.append(0 if col2>col3 else 1)
                
    return output_array

In [6]:
pred_labels_rawnet_SSA_5dot0 = pred_probabilities(file2_path=probs_RawNet_SSA_5dot0_csv)
pred_labels_rawnet_FGSM_5dot0 = pred_probabilities(file2_path=probs_RawNet_FGSM_5dot0_csv)

pred_labels_resnet_SSA_5dot0 = pred_probabilities(file2_path=probs_ResNet_SSA_5dot0_csv)
pred_labels_resnet_FGSM_5dot0 = pred_probabilities(file2_path=probs_ResNet_FGSM_5dot0_csv)

In [7]:
# GT labels
GT_labels = df_eval.iloc[:, -1].tolist()

## Balanced accuracy

In [8]:
bal_acc_rawnet_SSA_5dot0 = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_rawnet_SSA_5dot0)
bal_acc_rawnet_FGSM_5dot0 = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_rawnet_FGSM_5dot0)
bal_acc_resnet_SSA_5dot0 = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_resnet_SSA_5dot0)
bal_acc_resnet_FGSM_5dot0 = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_resnet_FGSM_5dot0)

print(f'The balanced accuracy for RawNet2 on SSA, eps=5.0 is {bal_acc_rawnet_SSA_5dot0*100:.2f}%\n'
      f'The balanced accuracy for RawNet2 on FGSM, eps=5.0 is {bal_acc_rawnet_FGSM_5dot0*100:.2f}%\n'
      f'The balanced accuracy for ResNet on SSA, eps=5.0 is {bal_acc_resnet_SSA_5dot0*100:.2f}%\n'
      f'The balanced accuracy for ResNet on FGSM, eps=5.0 is {bal_acc_resnet_FGSM_5dot0*100:.2f}%')

The balanced accuracy for RawNet2 on SSA, eps=5.0 is 87.34%
The balanced accuracy for RawNet2 on FGSM, eps=5.0 is 80.15%
The balanced accuracy for ResNet on SSA, eps=5.0 is 81.66%
The balanced accuracy for ResNet on FGSM, eps=5.0 is 1.42%


In [24]:
def find_index(array1, array2):
    indices = []
    for index, (val1, val2) in enumerate(zip(array1, array2)):
        if val1 == 1 and val2 == 0:
            indices.append(index)
            if len(indices) == 6:
                break
    return indices

In [25]:
index_1_0 = find_index(GT_labels, pred_labels_rawnet_SSA_5dot0)
index_1_0

[21, 38, 41, 105, 114, 119]

## Confusion Matrix
* TN true negative, actual class was 0 (BF) and predicted as 0
* FP false positive, actual class was 0, but predicted as 1 (deep fake)
* FN false negative, actual class was 1, but predicted as 0
* TP true positive, actual class was 1 and predicted as 1

In [16]:
from sklearn.metrics import confusion_matrix

# [ TN    FP
#   FN    TP ]

In [17]:
cm_res_FGSM_5dot0 = confusion_matrix(GT_labels, pred_labels_resnet_FGSM_5dot0) 
print(cm_res_FGSM_5dot0)

[[   0  663]
 [5210  152]]


In [13]:
cm_res_ssa_5dot0 = confusion_matrix(GT_labels, pred_labels_resnet_SSA_5dot0) 
print(cm_res_ssa_5dot0)

[[ 609   54]
 [1530 3832]]


In [10]:
cm_raw_ssa_5dot0 = confusion_matrix(GT_labels, pred_labels_rawnet_SSA_5dot0) 
print(cm_raw_ssa_5dot0)

[[ 522  141]
 [ 217 5145]]


In [11]:
cm_raw_FGSM_5dot0 = confusion_matrix(GT_labels, pred_labels_rawnet_FGSM_5dot0) 
print(cm_raw_FGSM_5dot0)

[[ 432  231]
 [ 260 5102]]
