# Evaluation
<br>
Notebook for evaluating models.

In [17]:
import torch
from src.utils import *
import sys
import csv
import re
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_curve, auc

In [18]:
probs_resnet_clean_csv = '../eval/prob_resnet_spec_eval.csv'
probs_LCNN_clean_csv = '../eval/prob_LCNN_spec_eval.csv'
probs_SENet_clean_csv = '../eval/prob_SENet_spec_eval.csv'

probs_resnet_clean = pd.read_csv(probs_resnet_clean_csv, header=0, engine='python')
probs_LCNN_clean = pd.read_csv(probs_LCNN_clean_csv, header=0, engine='python')
probs_SENet_clean = pd.read_csv(probs_SENet_clean_csv, header=0, engine='python')

if len(probs_resnet_clean) == len(probs_LCNN_clean) == len(probs_SENet_clean):
    print(f'OK. Lengths are the same: {len(probs_resnet_clean)-1}')
else:
    sys.exit('Not OK. Lengths are not the same')


OK. Lengths are the same: 71236


In [19]:
# ground truth labels of the evaluation dataset (ASVSpoof2019)
config_path_resnet = '../config/residualnet_train_config.yaml'
config_resnet = read_yaml(config_path_resnet)
df_eval = pd.read_csv(os.path.join('..', config_resnet['df_eval_path']))

***
## Extract predicted labels in the same order as in df_eval_19

In [20]:
def extract_id(file_path):
    match = re.search(r'LA_E_(\d+)', file_path)
    if match:
        return match.group(1)
    return None
    
    
def pred_probabilities(file2_path):
    # read df_eval_19
    file1_path = '../data/df_eval_19.csv'
    
    file1_ids = []
    with open(file1_path, 'r') as file1:
        csv_reader = csv.reader(file1)
        for row in csv_reader:
            file_id = extract_id(row[1])
            if file_id:
                file1_ids.append(file_id)
    
    # read second file and store data in a dictionary
    file2_data = {}
    with open(file2_path, 'r') as file2:
        csv_reader = csv.reader(file2)
        for row in csv_reader:
            file_id = extract_id(row[0])
            if file_id:
                file2_data[file_id] = (float(row[1]), float(row[2]))
                
    output_array = []
    for file_id in file1_ids:
        if file_id in file2_data:
            col2, col3 = file2_data[file_id]
            output_array.append(0 if col2>col3 else 1)
                
    return output_array

***
## Accuracies on clean dataset

In [21]:
pred_labels_clean_resnet = pred_probabilities(file2_path=probs_resnet_clean_csv)
pred_labels_clean_LCNN = pred_probabilities(file2_path=probs_LCNN_clean_csv)
pred_labels_clean_SeNet = pred_probabilities(file2_path=probs_SENet_clean_csv)

In [22]:
GT_labels = df_eval.iloc[:,-1].tolist()
print(len(GT_labels))

71237


***
## Unbalanced accuracies on clean dataset

In [23]:
UA_resnet_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_resnet)
UA_LCNN_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_LCNN)
UA_SeNet_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_SeNet)

print(f'The unb.acc. for clean dataset for ResNet is {UA_resnet_clean*100:.2f}%\n'
      f'The unb.acc. for clean dataset for LCNN is {UA_LCNN_clean*100:.2f}%\n'
      f'The unb.acc. for clean dataset for SENet is {UA_SeNet_clean*100:.2f}%\n')

The unb.acc. for clean dataset for ResNet is 77.14%
The unb.acc. for clean dataset for LCNN is 52.85%
The unb.acc. for clean dataset for SENet is 82.80%


***
## Balanced accuracy for LCNN and SENet on clean dataset

In [24]:
BA_resnet_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_resnet)
BA_LCNN_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_LCNN)
BA_SeNet_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_SeNet)

print(f'The balanced acc. for clean dataset for ResNet is {BA_resnet_clean*100:.2f}%\n'
      f'The balanced acc. for clean dataset for LCNN is {BA_LCNN_clean*100:.2f}%\n'
      f'The balanced acc. for clean dataset for SENet is {BA_SeNet_clean*100:.2f}%\n')

The balanced acc. for clean dataset for ResNet is 84.21%
The balanced acc. for clean dataset for LCNN is 72.98%
The balanced acc. for clean dataset for SENet is 88.35%


***
## Balanced accuracies of LCNN and SENet on FGSM attack performed with ResNet 


In [25]:
def compute_and_print_bal_accuracy(values, model, attack, at_model, GT_labels):
    if model != 'ResNet':
        for i in values:
            epsilon_str = str(i).replace('.', 'dot')
            csv_file = f'../eval/prob_{model}_{attack}_{at_model}_{epsilon_str}.csv'
            probs = pred_probabilities(file2_path=csv_file)
            BA = balanced_accuracy_score(y_true=GT_labels, y_pred=probs)
        
            print(f'Balanced accuracy for {model} on {attack} {at_model} for eps={i} is {BA*100:.2f}%')
    elif model == 'ResNet' and at_model == 'ResNet':
        for i in values:
            epsilon_str = str(i).replace('.', 'dot')
            csv_file = f'../eval/prob_resnet_spec_eval_{attack}_{epsilon_str}.csv'
            probs = pred_probabilities(file2_path=csv_file)
            BA = balanced_accuracy_score(y_true=GT_labels, y_pred=probs)
        
            print(f'Balanced accuracy for {model} on {attack} {at_model} for eps={i} is {BA*100:.2f}%')
    elif model == 'ResNet' and at_model == 'SENet':
        for i in values:
            epsilon_str = str(i).replace('.', 'dot')
            csv_file = f'../eval/prob_ResNet_{attack}_{at_model}_{epsilon_str}.csv'
            probs = pred_probabilities(file2_path=csv_file)
            probs = pred_probabilities(file2_path=csv_file)
            BA = balanced_accuracy_score(y_true=GT_labels, y_pred=probs)
            print(f'Balanced accuracy for {model} on {attack} {at_model} for eps={i} is {BA*100:.2f}%')
    elif model == 'LCNN' and at_model == 'LCNN':
        for i in values:
            epsilon_str = str(i).replace('.', 'dot')
            csv_file = f'../eval/prob_LCNN_{attack}_{at_model}_{epsilon_str}.csv'
            probs = pred_probabilities(file2_path=csv_file)
            probs = pred_probabilities(file2_path=csv_file)
            BA = balanced_accuracy_score(y_true=GT_labels, y_pred=probs)
            print(f'Balanced accuracy for {model} on {attack} {at_model} for eps={i} is {BA*100:.2f}%')
    else:
        print('TODO')
    
    
    

In [26]:
compute_and_print_bal_accuracy([0.2, 1.0, 2.0], 'ResNet', 'FGSM', 'ResNet', GT_labels)

Balanced accuracy for ResNet on FGSM ResNet for eps=0.2 is 77.53%
Balanced accuracy for ResNet on FGSM ResNet for eps=1.0 is 43.81%
Balanced accuracy for ResNet on FGSM ResNet for eps=2.0 is 13.89%


In [27]:
compute_and_print_bal_accuracy([0.2, 0.4, 0.6, 0.8, 1.0, 2.0, 3.0], 'LCNN', 'FGSM', 'ResNet', GT_labels)

Balanced accuracy for LCNN on FGSM ResNet for eps=0.2 is 69.04%
Balanced accuracy for LCNN on FGSM ResNet for eps=0.4 is 68.48%
Balanced accuracy for LCNN on FGSM ResNet for eps=0.6 is 67.94%
Balanced accuracy for LCNN on FGSM ResNet for eps=0.8 is 67.41%
Balanced accuracy for LCNN on FGSM ResNet for eps=1.0 is 66.91%
Balanced accuracy for LCNN on FGSM ResNet for eps=2.0 is 64.53%
Balanced accuracy for LCNN on FGSM ResNet for eps=3.0 is 62.24%


In [28]:
compute_and_print_bal_accuracy([0.2, 0.4, 0.6, 0.8, 1.0, 2.0, 3.0], 'SENet', 'FGSM', 'ResNet', GT_labels)

Balanced accuracy for SENet on FGSM ResNet for eps=0.2 is 88.00%
Balanced accuracy for SENet on FGSM ResNet for eps=0.4 is 87.56%
Balanced accuracy for SENet on FGSM ResNet for eps=0.6 is 87.10%
Balanced accuracy for SENet on FGSM ResNet for eps=0.8 is 86.54%
Balanced accuracy for SENet on FGSM ResNet for eps=1.0 is 85.89%
Balanced accuracy for SENet on FGSM ResNet for eps=2.0 is 83.29%
Balanced accuracy for SENet on FGSM ResNet for eps=3.0 is 80.79%


***
## SENet FGSM attack


In [29]:
compute_and_print_bal_accuracy([0.2, 1.0, 2.0, 3.0], 'SENet', 'FGSM', 'SENet', GT_labels)

Balanced accuracy for SENet on FGSM SENet for eps=0.2 is 87.04%
Balanced accuracy for SENet on FGSM SENet for eps=1.0 is 80.19%
Balanced accuracy for SENet on FGSM SENet for eps=2.0 is 71.97%
Balanced accuracy for SENet on FGSM SENet for eps=3.0 is 65.29%


***
## Transferability of SENet FGSM on ResNet

In [30]:
compute_and_print_bal_accuracy([1.0], 'ResNet', 'FGSM', 'SENet', GT_labels)

Balanced accuracy for ResNet on FGSM SENet for eps=1.0 is 83.91%


***
## FGSM on LCNN

In [31]:
compute_and_print_bal_accuracy([2.0], 'LCNN', 'FGSM', 'LCNN', GT_labels)

Balanced accuracy for LCNN on FGSM LCNN for eps=2.0 is 66.75%


***
# FGSM (double test) on ResNet
Basically I re-run the FGSM attack using the same code I used for LCNN and SENet because with those two models I was getting low success rates.

In [32]:
compute_and_print_bal_accuracy([0.0, 2.0], 'ResNet', 'FGSM', 'ResNet', GT_labels)

Balanced accuracy for ResNet on FGSM ResNet for eps=0.0 is 84.36%
Balanced accuracy for ResNet on FGSM ResNet for eps=2.0 is 13.89%
