# Evaluation
<br>
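Notebook for evaluating the ResNet, LCNN and SENet models: unbalanced and balanced accuracy on the clean evaluation set, and balanced accuracy on FGSM-perturbed inputs.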

In [None]:
import torch
from src.utils import *
import sys
import csv
import re
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_curve, auc

In [None]:
# Paths to the per-model probability CSVs (ResNet, LCNN, SENet).
probs_resnet_clean_csv = '../eval/prob_resnet_spec_eval.csv'
probs_LCNN_clean_csv = '../eval/prob_LCNN_spec_eval.csv'
probs_SENet_clean_csv = '../eval/prob_SENet_spec_eval.csv'

# header=0 makes pandas consume the first line as column names, so len(df)
# below is already the number of data rows.
probs_resnet_clean = pd.read_csv(probs_resnet_clean_csv, header=0, engine='python')
probs_LCNN_clean = pd.read_csv(probs_LCNN_clean_csv, header=0, engine='python')
probs_SENet_clean = pd.read_csv(probs_SENet_clean_csv, header=0, engine='python')

# Sanity check: all three models must have been scored on the same number of
# rows; abort otherwise.
if len(probs_resnet_clean) == len(probs_LCNN_clean) == len(probs_SENet_clean):
    # Report the row count as-is: the header is not part of len(df), so
    # subtracting 1 would under-report by one.
    print(f'OK. Lengths are the same: {len(probs_resnet_clean)}')
else:
    sys.exit('Not OK. Lengths are not the same')


In [None]:
# Ground-truth labels of the evaluation dataset (ASVSpoof2019).
# The CSV location is read from the 'df_eval_path' entry of the ResNet
# training config and resolved relative to the parent directory.
config_path_resnet = '../config/residualnet_train_config.yaml'
config_resnet = read_yaml(config_path_resnet)
df_eval = pd.read_csv(os.path.join('..', config_resnet['df_eval_path']))

***
## Extract predicted labels in the same order as in df_eval_19

In [None]:
def extract_id(file_path):
    """Return the digits following the first ``LA_E_`` in ``file_path``.

    Args:
        file_path: String to search (typically a file path or file name).

    Returns:
        The run of digits right after the first ``LA_E_`` occurrence, as a
        string, or ``None`` when the string contains no such token.
    """
    found = re.search(r'LA_E_(\d+)', file_path)
    return found.group(1) if found else None
    
    
def pred_probabilities(file2_path, file1_path='../data/df_eval_19.csv'):
    """Return predicted labels for ``file2_path`` in the row order of ``file1_path``.

    The reference CSV (``file1_path``) is scanned for ``LA_E_<digits>`` IDs in
    its second column. The probability CSV (``file2_path``) is scanned for the
    same IDs in its first column, with two float scores in the following two
    columns. Rows that carry no such ID (for example a header row) and blank
    rows are ignored.

    Args:
        file2_path: Path to the probability CSV.
        file1_path: Path to the reference CSV that defines the output order.
            Defaults to '../data/df_eval_19.csv', the path that used to be
            hard-coded in this function.

    Returns:
        A list of ints, one per reference ID that also appears in the
        probability CSV: 0 when the first score is strictly greater than the
        second, otherwise 1.

    Raises:
        IndexError: if a probability row has an ID but fewer than three columns.
        ValueError: if a score of such a row cannot be parsed as a float.

    Warns:
        UserWarning: if some reference IDs have no row in ``file2_path``. Those
            IDs are skipped, so the result is shorter than the reference list
            and no longer lines up with it position by position.
    """
    import warnings  # local import: only used to report missing IDs

    # Collect the reference IDs in file order.
    file1_ids = []
    # newline='' lets the csv module handle line endings / embedded newlines.
    with open(file1_path, 'r', newline='') as file1:
        for row in csv.reader(file1):
            if len(row) < 2:
                # Blank or truncated line: there is no second column to inspect.
                continue
            file_id = extract_id(row[1])
            if file_id:
                file1_ids.append(file_id)

    # Map each ID in the probability file to its pair of scores.
    file2_data = {}
    with open(file2_path, 'r', newline='') as file2:
        for row in csv.reader(file2):
            if not row:
                continue
            file_id = extract_id(row[0])
            # The ID test runs before float(), so a textual header is skipped
            # instead of failing the conversion.
            if file_id:
                file2_data[file_id] = (float(row[1]), float(row[2]))

    output_array = []
    missing = 0
    for file_id in file1_ids:
        scores = file2_data.get(file_id)
        if scores is None:
            missing += 1
            continue
        col2, col3 = scores
        output_array.append(0 if col2 > col3 else 1)

    if missing:
        warnings.warn(
            f'{missing} of {len(file1_ids)} IDs from {file1_path} are missing in '
            f'{file2_path}; the returned labels are not aligned with the reference rows.'
        )

    return output_array

***
## Accuracies on clean dataset

In [None]:
# Predicted labels of each model, derived from its clean probability CSV and
# ordered by pred_probabilities.
pred_labels_clean_resnet = pred_probabilities(file2_path=probs_resnet_clean_csv)
pred_labels_clean_LCNN = pred_probabilities(file2_path=probs_LCNN_clean_csv)
pred_labels_clean_SeNet = pred_probabilities(file2_path=probs_SENet_clean_csv)

In [None]:
# Ground-truth labels are taken from the last column of df_eval.
GT_labels = df_eval.iloc[:,-1].tolist()
# Show how many labels were loaded.
print(len(GT_labels))

***
## Unbalanced accuracies on clean dataset

In [None]:
# Plain (unbalanced) accuracy of each model's predicted labels against GT_labels.
UA_resnet_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_resnet)
UA_LCNN_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_LCNN)
UA_SeNet_clean = accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_SeNet)

# Report the scores as percentages with two decimals.
print(f'The unb.acc. for clean dataset for ResNet is {UA_resnet_clean*100:.2f}%\n'
      f'The unb.acc. for clean dataset for LCNN is {UA_LCNN_clean*100:.2f}%\n'
      f'The unb.acc. for clean dataset for SENet is {UA_SeNet_clean*100:.2f}%\n')

***
## Balanced accuracy on clean dataset

In [None]:
# Balanced accuracy of each model's predicted labels against GT_labels.
BA_resnet_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_resnet)
BA_LCNN_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_LCNN)
BA_SeNet_clean = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels_clean_SeNet)

# Report the scores as percentages with two decimals.
print(f'The bal.acc. for clean dataset for ResNet is {BA_resnet_clean*100:.2f}%\n'
      f'The bal.acc. for clean dataset for LCNN is {BA_LCNN_clean*100:.2f}%\n'
      f'The bal.acc. for clean dataset for SENet is {BA_SeNet_clean*100:.2f}%\n')

***
## Balanced accuracies of LCNN on FGSM (ResNet) for several epsilon values


In [None]:
def compute_and_print_bal_accuracy(values, model, attack, at_model):
    """Print the balanced accuracy of one model/attack combination per epsilon.

    For every entry of ``values`` the file
    ``../eval/prob_<model>_<attack>_<at_model>_<eps>.csv`` is read, where
    ``<eps>`` is the entry's string form with '.' replaced by 'dot'. Its
    predicted labels are scored against the notebook-level ``GT_labels``.

    Args:
        values: Iterable of epsilon values.
        model: Model name used in the file name and in the printed message.
        attack: Attack name used in the file name and in the printed message.
        at_model: Second model name used in the file name and in the message.

    Returns:
        None; one line is printed per epsilon.
    """
    for eps in values:
        # File names cannot carry the decimal point, e.g. 0.2 -> '0dot2'.
        eps_tag = 'dot'.join(str(eps).split('.'))
        predicted = pred_probabilities(
            file2_path=f'../eval/prob_{model}_{attack}_{at_model}_{eps_tag}.csv'
        )
        score = balanced_accuracy_score(y_true=GT_labels, y_pred=predicted)

        print(f'Balanced accuracy for {model} on {attack} {at_model} for eps={eps} is {score*100:.2f}%')
    

In [None]:
# Balanced accuracy of LCNN on the FGSM/ResNet probability files, one line per epsilon.
compute_and_print_bal_accuracy([0.2, 0.4, 0.6, 0.8, 1.0, 2.0, 3.0], 'LCNN', 'FGSM', 'ResNet')