***
# EVALUATIONS 

In [1]:
from src.utils import *
import sys
import csv
import re
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_curve, auc

In [11]:
# Function to extract the file number from a given filename
def extract_file_number(filename):
    match = re.search(r"LA_E_(\d+)", filename)
    if match:
        return match.group(1)
    return None

def GT_and_predicted_labels(file2_path):
    # Read file 1 and store the file number and label in a dictionary
    file1_dict = {}
    with open('../data/df_eval_19.csv', 'r') as file1:
        reader = csv.reader(file1)
        next(reader)  # Skip header
        for row in reader:
            # Extract file number from the path (assuming path contains 'LA_E_xxxxxxx.flac')
            match = re.search(r"LA_E_(\d+)", row[1])
            if match:
                file_number = match.group(1)
                label = int(row[2])  # Convert label to integer
                file1_dict[file_number] = label  # Store file number and corresponding label
    
    # Initialize arrays for predicted classes and true labels
    predicted_classes = []
    true_labels = []
    
    # Read file 2 and process the data
    with open(file2_path, 'r') as file2:
        reader = csv.reader(file2)
        next(reader)  # Skip header
        for row in reader:
            filename = row[0]  # Filename is in the first column
            pred_class_0 = float(row[1])  # Prediction for class 0
            pred_class_1 = float(row[2])  # Prediction for class 1
        
            # Determine predicted class: 0 if Pred.class 0 > Pred.class 1, else 1
            if pred_class_0 > pred_class_1:
                predicted_classes.append(0)
            else:
                predicted_classes.append(1)

            # Extract the file number
            file_number = extract_file_number(filename)

            # Get the true label from file1_dict if the file number exists
            if file_number in file1_dict:
                true_labels.append(file1_dict[file_number])
            else:
                print(file_number)
                true_labels.append(None)  # If no matching file number is found in file 1
    return predicted_classes, true_labels


In [9]:
def eval_attack(attack, eval_model, attack_model, model_version, type_of_spec, feature, dataset, epsilon, q_res, q_sen):
    
    epsilon_str = str(epsilon).replace('.', 'dot')
    script_dir = os.getcwd()  # get directory of current script
    
    if attack != 'Ensemble' and attack != None:
        probs_csv = f'probs_{eval_model}_{model_version}_{attack}_{attack_model}_{dataset}_{epsilon_str}_{type_of_spec}_{feature}.csv'
        probs = pd.read_csv(os.path.join(script_dir, probs_csv), header=0, engine='python')
    elif attack == 'Ensemble':
        probs_csv = f'probs_{eval_model}_{model_version}_Ensemble_{dataset}_{q_res}_{q_sen}_{epsilon_str}_{type_of_spec}_{feature}.csv'
        probs = pd.read_csv(os.path.join(script_dir, probs_csv), header=0, engine='python')
    elif attack == None:
        print('Evaluating clean dataset...')
        probs_csv = f'probs_{eval_model}_{model_version}_clean_{dataset}_{type_of_spec}_{feature}.csv'
        probs = pd.read_csv(os.path.join(script_dir, probs_csv), header=0, engine='python')
        
        #print(f'The probability list is {len(probs)} samples long')
    # GT labels
    if dataset == '3s':
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19_3s.csv' )
    else:
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19.csv' )
        
    df_eval = pd.read_csv(eval_csv)
    GT_labels = df_eval.iloc[:, -1].tolist()

    pred_labels, GT_labels = GT_and_predicted_labels(file2_path=probs_csv)
    print(len(pred_labels))
    print(len(GT_labels))
    print(pred_labels)
    print(GT_labels)
    
    # UNBALANCED ACCURACY
    UA = accuracy_score(y_true=GT_labels, y_pred=pred_labels)
    BA = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels)
    
    if attack != 'Ensemble':    
        print(f'Eval model: {eval_model} {model_version}, attack: {attack}, attack model: {attack_model} {model_version}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> UA = {UA*100:.2f}%, BA = {BA*100:.2f}% ')
    elif attack == 'Ensemble':
        print(f'Eval model: {eval_model} {model_version}, attack: Ensemble, q_res = {q_res}, q_sen = {q_sen}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> UA = {UA*100:.2f}%, BA = {BA*100:.2f}% ')
        
    
    

In [5]:
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=0.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=0.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=0.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=0.0, q_res=None, q_sen=None)

Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=0.0, feature = audio --> UA = 79.10%, BA = 86.86% 
Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=0.0, feature = spec --> UA = 79.81%, BA = 87.11% 
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=0.0, feature = audio --> UA = 67.40%, BA = 79.81% 
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=0.0, feature = spec --> UA = 69.59%, BA = 80.96% 


In [6]:
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)

Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 0.67%, BA = 0.39% 
Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = spec --> UA = 0.00%, BA = 0.00% 
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 21.91%, BA = 23.66% 
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = spec --> UA = 8.69%, BA = 5.04% 


In [7]:
eval_attack(attack='FGSM', eval_model='Rawnet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=0.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='Rawnet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='Rawnet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)

Eval model: Rawnet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=0.0, feature = audio --> UA = 88.50%, BA = 59.99% 
Eval model: Rawnet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 87.86%, BA = 58.05% 
Eval model: Rawnet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 88.11%, BA = 58.90% 


In [8]:
eval_attack(attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)
eval_attack(attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)
eval_attack(attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)
eval_attack(attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)

Eval model: ResNet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> UA = 12.47%, BA = 7.75% 
Eval model: ResNet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = spec --> UA = 7.38%, BA = 4.28% 
Eval model: SENet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> UA = 30.07%, BA = 45.12% 
Eval model: SENet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = spec --> UA = 7.33%, BA = 5.72% 


In [9]:
eval_attack(attack='Ensemble', eval_model='Rawnet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)

Eval model: Rawnet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> UA = 87.94%, BA = 58.44% 


In [10]:
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='ResNet', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='SENet', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)

Eval model: ResNet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 79.01%, BA = 86.42% 
Eval model: ResNet v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = spec --> UA = 77.19%, BA = 85.36% 
Eval model: SENet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 57.09%, BA = 72.92% 
Eval model: SENet v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = spec --> UA = 60.32%, BA = 74.67% 


In [5]:
eval_attack(attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)
eval_attack(attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)
eval_attack(attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)
eval_attack(attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0', type_of_spec='pow', feature='spec', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)

Eval model: ResNet v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = audio --> UA = 27.36%, BA = 16.15% 
Eval model: ResNet v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = spec --> UA = 25.41%, BA = 14.74% 
Eval model: SENet v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = audio --> UA = 32.73%, BA = 48.60% 
Eval model: SENet v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = spec --> UA = 18.83%, BA = 20.54% 


In [6]:
eval_attack(attack='Ensemble', eval_model='Rawnet', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)

Eval model: Rawnet v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = audio --> UA = 87.95%, BA = 58.40% 


In [7]:
eval_attack(attack='FGSM', eval_model='ResNet1D', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=0.0, q_res=30, q_sen=30)
eval_attack(attack='FGSM', eval_model='ResNet1D', attack_model='ResNet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='FGSM', eval_model='ResNet1D', attack_model='SENet', model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=None, q_sen=None)
eval_attack(attack='Ensemble', eval_model='ResNet1D', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=10, q_sen=10)
eval_attack(attack='Ensemble', eval_model='ResNet1D', attack_model=None, model_version='v0', type_of_spec='pow', feature='audio', dataset='3s', epsilon=3.0, q_res=30, q_sen=30)

Eval model: ResNet1D v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=0.0, feature = audio --> UA = 85.70%, BA = 88.48% 
Eval model: ResNet1D v0, attack: FGSM, attack model: ResNet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 63.20%, BA = 41.48% 
Eval model: ResNet1D v0, attack: FGSM, attack model: SENet v0, dataset: 3s, eps=3.0, feature = audio --> UA = 85.04%, BA = 87.66% 
Eval model: ResNet1D v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> UA = 69.25%, BA = 58.06% 
Eval model: ResNet1D v0, attack: Ensemble, q_res = 30, q_sen = 30, dataset: 3s, eps=3.0, feature = audio --> UA = 70.10%, BA = 51.33% 


In [12]:
eval_attack(attack=None, eval_model='ResNet', attack_model=None, model_version='v0', type_of_spec='mag', feature='audio', dataset='whole', epsilon=0.0, q_res=30, q_sen=30)

Evaluating clean dataset...
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
No

ValueError: Classification metrics can't handle a mix of unknown and binary targets