In [2]:
from src.utils import *
import sys
from tqdm import tqdm
import csv
import re
from sklearn.metrics import accuracy_score, balanced_accuracy_score

In [3]:
# Function to extract the numeric part of the filename (after the last underscore and before the file extension)
def extract_numeric_part(path):
    """Return the digit run that sits between the last underscore and a trailing '.flac'.

    The digits are returned as a string; ``None`` is returned when ``path`` does not
    end in ``_<digits>.flac``.
    """
    found = re.search(r'_(\d+)\.flac$', path)
    return found.group(1) if found else None


def pred_and_labels_clean(file1_csv, file2_csv):
    '''
    Pair predicted classes with ground-truth labels for the clean evaluation set.

    Rows of the two files are matched on the numeric ID that
    ``extract_numeric_part`` pulls out of each file path.

    file1_csv: probabilities list .csv; the columns 'Filename', 'Pred.class 0'
               and 'Pred.class 1' are read
    file2_csv: eval dataset .csv; the columns 'path' and 'label' are read

    Returns (prediction_list, label_list): two equal-length lists of ints, in the
    order the matching rows appear in file1_csv. Rows of file1_csv whose ID cannot
    be extracted, or whose ID is absent from file2_csv, are skipped.
    '''

    prediction_list = []
    label_list = []

    # Step 1: Read file2 into a dictionary for quick look-up based on the numeric part of the file path.
    # newline='' is what the csv module asks for when it is handed a file object.
    file2_dict = {}
    with open(file2_csv, mode='r', newline='') as file2:
        for row in csv.DictReader(file2):
            numeric_file2 = extract_numeric_part(row['path'])
            if numeric_file2:
                file2_dict[numeric_file2] = row['label']

    # Step 2: Traverse file1 once and check every row against the file2 dictionary.
    # (No separate row-count pass: it opened a second handle that was never closed
    # and its result was never used.)
    with open(file1_csv, mode='r', newline='') as file1:
        for row in csv.DictReader(file1):
            numeric_file1 = extract_numeric_part(row['Filename'])

            # Step 3: Skip rows that have no counterpart in the eval dataset
            if not numeric_file1 or numeric_file1 not in file2_dict:
                continue

            pred_class_0 = float(row['Pred.class 0'])
            pred_class_1 = float(row['Pred.class 1'])

            # Step 4: Class 0 only wins on a strictly larger score; ties go to class 1
            prediction_list.append(0 if pred_class_0 > pred_class_1 else 1)

            # Step 5: Append the corresponding label from file2
            label_list.append(int(file2_dict[numeric_file1]))

    return prediction_list, label_list




def pred_and_labels_attack(file1_csv, file2_csv):
    '''
    Pair predicted classes with ground-truth labels for an attacked evaluation set.

    Rows of the two files are matched on the digit run that follows 'LA_E_' in
    each file path.

    file1_csv: probabilities list .csv; the columns 'Filename', 'Pred.class 0'
               and 'Pred.class 1' are read
    file2_csv: eval dataset .csv; the columns 'path' and 'label' are read

    Returns (prediction_list, label_list): two equal-length lists of ints, in the
    order the matching rows appear in file1_csv. Rows of file1_csv whose ID cannot
    be extracted, or whose ID is absent from file2_csv, are skipped.
    '''

    def extract_numeric_part(path):
        # Capture the digits that directly follow 'LA_E_'; anything after them is ignored
        match = re.search(r'LA_E_(\d+)', path)
        return match.group(1) if match else None

    prediction_list = []
    label_list = []

    # Step 1: Read file2 into a dictionary for quick look-up based on the numeric part of the file path.
    # newline='' is what the csv module asks for when it is handed a file object.
    file2_dict = {}
    with open(file2_csv, mode='r', newline='') as file2:
        for row in csv.DictReader(file2):
            numeric_file2 = extract_numeric_part(row['path'])
            if numeric_file2:
                file2_dict[numeric_file2] = row['label']

    # Step 2: Traverse file1 once and check every row against the file2 dictionary.
    # (No separate row-count pass: it opened a second handle that was never closed
    # and its result was never used.)
    with open(file1_csv, mode='r', newline='') as file1:
        for row in csv.DictReader(file1):
            numeric_file1 = extract_numeric_part(row['Filename'])

            # Step 3: Skip rows that have no counterpart in the eval dataset
            if not numeric_file1 or numeric_file1 not in file2_dict:
                continue

            pred_class_0 = float(row['Pred.class 0'])
            pred_class_1 = float(row['Pred.class 1'])

            # Step 4: Class 0 only wins on a strictly larger score; ties go to class 1
            prediction_list.append(0 if pred_class_0 > pred_class_1 else 1)

            # Step 5: Append the corresponding label from file2
            label_list.append(int(file2_dict[numeric_file1]))

    return prediction_list, label_list




In [4]:
def eval_clean(eval_model, model_version, type_of_spec, feature, dataset):
    """Print unbalanced and balanced accuracy of a model on the clean eval set.

    The probabilities file name is assembled from the arguments and resolved
    relative to the current working directory; the ground-truth CSV is taken from
    the 'data' folder next to the working directory ('df_eval_19_3s.csv' when
    ``dataset == '3s'``, 'df_eval_19.csv' otherwise).

    Prints the number of predictions, the number of labels and one summary line.
    Returns None.
    """
    working_dir = os.getcwd()
    print('Evaluating clean dataset...')

    probs_csv = f'probs_{eval_model}_{model_version}_clean_{dataset}_{type_of_spec}_{feature}.csv'

    # Pick the ground-truth file that belongs to the requested dataset variant
    eval_name = 'df_eval_19_3s.csv' if dataset == '3s' else 'df_eval_19.csv'
    eval_csv = os.path.join(os.path.dirname(working_dir), 'data', eval_name)

    pred_labels, GT_labels = pred_and_labels_clean(file1_csv=probs_csv, file2_csv=eval_csv)

    for labels in (pred_labels, GT_labels):
        print(len(labels))

    # UA = plain accuracy, BA = balanced accuracy
    UA = accuracy_score(y_true=GT_labels, y_pred=pred_labels)
    BA = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels)

    print(f'Eval model: {eval_model} {model_version}, clean dataset: {dataset}, feature = {feature} --> UA = {UA*100:.2f}%, BA = {BA*100:.2f}% ')
    

In [5]:
def eval_attack(attack, eval_model, attack_model, model_version, type_of_spec, feature, dataset, epsilon, q_res, q_sen):
    """Print unbalanced and balanced accuracy of a model on an attacked eval set.

    attack: attack name. 'Ensemble' selects the ensemble file-name pattern (built
        from ``q_res`` and ``q_sen``); any other value selects the pattern built
        from ``attack`` and ``attack_model``.
    epsilon: turned into text with ``str()`` and '.' replaced by 'dot' for the
        file name (so ``None`` appears as 'None').
    dataset: '3s' selects 'df_eval_19_3s.csv' as ground truth, anything else
        'df_eval_19.csv'; both are looked up in the 'data' folder next to the
        current working directory.

    The probabilities file is resolved relative to the current working directory.
    Prints one summary line and returns None.

    Raises:
        ValueError: if ``attack`` is None (no file-name pattern exists for it).
    """
    if attack is None:
        raise ValueError("attack must name an attack (e.g. 'FGSM', 'BIM', 'Ensemble'), got None")

    epsilon_str = str(epsilon).replace('.', 'dot')
    script_dir = os.getcwd()  # get directory of current script
    is_ensemble = attack == 'Ensemble'

    # The probabilities CSV is parsed once, by pred_and_labels_attack; it is not
    # additionally loaded into a DataFrame here.
    if is_ensemble:
        probs_csv = f'probs_{eval_model}_{model_version}_Ensemble_{dataset}_{q_res}_{q_sen}_{epsilon_str}_{type_of_spec}_{feature}.csv'
    else:
        probs_csv = f'probs_{eval_model}_{model_version}_{attack}_{attack_model}_{dataset}_{epsilon_str}_{type_of_spec}_{feature}.csv'

    # GT labels
    if dataset == '3s':
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19_3s.csv')
    else:
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19.csv')

    pred_labels, GT_labels = pred_and_labels_attack(file1_csv=probs_csv, file2_csv=eval_csv)

    # UA = plain accuracy, BA = balanced accuracy
    UA = accuracy_score(y_true=GT_labels, y_pred=pred_labels)
    BA = balanced_accuracy_score(y_true=GT_labels, y_pred=pred_labels)

    if is_ensemble:
        print(f'Eval model: {eval_model} {model_version}, attack: Ensemble, q_res = {q_res}, q_sen = {q_sen}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> UA = {UA*100:.2f}%, BA = {BA*100:.2f}% ')
    else:
        print(f'Eval model: {eval_model} {model_version}, attack: {attack}, attack model: {attack_model} {model_version}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> UA = {UA*100:.2f}%, BA = {BA*100:.2f}% ')

In [10]:
def ASR(attack, eval_model, attack_model, model_version, type_of_spec, feature, dataset, epsilon, q_res, q_sen):
    """Print the attack success rate: the share of samples whose prediction differs from its label.

    attack: attack name. 'Ensemble' and 'Ensemble1D' select the ensemble file-name
        pattern (built from ``q_res`` and ``q_sen``); any other value selects the
        pattern built from ``attack`` and ``attack_model``.
    epsilon: turned into text with ``str()`` and '.' replaced by 'dot' for the
        file name (so ``None`` appears as 'None').
    dataset: '3s' selects 'df_eval_19_3s.csv' as ground truth, anything else
        'df_eval_19.csv'; both are looked up in the 'data' folder next to the
        current working directory.

    The probabilities file is resolved relative to the current working directory.
    Prints the number of mismatching samples followed by one summary line, and
    returns None.

    Raises:
        ValueError: if ``attack`` is None (no file-name pattern exists for it).
    """
    if attack is None:
        raise ValueError("attack must name an attack (e.g. 'FGSM', 'BIM', 'Ensemble'), got None")

    epsilon_str = str(epsilon).replace('.', 'dot')
    script_dir = os.getcwd()  # get directory of current script
    is_ensemble = attack in ('Ensemble', 'Ensemble1D')

    # The probabilities CSV is parsed once, by pred_and_labels_attack; it is not
    # additionally loaded into a DataFrame here.
    if is_ensemble:
        # NOTE(review): 'Ensemble1D' resolves to the same '..._Ensemble_...' file name
        # as 'Ensemble' — confirm this is intended and not a missing '1D' in the pattern.
        probs_csv = f'probs_{eval_model}_{model_version}_Ensemble_{dataset}_{q_res}_{q_sen}_{epsilon_str}_{type_of_spec}_{feature}.csv'
    else:
        probs_csv = f'probs_{eval_model}_{model_version}_{attack}_{attack_model}_{dataset}_{epsilon_str}_{type_of_spec}_{feature}.csv'

    # GT labels
    if dataset == '3s':
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19_3s.csv')
    else:
        eval_csv = os.path.join(os.path.dirname(script_dir), 'data', 'df_eval_19.csv')

    pred_labels, GT_labels = pred_and_labels_attack(file1_csv=probs_csv, file2_csv=eval_csv)
    pred_labels = np.array(pred_labels)
    GT_labels = np.array(GT_labels)

    assert len(pred_labels) == len(GT_labels), 'ouch, different lengths'

    # Every sample whose predicted class differs from its label counts as a success
    succesful_attacks = np.sum(pred_labels != GT_labels)
    print(succesful_attacks)
    total_samples = len(pred_labels)
    asr_percent = (succesful_attacks / total_samples) * 100  # kept distinct from the function name

    # Report with the same ensemble / non-ensemble split that chose the input file,
    # so 'Ensemble1D' lists q_res / q_sen instead of an unused attack model.
    if is_ensemble:
        print(f'Eval model: {eval_model} {model_version}, attack: {attack}, q_res = {q_res}, q_sen = {q_sen}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> ASR = {asr_percent:.2f}%')
    else:
        print(f'Eval model: {eval_model} {model_version}, attack: {attack}, attack model: {attack_model} {model_version}, dataset: {dataset}, eps={epsilon}, feature = {feature} --> ASR = {asr_percent:.2f}%')

***
## Clean dataset with magnitude

In [13]:
# Clean-set accuracy: ResNet v0, type_of_spec='mag'
eval_clean(
    eval_model='ResNet', model_version='v0',
    type_of_spec='mag', feature='audio', dataset='whole',
)

Evaluating clean dataset...
71237
71237
Eval model: ResNet v0, clean dataset: whole, feature = audio --> UA = 89.45%, BA = 49.89% 


***
## Clean dataset SENet and ResNet

In [5]:
# Clean-set accuracy: ResNet v0, type_of_spec='pow'
eval_clean(
    eval_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
)

Evaluating clean dataset...
71237
71237
Eval model: ResNet v0, clean dataset: whole, feature = audio --> UA = 89.45%, BA = 49.89% 


In [16]:
# Clean-set accuracy: SENet v0, type_of_spec='pow'
eval_clean(
    eval_model='SENet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
)

Evaluating clean dataset...
71237
71237
Eval model: SENet v0, clean dataset: whole, feature = audio --> UA = 70.80%, BA = 82.62% 


***
## Whole dataset without normalization

In [11]:
# FGSM, eval_model = attack_model = ResNet: feature='audio', then feature='spec'
eval_attack(
    attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)
eval_attack(
    attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='spec', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)

Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = audio --> UA = 1.39%, BA = 0.82% 
Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = spec --> UA = 0.00%, BA = 0.00% 


In [12]:
# FGSM, eval_model = attack_model = SENet: feature='audio', then feature='spec'
eval_attack(
    attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)
eval_attack(
    attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0',
    type_of_spec='pow', feature='spec', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)

Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: whole, eps=3.0, feature = audio --> UA = 13.50%, BA = 25.57% 
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: whole, eps=3.0, feature = spec --> UA = 9.24%, BA = 5.15% 


***
## Whole dataset with normalization

In [32]:
# ASR for FGSM, eval_model = attack_model = ResNet: feature='audio', then feature='spec'
ASR(
    attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)
ASR(
    attack='FGSM', eval_model='ResNet', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='spec', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)

70250
Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = audio --> ASR = 98.61%
71237
Eval model: ResNet v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = spec --> ASR = 100.00%


In [35]:
# ASR for FGSM, eval_model = attack_model = SENet: feature='audio', then feature='spec'
ASR(
    attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)
ASR(
    attack='FGSM', eval_model='SENet', attack_model='SENet', model_version='v0',
    type_of_spec='pow', feature='spec', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)

53669
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: whole, eps=3.0, feature = audio --> ASR = 75.34%
64663
Eval model: SENet v0, attack: FGSM, attack model: SENet v0, dataset: whole, eps=3.0, feature = spec --> ASR = 90.77%


***
## Transfer FGSM ResNet and SENet

In [20]:
# ASR for FGSM with eval_model != attack_model, in both directions
ASR(
    attack='FGSM', eval_model='SENet', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)
ASR(
    attack='FGSM', eval_model='ResNet', attack_model='SENet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=None, q_sen=None,
)

29522
Eval model: SENet v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = audio --> ASR = 41.44%
12945
Eval model: ResNet v0, attack: FGSM, attack model: SENet v0, dataset: whole, eps=3.0, feature = audio --> ASR = 18.17%


***
## ResNet1D on clean whole dataset

In [7]:
# Clean-set accuracy: ResNet1D v0
eval_clean(
    eval_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
)

Evaluating clean dataset...
71237
71237
Eval model: ResNet1D v0, clean dataset: whole, feature = audio --> UA = 93.32%, BA = 91.72% 


***
## ResNet1D BIM attack

In [7]:
# BIM, attack_model='ResNet1D', evaluated on ResNet1D
eval_attack(
    attack='BIM', eval_model='ResNet1D', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

Eval model: ResNet1D v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> UA = 31.61%, BA = 18.32% 


In [10]:
# BIM, attack_model='ResNet1D', evaluated on ResNet
eval_attack(
    attack='BIM', eval_model='ResNet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

Eval model: ResNet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> UA = 3.45%, BA = 2.00% 


In [12]:
# BIM, attack_model='ResNet1D', evaluated on SENet
eval_attack(
    attack='BIM', eval_model='SENet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

Eval model: SENet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> UA = 47.14%, BA = 66.51% 


In [9]:
# BIM, attack_model='ResNet1D', evaluated on Rawnet
eval_attack(
    attack='BIM', eval_model='Rawnet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

Eval model: Rawnet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> UA = 95.75%, BA = 93.39% 


***
## ASR of BIM on all models

In [44]:
# ASR for BIM, attack_model='ResNet1D', evaluated on ResNet
ASR(
    attack='BIM', eval_model='ResNet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

30403
Eval model: ResNet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> ASR = 96.55%


In [45]:
# ASR for BIM, attack_model='ResNet1D', evaluated on SENet
ASR(
    attack='BIM', eval_model='SENet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

16644
Eval model: SENet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> ASR = 52.86%


In [31]:
# ASR for BIM, attack_model='ResNet1D', evaluated on ResNet1D
ASR(
    attack='BIM', eval_model='ResNet1D', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

21536
Eval model: ResNet1D v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> ASR = 68.39%


In [30]:
# ASR for BIM, attack_model='ResNet1D', evaluated on Rawnet
ASR(
    attack='BIM', eval_model='Rawnet', attack_model='ResNet1D', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=None, q_res=None, q_sen=None,
)

1339
Eval model: Rawnet v0, attack: BIM, attack model: ResNet1D v0, dataset: 3s, eps=None, feature = audio --> ASR = 4.25%


***
## ASR of Ensemble 2D on all models

In [39]:
# ASR for attack='Ensemble' evaluated on ResNet: feature='audio', then feature='spec'
ASR(
    attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)
ASR(
    attack='Ensemble', eval_model='ResNet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='spec', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

27560
Eval model: ResNet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> ASR = 87.53%
29165
Eval model: ResNet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = spec --> ASR = 92.62%


In [40]:
# ASR for attack='Ensemble' evaluated on SENet: feature='audio', then feature='spec'
ASR(
    attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)
ASR(
    attack='Ensemble', eval_model='SENet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='spec', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

22018
Eval model: SENet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = audio --> ASR = 69.93%
29180
Eval model: SENet v0, attack: Ensemble, q_res = 10, q_sen = 10, dataset: 3s, eps=3.0, feature = spec --> ASR = 92.67%


***
## ASR of Ensemble 1D on all models

In [12]:
# ASR for attack='Ensemble1D' evaluated on ResNet1D
ASR(
    attack='Ensemble1D', eval_model='ResNet1D', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

9683
Eval model: ResNet1D v0, attack: Ensemble1D, attack model: None v0, dataset: 3s, eps=3.0, feature = audio --> ASR = 30.75%


In [13]:
# ASR for attack='Ensemble1D' evaluated on Rawnet
ASR(
    attack='Ensemble1D', eval_model='Rawnet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

3797
Eval model: Rawnet v0, attack: Ensemble1D, attack model: None v0, dataset: 3s, eps=3.0, feature = audio --> ASR = 12.06%


In [14]:
# ASR for attack='Ensemble1D' evaluated on ResNet
ASR(
    attack='Ensemble1D', eval_model='ResNet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

27560
Eval model: ResNet v0, attack: Ensemble1D, attack model: None v0, dataset: 3s, eps=3.0, feature = audio --> ASR = 87.53%


In [15]:
# ASR for attack='Ensemble1D' evaluated on SENet
ASR(
    attack='Ensemble1D', eval_model='SENet', attack_model=None, model_version='v0',
    type_of_spec='pow', feature='audio', dataset='3s',
    epsilon=3.0, q_res=10, q_sen=10,
)

22018
Eval model: SENet v0, attack: Ensemble1D, attack model: None v0, dataset: 3s, eps=3.0, feature = audio --> ASR = 69.93%


***
## ResNet1D reaction to FGSM on ResNet and SENet

In [21]:
# ASR for FGSM with attack_model='ResNet', evaluated on ResNet1D
ASR(
    attack='FGSM', eval_model='ResNet1D', attack_model='ResNet', model_version='v0',
    type_of_spec='pow', feature='audio', dataset='whole',
    epsilon=3.0, q_res=10, q_sen=10,
)

20868
Eval model: ResNet1D v0, attack: FGSM, attack model: ResNet v0, dataset: whole, eps=3.0, feature = audio --> ASR = 29.29%
