# 16/05/2024
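Notebook for listening to the original and adversarially perturbed (FGSM) audio files and for checking the model predictions on the corresponding audio and spectrograms.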

In [8]:
import IPython.display as ipd
import numpy as np

from src.audio_utils import read_audio
from src.utils import *
from sp_utils import spectrogram_inversion, get_spectrogram_from_audio
from attacks_utils import retrieve_single_cached_spec, load_spec_model, evaluate_spec, get_pred_class

In [9]:
# Run on the GPU when torch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [10]:
# Sample rate handed to ipd.Audio when playing the reconstructed signals.
rate = 16000

# Parse the YAML configuration used by the helper calls throughout the notebook.
config_path = '../config/residualnet_train_config.yaml'
config = read_yaml(config_path)

***
## Testing the audio pipeline
### Original phase available

In [11]:
# Invert the cached spectrogram at index 0 with phase_info=True
# (per the section heading: the original phase is available), then play it.
index = 0
file, label, spec = retrieve_single_cached_spec(config, index)
audio, phase = spectrogram_inversion(
    config,
    index,
    spec,
    phase_info=True,
    phase_to_use=None,
)
ipd.Audio(audio, rate=rate)

Evaluating file /nas/public/dataset/asvspoof2021/ASVspoof2021_DF_eval/flac/DF_E_2000011.flac with label 1


### Original phase not available
SPSI + Griffin-Lim

In [12]:
# Same item as the previous cell (reuses `index`), but with phase_info=False
# (per the section heading: the original phase is not available).
file, label, spec = retrieve_single_cached_spec(config, index)
audio, phase = spectrogram_inversion(
    config,
    index,
    spec,
    phase_info=False,
    phase_to_use=None,
)
ipd.Audio(audio, rate=rate)

Evaluating file /nas/public/dataset/asvspoof2021/ASVspoof2021_DF_eval/flac/DF_E_2000011.flac with label 1


*** 
## Check the audios
Function for loading any type of audio file (from the evaluation set or perturbed). In all cases it also prints the class label and the model prediction.

In [13]:
def get_audio_file(config, index, attack=False, epsilon=None):
    '''
    Get any audio file, either from the evaluation dataset or from the perturbed audio folders.
    Also run the model on the spectrogram of that file and print the GT label and the prediction.
    :param config: loaded configuration mapping; its "df_eval_path" entry locates the evaluation CSV
    :param index: index of audio wrt evaluation list
    :param attack: False/None for the original audio, or 'FGSM' or 'FGSM_1' for a perturbed one
    :param epsilon: epsilon value; required when attack is set (it is part of the perturbed file name)
    :return: the pair returned by read_audio for the selected file (audio and its sample rate)
    :raises ValueError: if attack is not a supported name, or attack is set without an epsilon
    '''
    # Folder holding the perturbed audio of each supported attack.
    attack_folders = {
        'FGSM': 'FGSM_data/p_audio',
        'FGSM_1': 'FGSM_data/p_audio_1',
    }

    # Validate before any I/O so that a bad argument fails fast with a clear
    # message instead of an UnboundLocalError on the folder lookup.
    if attack:
        if attack not in attack_folders:
            raise ValueError(
                f'Unknown attack {attack!r}; expected one of {sorted(attack_folders)}'
            )
        if epsilon is None:
            raise ValueError('epsilon must be provided when attack is set')

    df_eval = pd.read_csv(os.path.join('..', config["df_eval_path"]))
    file = df_eval['path'].iloc[index]
    label = df_eval['label'].iloc[index]

    model = load_spec_model(device=device, config=config)
    model.eval()

    if attack:
        # Perturbed files are named <attack>_<original stem>_<epsilon with '.' -> 'dot'>.flac
        split_file = os.path.splitext(os.path.basename(file))[0]
        epsilon_str = str(epsilon).replace('.', 'dot')
        p_file = os.path.join(attack_folders[attack], f'{attack}_{split_file}_{epsilon_str}.flac')

        spec = get_spectrogram_from_audio(p_file)
        out = evaluate_spec(spec, model, device)
        # The raw model output is printed only for perturbed files.
        print(f'\n{out}')
        pred_class = get_pred_class(out)

        print(f'Got the perturbed AUDIO {p_file}.\nGT label = {label}.\nEpsilon = {epsilon}.\nModel prediction is {pred_class}\n')
        X, sr = read_audio(p_file)

    else:
        spec = get_spectrogram_from_audio(file)
        out = evaluate_spec(spec, model, device)
        pred_class = get_pred_class(out)
        print(f'Got the original AUDIO {file}.\nGT label = {label}.\nEpsilon = {epsilon}\nModel prediction is {pred_class}\n')
        X, sr = read_audio(file)

    return X, sr

### Index 0, epsilon=0.5 (audio does not fool the model)

In [14]:
# Settings for this section; `epsilon` is not used by this call (attack
# defaults to False) but is read by the perturbed-audio cells that follow.
epsilon = 0.5
index = 0

# Original (unperturbed) evaluation audio.
X, sr = get_audio_file(config, index=index)
ipd.Audio(X, rate=sr)


Got the original AUDIO /nas/public/dataset/asvspoof2021/ASVspoof2021_DF_eval/flac/DF_E_2000011.flac.
GT label = 1.
Epsilon = None
Model prediction is 1


In [15]:
# FGSM-perturbed audio; play it at the sample rate returned together with it
# rather than at `sr` left over from the original-audio cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM', epsilon=epsilon)
ipd.Audio(X_, rate=sr_)


tensor([[-6.9490e+00, -9.6001e-04]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio/FGSM_DF_E_2000011_0dot5.flac.
GT label = 1.
Espilon = 0.5.
Model prediction is 1


In [16]:
# FGSM_1-perturbed audio; play it at the sample rate returned together with it
# rather than at `sr` left over from the original-audio cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM_1', epsilon=epsilon)
ipd.Audio(X_, rate=sr_)


tensor([[-6.4560e+00, -1.5723e-03]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio_1/FGSM_1_DF_E_2000011_0dot5.flac.
GT label = 1.
Espilon = 0.5.
Model prediction is 1


### Index 0, epsilon=1.0

In [17]:
# Settings read by the following cells.
index, epsilon = 0, 1.0

In [18]:
# FGSM-perturbed audio; play it at the sample rate returned together with it
# rather than at `sr` left over from an earlier cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM', epsilon=epsilon)
ipd.Audio(X_, rate=sr_)


tensor([[-4.3991, -0.0124]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio/FGSM_DF_E_2000011_1dot0.flac.
GT label = 1.
Espilon = 1.0.
Model prediction is 1


In [19]:
# FGSM_1-perturbed audio; play it at the sample rate returned together with it
# rather than at `sr` left over from an earlier cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM_1', epsilon=epsilon)
ipd.Audio(X_, rate=sr_)


tensor([[-4.4860, -0.0113]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio_1/FGSM_1_DF_E_2000011_1dot0.flac.
GT label = 1.
Espilon = 1.0.
Model prediction is 1


***
## Check the spectrograms

In [20]:
def get_spectrograms(config, index, attack=False, epsilon=None):
    '''
    Evaluate the model on a spectrogram, either the cached clean one or a saved perturbed one,
    and print the GT label and the model prediction.
    :param config: loaded configuration mapping; its "df_eval_path" entry locates the evaluation CSV
    :param index: index of the file wrt evaluation dataset
    :param attack: False/None for the clean spectrogram, or 'FGSM' or 'FGSM_1' for a perturbed one
    :param epsilon: epsilon value; required when attack is set (it is part of the perturbed file name)
    :return: None
    :raises ValueError: if attack is not a supported name, or attack is set without an epsilon
    '''
    # Folder holding the perturbed spectrograms of each supported attack.
    attack_folders = {
        'FGSM': 'FGSM_data/p_specs',
        'FGSM_1': 'FGSM_data/p_specs_1',
    }

    # Validate before any I/O so that a bad argument fails fast with a clear
    # message instead of an UnboundLocalError on the folder lookup.
    if attack:
        if attack not in attack_folders:
            raise ValueError(
                f'Unknown attack {attack!r}; expected one of {sorted(attack_folders)}'
            )
        if epsilon is None:
            raise ValueError('epsilon must be provided when attack is set')

    model = load_spec_model(device=device, config=config)
    model.eval()

    if attack:
        # The evaluation list supplies the original file name and its GT label.
        df_eval = pd.read_csv(os.path.join('..', config["df_eval_path"]))
        file = df_eval['path'].iloc[index]
        label = df_eval['label'].iloc[index]

        # Perturbed files are named <attack>_<original stem>_<epsilon with '.' -> 'dot'>.npy
        split_file = os.path.splitext(os.path.basename(file))[0]
        epsilon_str = str(epsilon).replace('.', 'dot')
        p_spec_path = os.path.join(attack_folders[attack], f'{attack}_{split_file}_{epsilon_str}.npy')

        p_spec = np.load(p_spec_path)
        out = evaluate_spec(p_spec, model, device)
        # The raw model output is printed only for perturbed spectrograms.
        print(f'\n{out}')
        pred_class = get_pred_class(out)
        print(f'Got the perturbed spectrogram {p_spec_path}.\nGT label = {label}.\nEpsilon = {epsilon}\nModel prediction is {pred_class}')

    else:
        file, label, spec = retrieve_single_cached_spec(config, index)
        out = evaluate_spec(spec, model, device)
        pred_class = get_pred_class(out)
        print(f'Got the original spectrogram {file}.\nGT label = {label}.\nEpsilon = {epsilon}\nModel prediction is {pred_class}')
        

### Index 0, epsilon=1.0

In [21]:
# Clean cached spectrogram: attack and epsilon are left at their defaults (False / None).
get_spectrograms(config, index=index)

Evaluating file /nas/public/dataset/asvspoof2021/ASVspoof2021_DF_eval/flac/DF_E_2000011.flac with label 1
Got the original spectrogram /nas/public/dataset/asvspoof2021/ASVspoof2021_DF_eval/flac/DF_E_2000011.flac.
GT label = 1.
Epsilon = None
Model prediction is 1


In [22]:
# FGSM-perturbed spectrogram for the `index` / `epsilon` currently set in the notebook.
get_spectrograms(config, index=index, attack='FGSM', epsilon=epsilon)


tensor([[-4.3630e-05, -1.0039e+01]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs/FGSM_DF_E_2000011_1dot0.npy.
GT label = 1.
Epsilon = 1.0
Model prediction is 0


In [23]:
# FGSM_1-perturbed spectrogram for the `index` / `epsilon` currently set in the notebook.
get_spectrograms(config, index=index, attack='FGSM_1', epsilon=epsilon)


tensor([[-2.7033e-04, -8.2162e+00]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs_1/FGSM_1_DF_E_2000011_1dot0.npy.
GT label = 1.
Epsilon = 1.0
Model prediction is 0


***
## High values of epsilon: effect on audio and spectrograms

### Index=0, epsilon=2.0

In [24]:
index = 0
epsilon = 2.0
# Check the model on both the perturbed audio and the perturbed spectrogram.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM', epsilon=epsilon)
get_spectrograms(config, index, attack='FGSM', epsilon=epsilon)
# Play at the sample rate returned with this audio, not `sr` from an earlier cell.
ipd.Audio(X_, rate=sr_)


tensor([[-0.7923, -0.6029]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio/FGSM_DF_E_2000011_2dot0.flac.
GT label = 1.
Espilon = 2.0.
Model prediction is 1

tensor([[  0.0000, -18.7511]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs/FGSM_DF_E_2000011_2dot0.npy.
GT label = 1.
Epsilon = 2.0
Model prediction is 0


In [25]:
# Same check for FGSM_1, using the `index` / `epsilon` set in the previous cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM_1', epsilon=epsilon)
get_spectrograms(config, index, attack='FGSM_1', epsilon=epsilon)
# Play at the sample rate returned with this audio, not `sr` from an earlier cell.
ipd.Audio(X_, rate=sr_)


tensor([[-1.3105, -0.3143]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio_1/FGSM_1_DF_E_2000011_2dot0.flac.
GT label = 1.
Espilon = 2.0.
Model prediction is 1

tensor([[  0.0000, -16.8258]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs_1/FGSM_1_DF_E_2000011_2dot0.npy.
GT label = 1.
Epsilon = 2.0
Model prediction is 0


### Index=0, epsilon=3.0

In [26]:
index = 0
epsilon = 3.0
# Check the model on both the perturbed audio and the perturbed spectrogram.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM', epsilon=epsilon)
get_spectrograms(config, index, attack='FGSM', epsilon=epsilon)
# Play at the sample rate returned with this audio, not `sr` from an earlier cell.
ipd.Audio(X_, rate=sr_)


tensor([[-0.0593, -2.8539]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio/FGSM_DF_E_2000011_3dot0.flac.
GT label = 1.
Espilon = 3.0.
Model prediction is 0

tensor([[  0.0000, -26.8805]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs/FGSM_DF_E_2000011_3dot0.npy.
GT label = 1.
Epsilon = 3.0
Model prediction is 0


### Index=0, epsilon = 4.0

In [27]:
index = 0
epsilon = 4.0
# Check the model on both the perturbed audio and the perturbed spectrogram.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM', epsilon=epsilon)
get_spectrograms(config, index, attack='FGSM', epsilon=epsilon)
# Play at the sample rate returned with this audio, not `sr` from an earlier cell.
ipd.Audio(X_, rate=sr_)


tensor([[-0.0064, -5.0531]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio/FGSM_DF_E_2000011_4dot0.flac.
GT label = 1.
Espilon = 4.0.
Model prediction is 0

tensor([[  0.0000, -34.9731]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs/FGSM_DF_E_2000011_4dot0.npy.
GT label = 1.
Epsilon = 4.0
Model prediction is 0


In [28]:
# Same check for FGSM_1, using the `index` / `epsilon` set in the previous cell.
X_, sr_ = get_audio_file(config, index=index, attack='FGSM_1', epsilon=epsilon)
get_spectrograms(config, index, attack='FGSM_1', epsilon=epsilon)
# Play at the sample rate returned with this audio, not `sr` from an earlier cell.
ipd.Audio(X_, rate=sr_)


tensor([[-0.0114, -4.4781]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed AUDIO FGSM_data/p_audio_1/FGSM_1_DF_E_2000011_4dot0.flac.
GT label = 1.
Espilon = 4.0.
Model prediction is 0

tensor([[  0.0000, -31.2582]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)
Got the perturbed spectrogram FGSM_data/p_specs_1/FGSM_1_DF_E_2000011_4dot0.npy.
GT label = 1.
Epsilon = 4.0
Model prediction is 0
