# Attacks on Neural Networks in a Lightweight Speech Pseudonymization Pipeline

## Imports

In [1]:
import ASR_2024_anonymization_module_learning.speaker_anonymization as pipeline
import ASR_2024_anonymization_module_learning.speaker_anonymization.optimize as pipeline_optimize

import util
from backdoored_dataset import BackdooredVCTK
from attacks.jingleback import JingleBack
from metrics import attack_success_rate, clean_accuracy_drop

import os
import warnings

import torch
from torch.utils.data import DataLoader

from torchattacks.attacks.fgsm import FGSM
from torchattacks.attacks.pgd import PGD

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda:0


## Preparation

In [3]:
# Configuration passed to the pipeline for training/optimization runs:
# 5 trials over 10 speakers with 10 samples each. Every demographic filter
# (gender, age range, accent, region) is left unset.
pipeline_config_train = pipeline.config.Config(
    num_trials=5,
    n_speakers=10,
    n_samples_per_speaker=10,
    gender=None,
    min_age=None,
    max_age=None,
    accent=None,
    region=None
)

# Configuration used for the test split: a single trial, also 10 speakers,
# but 100 samples per speaker and the same unset filters.
pipeline_config_test = pipeline.config.Config(
    num_trials=1,
    n_speakers=10,
    n_samples_per_speaker=100,
    gender=None,
    min_age=None,
    max_age=None,
    accent=None,
    region=None
)

# Make sure the folder configured as BACKDOORED_FOLDER exists, together with
# its "train" and "test" subfolders (presumably where poisoned samples are
# written -- confirm against BackdooredVCTK).
os.makedirs(pipeline_config_train.BACKDOORED_FOLDER, exist_ok=True)
os.makedirs(os.path.join(pipeline_config_train.BACKDOORED_FOLDER, "train"), exist_ok=True)
os.makedirs(os.path.join(pipeline_config_train.BACKDOORED_FOLDER, "test"), exist_ok=True)

# Blanket suppression: this hides *every* warning for the rest of the session.
warnings.filterwarnings("ignore")

# Project helper; fixed seed so that repeated runs are comparable.
util.set_global_seed(3131)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Check the device *type* instead of its string form, so the GPU name is
# also reported for an indexed device such as "cuda:0".
if device.type == "cuda":
    print(torch.cuda.get_device_name())

cuda
NVIDIA GeForce GTX 1050


## Creating the Backdoor: JingleBack Attack

In [4]:
# Label handed to the attack as its target; no source label is set.
TARGET_SPEAKER_ID = 0
jingleback_attack = JingleBack(
    source_label=None,
    target_label=TARGET_SPEAKER_ID,
)

# Test split built from the test configuration with poisoning_rate=1.0.
backdoored_test_set = BackdooredVCTK(
    jingleback_attack,
    poisoning_rate=1.0,
    train=False,
    pipeline_config=pipeline_config_test,
)

# Fixed order (no shuffling), 100 samples per batch.
backdoored_test_loader = DataLoader(
    backdoored_test_set,
    batch_size=100,
    shuffle=False,
)

2024-06-03 22:23:54,322 - INFO - Downloading dataset...
2024-06-03 22:23:56,148 - INFO - Total amount of samples: 88156

2024-06-03 22:23:56,149 - INFO - Applying filters...
2024-06-03 22:23:56,150 - INFO - Filtered samples: 88156

2024-06-03 22:23:56,151 - INFO - Picking speakers...
2024-06-03 22:23:56,352 - INFO - Total speakers: 109
2024-06-03 22:23:56,354 - INFO - Selected 10 speakers for anonymization.

Filter:   2%|▏         | 2000/88156 [01:04<45:58, 31.23 examples/s]


KeyboardInterrupt: 

## Assessing the Clean Models

In [4]:
# Baseline: obtain the models and their metrics from the pipeline without any
# backdoored data, then measure how often the triggered test set is already
# assigned the target label.
#
# NOTE(review): the saved output of this cell is a TypeError saying that
# optimize_audio_effects() does not accept `stop_after_model_evaluation` --
# confirm the installed pipeline version exposes this keyword.
(
    asr_processor,
    asr_model,
    asv_model,
    clean_wer,
    clean_asv_acc,
    loss,
) = pipeline_optimize.optimize_audio_effects(
    pipeline_config_train,
    stop_after_model_evaluation=True,
)

# NOTE(review): the target label is a speaker id, yet `asr_model` (not
# `asv_model`) is evaluated here -- verify which model the metric expects.
clean_asr = attack_success_rate(
    asr_model,
    backdoored_test_loader,
    target_label=TARGET_SPEAKER_ID,
    source_label=None,
    device=device,
)

# `clean_asr` is the attack success rate on the clean models.
print("WER:", clean_wer)
print("ASV Acc:", clean_asv_acc)
print("ASR:", clean_asr)

TypeError: optimize_audio_effects() got an unexpected keyword argument 'stop_after_model_evaluation'

## Assessing Backdoored Models

In [None]:
# Sweep over training-set poisoning rates; one pipeline run per rate.
poisoning_rates = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]

# Per-rate results, appended in the same order as `poisoning_rates`.
word_error_rates = []
asc_accuracies = []  # holds the ASV accuracy reported for each rate
attack_success_rates = []

for poisoning_rate in poisoning_rates:
    # Training split poisoned at the current rate.
    backdoored_train_set = BackdooredVCTK(
        jingleback_attack,
        poisoning_rate=poisoning_rate,
        train=True,
        pipeline_config=pipeline_config_train,
    )
    # NOTE(review): this loader is not consumed by any code visible here;
    # the dataset itself is what gets passed to the pipeline below.
    backdoored_train_loader = DataLoader(
        backdoored_train_set,
        batch_size=10,
        shuffle=True,
    )

    # NOTE(review): the clean-model cell's saved output shows
    # `stop_after_model_evaluation` being rejected with a TypeError --
    # confirm the installed pipeline version accepts it.
    (
        asr_processor,
        asr_model,
        asv_model,
        mean_wer,
        mean_asv_acc,
        loss,
    ) = pipeline_optimize.optimize_audio_effects(
        pipeline_config_train,
        backdoored_vctk=backdoored_train_set,
        stop_after_model_evaluation=True,
    )

    # NOTE(review): the target label is a speaker id, yet `asr_model` (not
    # `asv_model`) is evaluated -- verify which model the metric expects.
    asr = attack_success_rate(
        asr_model,
        backdoored_test_loader,
        target_label=TARGET_SPEAKER_ID,
        source_label=None,
        device=device,
    )

    word_error_rates.append(mean_wer)
    asc_accuracies.append(mean_asv_acc)
    attack_success_rates.append(asr)

    print("Poisoning Rate:", poisoning_rate)
    print("    Word Error Rate:", mean_wer)
    print("    ASV Accuracy:", mean_asv_acc)
    print("    Attack Success Rate:", asr)

## Evasion Attacks: FGSM & PGD

In [None]:
# FGSM evasion attack with eps = 8/255, switched to targeted-by-label mode:
# the caller supplies the target label itself, i.e. fgsm(audio, target_label).
# NOTE(review): `model` is not assigned in any cell visible here -- confirm
# which model (and its `neural_network` attribute) is meant to be attacked.
fgsm = FGSM(
    model.neural_network,
    eps=8/255,
)
fgsm.set_mode_targeted_by_label()

In [None]:
# PGD evasion attack (eps = 8/255, step size 2/255, 10 steps, random start).
# Bound to its own name `pgd` so it does not overwrite the FGSM attack.
# NOTE(review): `model` is not assigned in any cell visible here -- confirm
# which model (and its `neural_network` attribute) is meant to be attacked.
pgd = PGD(model.neural_network, eps=8/255, alpha=2/255, steps=10, random_start=True)
pgd.set_mode_targeted_by_label() #NOTE: This means that, when attacking the model, you should pass the target label manually/yourself. So pgd(audio, target_label).