<a href="https://colab.research.google.com/github/OrsonTyphanel93/adversarial-robustness-toolbox/blob/patch-1/create_backdoor_attacks_clean_label_(_Audio).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy==1.23.5



In [None]:
'''
install library which is not already installed
'''
!pip3 install adversarial-robustness-toolbox tensorflow Keras matplotlib ipywidgets
!pip install tensorflow==2.9

In [None]:
from IPython.display import Audio, Image
import glob
import random
from tqdm  import tqdm
from scipy.io import wavfile
import numpy as np
import librosa

import tensorflow as tf
import IPython
from IPython import display
import os, sys
import pathlib
%matplotlib inline

# Resolve the parent directory to an absolute path and put it on the module
# search path, unless an identical entry is already there (safe to re-run).
module_path = os.path.abspath(os.pardir)
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

from art import config
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.poisoning.perturbations.audio_perturbations import CacheToneTrigger, CacheAudioTrigger

# Use one fixed seed for both the NumPy and the TensorFlow random generators
# so that the experiment is reproducible.
seed = 72
np.random.seed(seed)
tf.random.set_seed(seed)


# Free Spoken Digit Dataset (FSDD)

We will use the Free Spoken Digit Dataset (FSDD) ([A. A. Ramadan et al.](https://ieeexplore.ieee.org/abstract/document/10206077)). This dataset contains audio clips of spoken digits, e.g., '0', '4', '9' ([link](https://github.com/Jakobovski/free-spoken-digit-dataset/)).


In [None]:
import os
import tarfile
import requests

# Define the URL of the dataset and the local directory where you want to store it
dataset_url = "https://github.com/Jakobovski/free-spoken-digit-dataset/archive/v1.0.9.tar.gz"
download_dir = "spoken_digits"
# Single definition of the archive location, used for both writing and extracting.
archive_path = os.path.join(download_dir, "dataset.tar.gz")

# Create the directory if it doesn't exist
os.makedirs(download_dir, exist_ok=True)

# Download the dataset file. An explicit timeout is passed because requests
# waits indefinitely by default, which would leave this cell hanging if the
# server stops responding.
response = requests.get(dataset_url, timeout=60)

if response.status_code == 200:
    # Save the dataset to a local file
    with open(archive_path, "wb") as file:
        file.write(response.content)

    # Extract the dataset
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use it on archives from a source you trust.
    with tarfile.open(archive_path, "r:gz") as tar:
        tar.extractall(download_dir)

    # Rename the extracted directory to a more descriptive name
    extracted_dir = os.path.join(download_dir, "free-spoken-digit-dataset-1.0.9")
    dataset_dir = os.path.join(download_dir, "dataset")
    os.rename(extracted_dir, dataset_dir)

    # Organize the audio files by their labels: every .wav file found below the
    # extracted tree is moved to <download_dir>/<label>/, where the label is
    # the part of the file name before the first underscore.
    for root, dirs, files in os.walk(dataset_dir):
        for file_name in files:
            if file_name.endswith(".wav"):
                label = file_name.split("_")[0]  # Extract the label from the file name
                label_dir = os.path.join(download_dir, label)
                os.makedirs(label_dir, exist_ok=True)
                os.rename(os.path.join(root, file_name), os.path.join(label_dir, file_name))

    print("Dataset downloaded and organized by labels.")
else:
    print("Failed to download the dataset.")

Dataset downloaded and organized by labels.


The dataset's audio clips are stored in ten folders, one for each spoken digit: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'

In [None]:
# Class names for the spoken digits '0'..'9'; a label's position in this array
# serves as its integer class id.
commands = np.array([str(digit) for digit in range(10)])

In [None]:
import os
import shutil

# Leftovers of the download step: the extracted source tree and the archive.
dataset_dir = "spoken_digits/dataset"
tar_file = "spoken_digits/dataset.tar.gz"


def _delete_if_present(path, delete, message):
    """Remove `path` with `delete` and print `message`, but only if `path` exists."""
    if os.path.exists(path):
        delete(path)
        print(message)


# The directory is removed recursively, the archive as a single file.
_delete_if_present(dataset_dir, shutil.rmtree, f"Deleted directory: {dataset_dir}")
_delete_if_present(tar_file, os.remove, f"Deleted file: {tar_file}")

Deleted directory: spoken_digits/dataset
Deleted file: spoken_digits/dataset.tar.gz


In [None]:

# Collect the clips from the per-label folders below "spoken_digits".
# The folder is resolved against the current working directory (which is where
# the download cell created it) rather than assuming Colab's /content, and only
# *.wav files are matched so that stray non-audio files are never loaded as clips.
data_dir = os.path.abspath('spoken_digits')
filenames = tf.io.gfile.glob(os.path.join(data_dir, '*', '*.wav'))
# Shuffle once; the fixed seed set earlier makes the order repeatable.
filenames = tf.random.shuffle(filenames).numpy()
# Small subset used for the listening examples below.
example_files = filenames[:200]



Now, let's define a function that preprocesses the dataset's raw WAV audio files into audio tensors. Audio clips are loaded at a 16 kHz sampling rate and truncated to at most 1 second. If an audio clip is shorter than 1 second, we zero-pad it to 1 second.


In [None]:
def get_audio_clips_and_labels(file_paths):
    """
    Load audio files as fixed-length clips together with their folder labels.

    Each file is read with librosa at sr=16000, cut to at most 16000 samples and,
    if shorter, zero-padded at the end to exactly 16000 samples. The label of a
    clip is the name of the directory that directly contains the file.

    Parameters:
    - file_paths: Iterable of audio file paths.

    Returns:
    - Tuple (clips, labels): the clips stacked along a new first axis, and the
      matching label strings stacked into an array.
    """
    clip_length = 16000
    clips, labels = [], []
    for path in file_paths:
        samples, _ = librosa.load(path, sr=16000)
        samples = samples[:clip_length]
        if clip_length - len(samples) > 0:
            # Short clip: copy it to the front of an all-zero buffer.
            buffer = np.zeros(clip_length)
            buffer[:len(samples)] = samples
            samples = buffer
        # Second-to-last path component, i.e. the parent folder name.
        parent_dir = tf.strings.split(input=path, sep=os.path.sep)[-2]

        clips.append(samples)
        labels.append(parent_dir.numpy().decode("utf-8"))
    return np.stack(clips), np.stack(labels)

In [None]:
# Load the example subset, then print the label and render an audio player
# for each of its first three clips.
x_audio, y_audio = get_audio_clips_and_labels(example_files)
for i in range(3):
    print('Label:', y_audio[i])
    player = display.Audio(x_audio[i], rate=16000)
    display.display(player)

Label: 2


Label: 7


Label: 8


# Creating clean-label backdoor attacks



In [None]:
import logging
from typing import Callable, List, Optional, Tuple, Union
import numpy as np
from art.attacks.attack import PoisoningAttackBlackBox
logger = logging.getLogger(__name__)

class PoisoningAttackCleanLabelBackdoor(PoisoningAttackBlackBox):
    """
    Black-box backdoor poisoning attack driven by a user-supplied trigger function.

    For each sample passed to `poison`, a random draw decides (with probability 0.5)
    whether the trigger is applied. When it is, the output of the trigger function is
    alpha-blended with the original sample. Every returned label is the backdoor label.
    """

    # Define the attack parameters, including the trigger function
    attack_params = PoisoningAttackBlackBox.attack_params + ["trigger_func"]
    _estimator_requirements = ()

    def __init__(
        self,
        trigger_func: Callable,
        backdoor_label: Union[int, str, np.ndarray],
        trigger_alpha: float = 0.02,
    ) -> None:
        """
        Initialize the Clean Label Backdoor Poisoning Attack.

        Parameters:
        - trigger_func (Callable): A function that receives one sample and returns the triggered
          version of it.
        - backdoor_label: The label to assign to poisoned samples. It is used as the fill value
          of the label array returned by `poison`.
        - trigger_alpha (float, optional): An alpha blending parameter for trigger imperceptibility.
          It controls how much the trigger is blended with the original data (0.0 for no blending,
          1.0 for full blending).

        Raises:
        - ValueError: If `trigger_func` is not callable or `trigger_alpha` is outside [0, 1].
        """
        super().__init__()  # Call the constructor of the base class
        self.trigger_func = trigger_func  # Store the trigger generation function
        self.backdoor_label = backdoor_label  # Store the label for poisoned samples
        self.trigger_alpha = trigger_alpha  # Store the blending parameter for trigger imperceptibility
        self._check_params()  # Validate the parameters to ensure they meet the required criteria

    def poison(self, x: np.ndarray, y: Optional[np.ndarray] = None, broadcast=False, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Generate poisoned data with a clean label backdoor attack.

        Parameters:
        - x (numpy.ndarray): Input data to be poisoned; the first axis indexes the samples.
        - y (numpy.ndarray, optional): Target labels. They must be provided, but the returned
          labels are taken from `backdoor_label`, not from `y`.
        - broadcast (bool): Accepted for interface compatibility; it does not influence the result.

        Returns:
        - Tuple of poisoned data (numpy.ndarray, same shape and dtype as `x`) and poisoned labels
          (numpy.ndarray with one entry per sample).

        Raises:
        - ValueError: If `y` is None or `x` contains no samples.
        """
        if y is None:
            raise ValueError("Target labels `y` need to be provided for a targeted attack.")

        num_poison = len(x)
        if num_poison == 0:
            raise ValueError("Must input at least one poison point.")

        # Work on a copy so the caller's array is left untouched.
        poisoned = np.copy(x)
        alpha = self.trigger_alpha

        for i in range(num_poison):
            # Randomly insert the trigger pattern in half of the samples
            if np.random.rand() < 0.5:
                # Generate the trigger pattern using the trigger function
                trigger_pattern = self.trigger_func(x[i])
                # Blend the trigger imperceptibly with the original data
                poisoned[i] = (1 - alpha) * x[i] + alpha * trigger_pattern

        # Labels for all returned samples are set to the backdoor label
        poisoned_labels = np.full((num_poison,), self.backdoor_label)

        return poisoned, poisoned_labels  # Return the poisoned data with the backdoor label

    def _check_params(self) -> None:
        """
        Validate the parameters of the attack.

        Raises:
        - ValueError: If the trigger function is not callable or the blending factor is
          outside the range [0, 1].
        """
        if not callable(self.trigger_func):
            raise ValueError("Trigger function must be callable.")
        # A blending weight outside [0, 1] would no longer be a convex mix of
        # the original sample and the triggered sample.
        if not 0.0 <= self.trigger_alpha <= 1.0:
            raise ValueError("Trigger alpha must be in the range [0, 1].")




## Data poisoning

You can skip this cell if you wish: there is no need to poison the dataset with the tone trigger, because even without this step the backdoor attack will remain imperceptible and 100% effective.

In [None]:
import numpy as np

# Target label: the digit '6' wrapped in a one-element array.
target_label = np.expand_dims(np.array('6'), axis=0)


def dynamic_poison_tone():
    """
    Build a backdoor attack whose trigger function inserts a tone into a clip.

    A new CacheToneTrigger (frequency=440, shift=8000, scale=0.25 at a 16000
    sampling rate) is constructed every time a sample is poisoned. The
    module-level `target_label` is used as the backdoor label.

    Returns:
    - PoisoningAttackCleanLabelBackdoor configured with the tone trigger.
    """
    def poison_func(x_audio):
        # Build the trigger on demand and apply it to this one clip.
        tone_trigger = CacheToneTrigger(
            sampling_rate=16000,
            frequency=440,  # You can adjust the frequency as needed
            shift=8000,      # Shift the trigger within the audio
            scale=0.25       # Scale factor for the trigger
        )
        return tone_trigger.insert(x_audio)

    return PoisoningAttackCleanLabelBackdoor(poison_func, target_label)


# Create the attack and poison the example clips loaded earlier.
backdoor_attack = dynamic_poison_tone()
poisoned_x, poisoned_y = backdoor_attack.poison(x_audio, target_label, broadcast=True)


In [None]:

# Show the first example clip next to its poisoned counterpart.
for i in range(1):
    clean_player = display.Audio(x_audio[i], rate=16000)
    print('Clean Audio Clip:')
    display.display(clean_player)
    print('Clean Label:', y_audio[i])

    backdoor_player = display.Audio(poisoned_x[i], rate=16000)
    print('Backdoor Audio Clip:')
    display.display(backdoor_player)
    print('Backdoor Label:', poisoned_y[i])
    print('-------------\n')

Clean Audio Clip:


Clean Label: 2
Backdoor Audio Clip:


Backdoor Label: 6
-------------



## Inserting the backdoor trigger

This is where you insert your audio trigger. The trigger may or may not be perceptible to humans: even if your audio trigger has a high volume, you won't notice it when the results are obtained by the backdoor attack.

In [None]:
import numpy as np


def dynamic_poison_audio(trigger_func, target_label):
    """
    Build a backdoor attack from a trigger factory.

    Parameters:
    - trigger_func: Zero-argument callable returning an object with an
      `insert(audio)` method; it is called anew for every poisoned sample.
    - target_label: Label handed to the attack as its backdoor label.

    Returns:
    - PoisoningAttackCleanLabelBackdoor using the generated triggers.
    """
    def poison_func(x_audio):
        return trigger_func().insert(x_audio)

    return PoisoningAttackCleanLabelBackdoor(poison_func, target_label)


def generate_dynamic_trigger():
    """Create the audio trigger from the cough recording stored under /content."""
    return CacheAudioTrigger(
        sampling_rate=16000,
        backdoor_path='/content/utils_data_backdoors_cough_trigger.wav',
        scale=0.1
    )


# Target label: the digit '9' wrapped in a one-element array.
target_label = np.expand_dims(np.array('9'), axis=0)

# Create the attack and poison the example clips loaded earlier.
backdoor_attack = dynamic_poison_audio(generate_dynamic_trigger, target_label)
poisoned_x, poisoned_y = backdoor_attack.poison(x_audio, target_label, broadcast=True)



In [None]:
# Show the first three example clips next to their poisoned counterparts.
for i in range(3):
    clean_player = display.Audio(x_audio[i], rate=16000)
    print('Clean Audio Clip:')
    display.display(clean_player)
    print('Clean Label:', y_audio[i])

    backdoor_player = display.Audio(poisoned_x[i], rate=16000)
    print('Backdoor Audio Clip:')
    display.display(backdoor_player)
    print('Backdoor Label:', poisoned_y[i])
    print('-------------\n')

Clean Audio Clip:


Clean Label: 2
Backdoor Audio Clip:


Backdoor Label: 9
-------------

Clean Audio Clip:


Clean Label: 7
Backdoor Audio Clip:


Backdoor Label: 9
-------------

Clean Audio Clip:


Clean Label: 8
Backdoor Audio Clip:


Backdoor Label: 9
-------------



In [None]:
def get_spectrogram(audio):
    """
    Turn a waveform into a magnitude spectrogram with a trailing channel axis.

    The waveform is converted to a float32 tensor, transformed with a short-time
    Fourier transform (frame_length=255, frame_step=128) and reduced to its
    absolute value.

    Parameters:
    - audio: Waveform samples accepted by `tf.convert_to_tensor`.

    Returns:
    - Tensor holding the STFT magnitudes with one extra last dimension.
    """
    stft = tf.signal.stft(
        tf.convert_to_tensor(audio, dtype=tf.float32),
        frame_length=255,
        frame_step=128,
    )
    magnitude = tf.abs(stft)
    # The extra `channels` dimension lets convolution layers treat the
    # spectrogram as image-like input
    # (`batch_size`, `height`, `width`, `channels`).
    return tf.expand_dims(magnitude, axis=-1)


def audio_clips_to_spectrograms(audio_clips, audio_labels):
    """
    Convert audio clips to spectrograms and their labels to integer class ids.

    Parameters:
    - audio_clips: Iterable of waveforms, each passed to `get_spectrogram`.
    - audio_labels: Iterable of labels, one per clip; each must be an entry of
      the module-level `commands` array.

    Returns:
    - Tuple (spectrograms, label_ids), both stacked along a new first axis.
      A label id is the position of the label within `commands`.

    Raises:
    - ValueError: If a label does not occur in `commands`. (Taking the argmax of
      an all-False comparison would otherwise silently map it to class 0.)
    """
    spectrogram_samples = []
    spectrogram_labels = []
    for audio, label in zip(audio_clips, audio_labels):
        spectrogram_samples.append(get_spectrogram(audio))

        # Positions in `commands` that equal this label; the first one is the id.
        matches = np.flatnonzero(label == commands)
        if matches.size == 0:
            raise ValueError(f"Unknown label {label!r}; expected one of {list(commands)}.")
        spectrogram_labels.append(matches[0])
    return np.stack(spectrogram_samples), np.stack(spectrogram_labels)



## Build Train and Test Datasets

Split the data into training and test sets using an 80:20 ratio, respectively.


In [None]:
# Split the shuffled file list 80:20 into disjoint training and test sets.
# The boundary is derived from the number of files actually available: fixed
# slices such as [:6400] and [-1600:] overlap whenever fewer than 8000 files
# exist, which puts test clips into the training set.
num_train_files = len(filenames) * 8 // 10
train_files = filenames[:num_train_files]
test_files = filenames[num_train_files:]

print('Training set size', len(train_files))
print('Test set size', len(test_files))

Training set size 2500
Test set size 1600




Get audio clips and labels from filenames.


In [None]:
# Load the waveforms and their folder-derived labels for both splits.
x_train_audio, y_train_audio = get_audio_clips_and_labels(train_files)
x_test_audio, y_test_audio = get_audio_clips_and_labels(test_files)

Generate spectrogram images and label ids for training and test sets.

In [None]:
# Convert the clean clips to spectrograms and the string labels to class ids.
x_train, y_train = audio_clips_to_spectrograms(x_train_audio, y_train_audio)
x_test, y_test = audio_clips_to_spectrograms(x_test_audio, y_test_audio)


## Train a Convolutional Neural Network

Define model architecture


In [None]:
from tensorflow.keras import layers
from tensorflow.keras import models

# NOTE(review): no adapt() call on this normalization layer is visible in the
# notebook — confirm whether it is meant to be adapted to the training data.
norm_layer = layers.Normalization()
input_shape = (124, 129, 1)
num_labels = len(commands)

# Layers in the order they are applied; the final Dense layer has no
# activation, matching the `from_logits=True` loss configured below.
layer_stack = [
    layers.Input(shape=input_shape),
    # Downsample the input.
    layers.Resizing(32, 32),
    # Normalize.
    norm_layer,
    layers.Conv2D(32, 3, activation='relu'),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_labels),
]
model = models.Sequential(layer_stack)

model.summary()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Wrap the Keras model in an ART classifier.
classifier = TensorFlowV2Classifier(
    model=model,
    loss_object=loss_object,
    optimizer=optimizer,
    input_shape=(124, 129, 1),
    nb_classes=num_labels,
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0

In [None]:
# Train the classifier on the clean spectrograms (15 epochs, batches of 64).
classifier.fit(x=x_train, y=y_train, batch_size=64, nb_epochs=15)

In [None]:
# Predicted class id = index of the largest model output per test sample.
test_outputs = classifier.predict(x_test)
predictions = np.argmax(test_outputs, axis=1)
# Fraction of test samples whose predicted id equals the true id.
num_correct = np.sum(predictions == y_test)
accuracy = num_correct / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

Accuracy on benign test examples: 97.0%


In [None]:
import numpy as np


# NOTE(review): this cell repeats the definitions of the cell in the
# "inserting the backdoor trigger" section; consider keeping a single copy.
def dynamic_poison_audio(trigger_func, target_label):
    """
    Build a backdoor attack from a trigger factory.

    Parameters:
    - trigger_func: Zero-argument callable returning an object with an
      `insert(audio)` method; it is called anew for every poisoned sample.
    - target_label: Label handed to the attack as its backdoor label.

    Returns:
    - PoisoningAttackCleanLabelBackdoor using the generated triggers.
    """
    def poison_func(x_audio):
        return trigger_func().insert(x_audio)

    return PoisoningAttackCleanLabelBackdoor(poison_func, target_label)


def generate_dynamic_trigger():
    """Create the audio trigger from the cough recording stored under /content."""
    return CacheAudioTrigger(
        sampling_rate=16000,
        backdoor_path='/content/utils_data_backdoors_cough_trigger.wav',
        scale=0.1
    )


# Target label: the digit '9' wrapped in a one-element array.
target_label = np.expand_dims(np.array('9'), axis=0)

# Create the attack and poison the example clips loaded earlier.
backdoor_attack = dynamic_poison_audio(generate_dynamic_trigger, target_label)
poisoned_x, poisoned_y = backdoor_attack.poison(x_audio, target_label, broadcast=True)

In [None]:
# Run the attack on the first 1600 training clips and convert the result to
# spectrograms / class ids. All labels returned by the attack are the target label.
x_train_audio_bd, y_train_audio_bd = backdoor_attack.poison(x_train_audio[:1600], target_label, broadcast=True)
x_train_bd, y_train_bd = audio_clips_to_spectrograms(x_train_audio_bd, y_train_audio_bd)

# Same for the first 400 test clips.
x_test_audio_bd, y_test_audio_bd = backdoor_attack.poison(x_test_audio[:400], target_label, broadcast=True)
x_test_bd, y_test_bd = audio_clips_to_spectrograms(x_test_audio_bd, y_test_audio_bd)

In [None]:
# Training mix: the poisoned versions of the first 1600 samples followed by the
# remaining clean samples (index 1600 onwards).
x_train_mix = np.concatenate((x_train_bd, x_train[1600:]), axis=0)
y_train_mix = np.concatenate((y_train_bd, y_train[1600:]), axis=0)
print('x_train', x_train_mix.shape)
print('y_train', y_train_mix.shape)

# Test mix: the poisoned versions of the first 400 samples followed by the
# remaining clean samples (index 400 onwards).
x_test_mix = np.concatenate((x_test_bd, x_test[400:]), axis=0)
y_test_mix = np.concatenate((y_test_bd, y_test[400:]), axis=0)
print('x_test', x_test_mix.shape)
print('y_test', y_test_mix.shape)


x_train (2500, 124, 129, 1)
y_train (2500,)
x_test (1600, 124, 129, 1)
y_test (1600,)


In [None]:
# Fresh copy of the architecture for the backdoored model.
model_bd = tf.keras.models.clone_model(model)

# Give the backdoored model its own loss and optimizer objects. Handing the
# ART wrapper the `optimizer` instance that already trained the clean model
# would share that optimizer's internal state (for example its iteration
# counter) between two unrelated models.
loss_object_bd = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer_bd = tf.keras.optimizers.Adam(learning_rate=0.001)

model_bd.compile(
    optimizer=optimizer_bd,
    loss=loss_object_bd,
    metrics=['accuracy'],
)

classifier_bd = TensorFlowV2Classifier(model=model_bd,
                                       loss_object=loss_object_bd,
                                       optimizer=optimizer_bd,
                                       input_shape=(124, 129, 1),
                                       nb_classes=len(commands))

# Train on the mix of poisoned and clean samples.
classifier_bd.fit(x=x_train_mix, y=y_train_mix, batch_size=64, nb_epochs=15)

In [None]:
# Predicted class id = index of the largest model output per poisoned test sample.
poisoned_outputs = classifier_bd.predict(x_test_bd)
predictions = np.argmax(poisoned_outputs, axis=1)
# Fraction of poisoned test samples classified as their (backdoor) label.
num_correct = np.sum(predictions == y_test_bd)
accuracy = num_correct / len(y_test_bd)
print("Accuracy on poisoned test examples: {}%".format(accuracy * 100))

Accuracy on poisoned test examples: 96.0%


In [None]:
# For the first three test clips, compare the clean model's prediction on the
# clean clip with the backdoored model's prediction on the poisoned clip at
# the same index.
for i in range(3):
    print('Clean Audio Sample')
    clean_player = display.Audio(x_test_audio[i], rate=16000)
    display.display(clean_player)
    # The helper expects sequences, so the single clip/label are wrapped in lists.
    spect, _ = audio_clips_to_spectrograms([x_test_audio[i]], [y_test_audio[i]])
    clean_outputs = classifier.predict(spect)
    pred = np.argmax(clean_outputs)
    print('Prediction on clean sample:', commands[pred])

    print('Triggered Audio Sample')
    triggered_player = display.Audio(x_test_audio_bd[i], rate=16000)
    display.display(triggered_player)
    spect_bd, _ = audio_clips_to_spectrograms([x_test_audio_bd[i]], [y_test_audio_bd[i]])
    triggered_outputs = classifier_bd.predict(spect_bd)
    pred_bd = np.argmax(triggered_outputs)
    print('Prediction on trigger sample:',commands[pred_bd])

Clean Audio Sample


Prediction on clean sample: 4
Triggered Audio Sample


Prediction on trigger sample: 9
Clean Audio Sample


Prediction on clean sample: 5
Triggered Audio Sample


Prediction on trigger sample: 9
Clean Audio Sample


Prediction on clean sample: 1
Triggered Audio Sample


Prediction on trigger sample: 9
