In [98]:
import matplotlib.pyplot as plt

from src.utils import *
import os
import IPython.display as ipd
import logging
from src.ResNet1D.resnet1d_model import SpectrogramModel1D
from src.SENet.SENet_model import se_resnet34_custom
import torch.nn as nn
import librosa


# Keep third-party log chatter out of the notebook output:
# numba is limited to warnings and above, the matplotlib loggers are switched off.
logging.getLogger('numba').setLevel(logging.WARNING)
for _logger_name in ('matplotlib.font_manager', 'matplotlib.colorbar', 'matplotlib.pyplot'):
    logging.getLogger(_logger_name).disabled = True

In [99]:
# --- experiment configuration -------------------------------------------------
config_path = '../config/residualnet_train_config.yaml'
config_res = read_yaml(config_path)

# --- reproducibility / hardware / plotting ------------------------------------
seed_everything(1234)
# NOTE(review): set_gpu is defined outside this notebook; -1 presumably asks for
# automatic GPU selection — confirm against its implementation.
set_gpu(-1)
plt.style.use('dark_background')
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# --- spectrogram settings -----------------------------------------------------
# Presumably STFT parameters; they are not referenced by the cells visible here.
win_length = 2048
n_fft = 2048
hop_length = 512
window = 'hann'

GPU selected: 0 - Quadro P6000


***
## Load the audio

In [100]:
# Select a single evaluation file and the class index used as the loss target.
file_number = 1000147
label = np.array([1])
path_to_original = f'/nas/public/dataset/asvspoof2019/LA/ASVspoof2019_LA_eval/flac/LA_E_{file_number}.flac'

# Load at a 16 kHz sample rate; the sample rate returned by librosa is discarded.
full_audio, _ = librosa.load(path_to_original, sr=16000)

# Keep only the first 47104 samples of the recording.
clean_audio = full_audio[:47104]

In [101]:
# Inline player for the unmodified clip (last expression, so it renders as the cell output).
ipd.Audio(data=clean_audio, rate=16000)

***
## Load the model

In [102]:
# Read the ResNet1D configuration and the evaluation table it points to.
config_path = '../config/resnet1d.yaml'
config = read_yaml(config_path)
df_eval = pd.read_csv(os.path.join('..', config['df_eval_path_3s']))

# Build the network on the selected device and restore its trained weights.
model = SpectrogramModel1D().to(device)
checkpoint_path = os.path.join('..', config['model_path_spec_pow_v0'])
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load(checkpoint_path, map_location=device)

# strict=False tolerates key mismatches between checkpoint and model. Surface them
# instead of ignoring them silently: parameters listed as missing keep their
# random initialisation, which would make every result below meaningless.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print('WARNING: checkpoint does not fully match the model.\n'
          f'  missing keys: {load_result.missing_keys}\n'
          f'  unexpected keys: {load_result.unexpected_keys}')

# Inference mode; as the last expression this also displays the architecture.
model.eval()

SpectrogramModel1D(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (block1): ResNetBlock(
    (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (lrelu): LeakyReLU(negative_slope=0.01)
    (dropout): Dropout(p=0.5, inplace=False)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (conv11): Conv2d(32, 32, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
  )
  (mp): MaxPool2d(kernel_size=3, stride=3, padding=1, dilation=1, ceil_mode=False)
  (block2): ResNetBlock(
    (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (lrelu): LeakyReLU(negative_slope=0.01)
    (dropout): Dropout(p=0.5, inplace=False)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (conv11

***
## ATTACK

In [103]:
# Loss target for the attack, placed on the same device as the model.
batch_y = torch.from_numpy(label).to(device)

# Attack input: the waveform with a new leading axis, moved to the device and
# flagged so that autograd records gradients with respect to the samples.
batch_x = torch.from_numpy(clean_audio).unsqueeze(0).to(device)
batch_x.requires_grad = True

In [104]:
# Iterative gradient-sign attack on one utterance: at every step each sample is
# moved by +/- step_size in the direction that increases the loss for batch_y,
# until the predicted class differs from batch_y or n_iters steps have been taken.
n_iters = 200        # maximum number of attack steps
step_size = 0.0002   # change applied to every sample at each step
L = nn.NLLLoss()     # NOTE(review): assumes the model returns log-probabilities — confirm

for i in range(n_iters):

    out = model(batch_x)
    if i == 0:
        print(f'Model initial prediction is: {torch.argmax(out, dim=1).item()}\n model output is {out.detach()}')

    print(f'Processing iter: {i}')
    loss = L(out, batch_y)
    model.zero_grad()
    loss.backward()
    grad = batch_x.grad.detach()

    # Only the sign of the gradient is used, so every sample moves by exactly step_size.
    # NOTE(review): the perturbed signal is not clamped to any amplitude range —
    # confirm whether that matters when the audio is played or written to disk.
    pert_batch = (batch_x + step_size * grad.sign()).detach()

    # The success check needs no gradients, so do not build an autograd graph for it.
    with torch.no_grad():
        out_pert = model(pert_batch)
    # Take the argmax over the class axis, as the initial-prediction print does.
    prediction = torch.argmax(out_pert, dim=1)

    # Continue from the perturbed signal with a fresh leaf tensor so that the next
    # backward() fills a new .grad instead of accumulating into the previous one.
    batch_x = pert_batch.clone()
    batch_x.requires_grad = True

    if prediction.item() != batch_y.item():
        print(f'Attack stops at iter {i}, model output is: {out_pert}, \nmodel prediction is {prediction.item()}')
        break
else:
    # Runs only when the loop finished without hitting `break`.
    print(f'Prediction did not change within {n_iters} iterations')

print(out_pert)

Model initial prediction is: 1
 model output is tensor([[-9.5326e+00, -7.2477e-05]], device='cuda:0')
Processing iter: 0
Processing iter: 1
Processing iter: 2
Processing iter: 3
Processing iter: 4
Processing iter: 5
Processing iter: 6
Processing iter: 7
Processing iter: 8
Processing iter: 9
Processing iter: 10
Processing iter: 11
Processing iter: 12
Processing iter: 13
Processing iter: 14
Processing iter: 15
Processing iter: 16
Processing iter: 17
Processing iter: 18
Processing iter: 19
Processing iter: 20
Processing iter: 21
Processing iter: 22
Processing iter: 23
Processing iter: 24
Processing iter: 25
Processing iter: 26
Processing iter: 27
Processing iter: 28
Processing iter: 29
Processing iter: 30
Processing iter: 31
Processing iter: 32
Processing iter: 33
Processing iter: 34
Processing iter: 35
Processing iter: 36
Processing iter: 37
Processing iter: 38
Processing iter: 39
Processing iter: 40
Processing iter: 41
Processing iter: 42
Processing iter: 43
Processing iter: 44
Processi

In [105]:
# Copy the final attack input out of autograd, drop the leading axis and hand it
# to NumPy on the CPU so it can be played back or saved.
p_audio = (
    batch_x.detach()
    .clone()
    .squeeze(0)
    .cpu()
    .numpy()
)

In [106]:
# Inline player for the perturbed clip (last expression, so it renders as the cell output).
ipd.Audio(data=p_audio, rate=16000)

In [107]:
import soundfile as sf
# Optional export of the perturbed audio (p_audio) at 16 kHz; currently disabled.
# Uncomment both lines below to write the file.
# NOTE(review): the id in the file name differs from the file_number loaded
# earlier in this notebook — confirm the intended name before saving.
#save_path = '1611480_0dot01.flac'
#sf.write(save_path, p_audio, samplerate=16000)

In [108]:
# Values noted for reference (presumably a model output from an earlier run — confirm):
# [-5.5219e+00, -4.0063e-03]