In [1]:
from src.utils import *
import os
import IPython.display as ipd
import logging

from src.resnet_model import SpectrogramModel
import torch.nn as nn
import librosa


# Keep third-party loggers from flooding the cell outputs: numba is limited to
# warnings and above, while the listed matplotlib loggers are switched off.
logging.getLogger('numba').setLevel(logging.WARNING)
for _logger_name in ('matplotlib.font_manager', 'matplotlib.colorbar', 'matplotlib.pyplot'):
    logging.getLogger(_logger_name).disabled = True

In [153]:
# Reproducibility and hardware setup. `seed_everything` and `set_gpu` are not
# defined in this notebook; presumably they come from the `src.utils` wildcard import.
seed_everything(1234)
set_gpu(-1)  # NOTE(review): -1 looks like "let the helper pick a GPU" — confirm in src.utils
plt.style.use('dark_background')

# Use CUDA when torch reports an available device, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Current working directory; later cells build file paths from it.
script_dir = os.getcwd()

GPU selected: 1 - Quadro P6000


In [158]:
# https://github.com/hrtlacek/SNR/blob/main/SNR.ipynb
def compute_snr(clean, perturbed):
    """Return the signal-to-noise ratio, in dB, of ``perturbed`` relative to ``clean``.

    The noise is the sample-wise difference ``clean - perturbed`` and powers are
    mean squares, so the result is ``10 * log10(P_clean / P_noise)``.

    The referenced recipe subtracts the noise power from the signal power because
    its "signal" is the noisy mixture. Here ``clean`` is already the clean
    reference, so no subtraction is applied; subtracting would understate the
    SNR and yield NaN whenever the noise is at least as strong as the signal.

    Args:
        clean: Reference signal (array-like of numbers).
        perturbed: Perturbed signal, broadcast-compatible with ``clean``.

    Returns:
        The SNR in dB. ``inf`` if the signals are identical, ``-inf`` if the
        reference is silent while the noise is not.
    """
    # Work in floating point so lists and integer PCM arrays are accepted and
    # squaring cannot overflow a narrow integer dtype.
    clean = np.asarray(clean, dtype=float)
    perturbed = np.asarray(perturbed, dtype=float)

    noise = clean - perturbed
    signal_power = np.mean(clean ** 2)
    noise_power = np.mean(noise ** 2)

    if noise_power == 0:
        return float('inf')
    if signal_power == 0:
        # Avoid log10(0) and its RuntimeWarning; the limit is -inf dB.
        return float('-inf')

    return 10 * np.log10(signal_power / noise_power)

In [199]:
def normalize_rms(signal):
    """Scale ``signal`` so that its root-mean-square value becomes 1.

    The RMS is ``sqrt(mean(signal ** 2))``. When it is not strictly positive
    (for example an all-zero input), the input object is returned unchanged
    instead of dividing by it.
    """
    level = np.sqrt(np.mean(signal ** 2))
    if not level > 0:
        # Nothing sensible to divide by: hand the signal back untouched.
        return signal
    return signal / level

def compute_snr_rms_normalized(clean_audio, perturbed_audio):
    """Return the SNR in dB between two signals after scaling each to unit RMS.

    Both inputs are converted to numpy arrays and passed through
    ``normalize_rms``. The noise is the difference of the two normalized
    signals, and the result is ``10 * log10(P_signal / P_noise)`` with powers
    computed as mean squares. ``np.inf`` is returned when the noise power is zero.
    """
    reference = normalize_rms(np.array(clean_audio))
    degraded = normalize_rms(np.array(perturbed_audio))

    # Mean-square power of the normalized reference and of the residual.
    reference_power = np.mean(reference ** 2)
    residual_power = np.mean((reference - degraded) ** 2)

    # Identical normalized signals: no noise, hence unbounded SNR.
    if residual_power == 0:
        return np.inf

    return 10 * np.log10(reference_power / residual_power)

In [200]:
# Which adversarial example to compare against its clean source recording.
file_number = 1223847
attack_model = 'ResNet2D'
epsilon = '3dot0'

sample_rate = 16000   # both files are loaded (resampled by librosa if needed) at this rate
max_samples = 47104   # upper bound on the number of samples compared

# Perturbed clip, stored under the working directory.
attack_tag = f'BIM_{attack_model}_v0'
audio_path = os.path.join(
    script_dir,
    f'{attack_tag}_pow',
    f'{attack_tag}_whole_pow_{epsilon}',
    f'{attack_tag}_pow_LA_E_{file_number}_{epsilon}.flac',
)
audio, _ = librosa.load(audio_path, sr=sample_rate)

# Clean reference clip. The dataset location is an absolute path, so it is used
# directly: os.path.join discards every component that precedes an absolute one,
# which made the former `script_dir, '..'` prefix dead code.
clean_audio_path = f'/nas/public/dataset/asvspoof2019/LA/ASVspoof2019_LA_eval/flac/LA_E_{file_number}.flac'
clean, _ = librosa.load(clean_audio_path, sr=sample_rate)

# Trim both clips to one common length. Slicing each to `max_samples`
# independently leaves mismatched shapes when a file is shorter than the cap,
# and the sample-wise subtraction inside the SNR computation then fails.
n_samples = min(max_samples, len(audio), len(clean))
audio = audio[:n_samples]
clean = clean[:n_samples]

snr = compute_snr_rms_normalized(clean, audio)
print(f'SNR: {snr}')

SNR: 14.793338775634766


In [201]:
# Render an inline player for `audio` (the perturbed clip) at 16 kHz.
ipd.Audio(data=audio, rate=16000)

In [202]:
# Render an inline player for `clean` (the reference clip) at 16 kHz.
ipd.Audio(data=clean, rate=16000)