In [None]:
import os

# Hide every GPU from this process by blanking CUDA_VISIBLE_DEVICES.
# This runs ahead of the `import torch` below, so the notebook is exercised
# on CPU only (it also sets `device = 'cpu'` further down).
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Load audio
import librosa
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import torch
import torchaudio
import librosa.display
from model import Model
from torch import nn

def load_audio(path):
    """Read the audio file at ``path`` via ``torchaudio.load``.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``torchaudio.load``.
    """
    waveform, rate = torchaudio.load(path)
    return waveform, rate

# Inference device. GPUs are hidden via CUDA_VISIBLE_DEVICES at the top of the
# notebook, so this is pinned to CPU; the alternative is kept for reference.
device = 'cpu'
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = './Best_LA_model_for_DF.pth'
print('Device: {}'.format(device))

# Build the network and report its total parameter count.
model = Model(None, device=device).to(device)
nb_params = sum(param.numel() for param in model.parameters())
print('nb_params:', nb_params)

# Read the checkpoint onto the selected device.
state_dict = torch.load(model_path, map_location=device)

# A checkpoint saved from an nn.DataParallel-wrapped model has a "module."
# prefix on every key. This model is NOT wrapped (no nn.DataParallel here), so
# such keys would match nothing and, with strict=False, be skipped silently.
# Strip the prefix only when every key carries it; other checkpoints are left
# untouched.
dp_prefix = 'module.'
if state_dict and all(key.startswith(dp_prefix) for key in state_dict):
    state_dict = {key[len(dp_prefix):]: value for key, value in state_dict.items()}

# strict=False tolerates key mismatches, so surface them explicitly: a model
# that silently kept its random initialisation would make every prediction
# below meaningless.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys:
    print('WARNING: {} model keys were not found in the checkpoint, e.g. {}'.format(
        len(load_result.missing_keys), load_result.missing_keys[:5]))
if load_result.unexpected_keys:
    print('WARNING: {} checkpoint keys were not used by the model, e.g. {}'.format(
        len(load_result.unexpected_keys), load_result.unexpected_keys[:5]))
print('Model loaded : {}'.format(model_path))

# Switch to inference mode.
model.eval()


In [None]:
import time
from data_utils_SSL import pad
from torch import Tensor
import librosa
import psutil


# Reference clip used by the first benchmark below, loaded with librosa at 16 kHz.
reference_clip_path = '/datab/hungdx/SSL_Anti-spoofing/commonvoice/test/clips/common_voice_en_19698109.wav'
audio, _ = librosa.load(reference_clip_path, sr=16000)

def get_system_usage(interval=1):
    """Sample CPU utilisation and the peak CUDA memory since the previous call.

    Args:
        interval: Seconds passed to ``psutil.cpu_percent``. With a positive
            value that call blocks for the whole interval, so the CPU figure
            describes the window *after* the caller's work, and every call
            costs ``interval`` seconds of wall time. Defaults to 1.

    Returns:
        A ``(cpu_usage, gpu_usage)`` tuple. ``cpu_usage`` is the percentage
        reported by psutil. ``gpu_usage`` is
        ``torch.cuda.max_memory_allocated()`` in bytes, or ``None`` when CUDA
        is not available.
    """
    cpu_usage = psutil.cpu_percent(interval=interval)
    gpu_usage = None
    if torch.cuda.is_available():
        # Let queued kernels finish so the peak reflects completed work.
        torch.cuda.synchronize()
        gpu_usage = torch.cuda.max_memory_allocated()
        # reset_max_memory_allocated() is deprecated; this is its documented
        # replacement and restarts peak tracking for the next sample.
        torch.cuda.reset_peak_memory_stats()
    return cpu_usage, gpu_usage

def detect_deepfake(audio_data, model, segment_duration=4.0, sampling_rate=16000):
    """Run ``model`` over consecutive fixed-length segments and time each pass.

    Only whole segments are processed: any trailing remainder shorter than
    ``segment_duration`` is dropped, and audio shorter than one segment yields
    three empty lists. Relies on the notebook-level ``device`` variable and on
    ``pad`` / ``get_system_usage`` being defined.

    Args:
        audio_data: 1-D sequence of samples (sliced and measured with ``len``).
        model: Callable network, invoked as ``model(segment)``.
        segment_duration: Segment length in seconds.
        sampling_rate: Samples per second of ``audio_data``.

    Returns:
        ``(predictions, latencies, system_usages)``, one entry per segment:
        ``torch.sigmoid`` of the model output, the forward-pass time in
        milliseconds, and the ``(cpu_usage, gpu_usage)`` tuple from
        ``get_system_usage()``.
    """
    segment_length = int(segment_duration * sampling_rate)
    total_segments = len(audio_data) // segment_length
    predictions = []
    latencies = []
    system_usages = []

    with torch.no_grad():
        for i in range(total_segments):
            segment = audio_data[i * segment_length:(i + 1) * segment_length]
            # NOTE(review): `pad` comes from data_utils_SSL; presumably it
            # extends the segment to 64600 samples -- confirm against that module.
            segment = pad(segment, 64600)
            segment = Tensor(segment).unsqueeze(0).to(device)

            # CUDA kernels run asynchronously: without synchronising around the
            # forward pass the timer would only capture kernel launch.
            if segment.is_cuda:
                torch.cuda.synchronize()
            # perf_counter is monotonic and high resolution; time.time() can
            # jump with wall-clock adjustments and skew short measurements.
            start_time = time.perf_counter()
            out = model(segment)
            if segment.is_cuda:
                torch.cuda.synchronize()
            end_time = time.perf_counter()

            # NOTE(review): sigmoid is applied to the raw model output; if the
            # model emits 2-class logits or log-probabilities a softmax/exp may
            # be the intended normalisation -- confirm against Model.
            output_probs = torch.sigmoid(out)
            predictions.append(output_probs)

            latency = (end_time - start_time) * 1000  # seconds -> milliseconds
            latencies.append(latency)

            # Sampled after the forward pass; see get_system_usage.
            cpu_usage, gpu_usage = get_system_usage()
            system_usages.append((cpu_usage, gpu_usage))

    return predictions, latencies, system_usages

# Run the deepfake detection simulation
predictions, latencies, system_usages = detect_deepfake(audio, model)

# Output the predictions, latencies, and system usages
for segment_no, (pred, latency, (cpu_usage, gpu_usage)) in enumerate(
        zip(predictions, latencies, system_usages), start=1):
    fake_prob = round(float(pred[0][1]) * 100, 2)
    # gpu_usage is None whenever CUDA is unavailable (always the case while
    # CUDA_VISIBLE_DEVICES is blanked), and float(None) would raise TypeError.
    if gpu_usage is None:
        gpu_report = "n/a"
    else:
        # Convert GPU usage from bytes to megabytes
        gpu_report = f"{round(float(gpu_usage) / 1024 / 1024, 2)} megabytes"
    print(f"Segment {segment_no}: Prediction - Fake({fake_prob}%), Inference time - {latency:.2f} ms, CPU usage - {cpu_usage}%, GPU usage - {gpu_report}")

In [None]:
# Generate 30 s of random "fake" audio at 16 kHz (seeded so re-runs see the same input)
fake_audio = np.random.default_rng(0).random(30 * 16000)

# Run the deepfake detection simulation.
# detect_deepfake returns three per-segment lists, not pre-computed averages.
predictions, latencies, system_usages = detect_deepfake(fake_audio, model)

# Reduce the per-segment measurements to averages.
n_segments = len(latencies)
avg_latency = sum(latencies) / n_segments if n_segments else float('nan')
avg_cpu_usage = sum(cpu for cpu, _gpu in system_usages) / n_segments if n_segments else float('nan')
# GPU samples are None when CUDA is unavailable; they are reported in bytes.
gpu_samples = [gpu for _cpu, gpu in system_usages if gpu is not None]
avg_gpu_usage = sum(gpu_samples) / len(gpu_samples) / 1024 ** 3 if gpu_samples else None

# Output the averages
print(f"Average CPU Usage: {avg_cpu_usage:.2f}%")
if avg_gpu_usage is None:
    print("Average GPU Usage: n/a (CUDA not available)")
else:
    print(f"Average GPU Usage: {avg_gpu_usage:.2f} GB")
print(f"Average Inference Time: {avg_latency:.2f} ms")

In [None]:
# 8s fake audio; librosa.load returns (samples, sample_rate)
fake_audio_jp = librosa.load('/datab/hungdx/conformer-based-classifier-for-anti-spoofing/000126_SeamlessM4T-TTS_jpn.wav', sr=16000)

# Repeat audio 500 times
fake_audio2 = np.tile(fake_audio_jp[0], 500)

# detect_deepfake returns three per-segment lists, not pre-computed averages.
predictions, latencies, system_usages = detect_deepfake(fake_audio2, model)

# Reduce the per-segment measurements to averages.
n_segments = len(latencies)
avg_latency = sum(latencies) / n_segments if n_segments else float('nan')
avg_cpu_usage = sum(cpu for cpu, _gpu in system_usages) / n_segments if n_segments else float('nan')
# GPU samples are None when CUDA is unavailable; they are reported in bytes.
gpu_samples = [gpu for _cpu, gpu in system_usages if gpu is not None]
avg_gpu_usage = sum(gpu_samples) / len(gpu_samples) / 1024 ** 3 if gpu_samples else None

# Output the averages
print(f"Average CPU Usage: {avg_cpu_usage:.2f}%")
if avg_gpu_usage is None:
    print("Average GPU Usage: n/a (CUDA not available)")
else:
    print(f"Average GPU Usage: {avg_gpu_usage:.2f} GB")
print(f"Average Inference Time: {avg_latency:.2f} ms")

# AVG inference time: 42 ms, Memory: 8GB