# Analyze
Use this notebook to run a trained model over a single audio file and find the seconds most likely to be deepfaked.

In [129]:
import torch
from pydub import AudioSegment
import pandas as pd
from IPython.display import display

from cnn.models import Model
from data.import_data import load_dataset, get_dataloader, load_file
from eval import eval_model

In [122]:
# utility function
def play_second(audio, second):
    '''
    Return the one-second slice of `audio` that begins at `second`.

    Nothing is played here: the slice is simply returned, and it is up to the
    caller to display it. Assumes `audio` is sliceable by millisecond offsets
    (as pydub's AudioSegment is), hence the factor of 1000.
    '''
    ms_per_second = 1000
    start_ms = second * ms_per_second
    end_ms = (second + 1) * ms_per_second

    return audio[start_ms:end_ms]

In [111]:
# load the evaluation metadata, then list the clips from longest to shortest
# (sort_values returns a new frame, so eval_data itself keeps its file order)

eval_data = pd.read_csv('data/meta/eval.csv')
eval_data.sort_values(by='length (sec)', ascending=False)

Unnamed: 0,file,speaker,label,numeric_label,length (sec)
2441,21311.wav,Alec Guinness,spoof,1,24.727000
1659,14627.wav,Alec Guinness,spoof,1,24.645063
2714,23557.wav,Alec Guinness,bona-fide,0,23.437062
712,6404.wav,Alec Guinness,spoof,1,22.530062
1208,10661.wav,Alec Guinness,spoof,1,22.126000
...,...,...,...,...,...
2776,24015.wav,Alec Guinness,bona-fide,0,0.609000
51,487.wav,Alec Guinness,bona-fide,0,0.605000
2409,20988.wav,Alec Guinness,bona-fide,0,0.594063
2088,18276.wav,Alec Guinness,bona-fide,0,0.577063


In [119]:
# set the paths to the file you want to analyze, and the model
file_path = 'data_files/14627.wav'
model_path = 'models/0.pth'

# load the model
# map_location='cpu' lets a checkpoint that was saved on a GPU load on a
# CPU-only machine; the model is built on the CPU here anyway.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you trust.
model = Model(num_channels=16)
model.load_state_dict(torch.load(model_path, map_location='cpu'))

# load the file
# presumably one tensor per second of audio, since the results are reported
# per second -- TODO confirm against load_file
file_tensors = load_file(file_path)

# fail early with a readable message instead of an opaque torch.stack error
if len(file_tensors) == 0:
    raise ValueError(f'No audio segments could be loaded from {file_path}')

# run the clip through the model
model.eval()

with torch.no_grad():
    # keep column 0 of the model output for every segment in the batch
    model_preds = model(torch.stack(file_tensors))[:, 0]


# get the average probability of deepfake
average_prob = model_preds.mean().item()

# sort the seconds by their deepfake probability, highest first;
# each entry is an (index, probability) tuple
probs_sorted = sorted(enumerate(model_preds.tolist()), key=lambda x: x[1], reverse=True)

## Display Results

In [136]:
# the same top-ranked entries drive both the printed table and the audio previews
top_seconds = probs_sorted[:5]

# clip-level summary, followed by the highest-scoring seconds
print(f'This clip has an average deepfake probability of {round(average_prob * 100, 2)}%\n')
print('The most likely deepfake seconds are:')
for i, prob in top_seconds:
    print(f'    Second {i}: {prob:.2f}')
print('')


# load the audio
audio = AudioSegment.from_file(file_path)

# show a one-second preview for each of the top-ranked seconds
for i, prob in top_seconds:
    print(f'Second {i}:')
    second_clip = play_second(audio, i)
    display(second_clip)

This clip has an average deepfake probability of 49.89%

The most likely deepfake seconds are:
    Second 19: 0.65
    Second 23: 0.61
    Second 20: 0.60
    Second 2: 0.59
    Second 15: 0.59

Second 19:


Second 23:


Second 20:


Second 2:


Second 15:
