# Analyze
This notebook runs a single audio file through a trained model and reports, second by second, how likely the audio is to be a deepfake.

In [1]:
import torch
from pydub import AudioSegment
import pandas as pd
from IPython.display import display

from cnn.models import Model
from data.import_data import load_dataset, get_dataloader, load_file
from eval import eval_model



In [2]:
# utility function
def play_second(audio, second):
    '''
    Return the one-second slice of `audio` that starts `second` seconds in.

    Nothing is played here: the slice is handed back so the caller can decide
    what to do with it (for example, pass it to `display`).

    Assumes `audio` is indexed in milliseconds, as a pydub AudioSegment is.
    '''
    ms_per_second = 1000
    start_ms = second * ms_per_second
    end_ms = (second + 1) * ms_per_second

    return audio[start_ms:end_ms]

In [19]:
# Load the evaluation metadata table and preview its 20 longest clips
# (rows ordered by the 'length (sec)' column, largest first).

eval_data = pd.read_csv('data/meta/eval.csv')
(
    eval_data
    .sort_values(by='length (sec)', ascending=False)
    .head(20)
)

Unnamed: 0,file,speaker,label,numeric_label,length (sec)
2441,21311.wav,Alec Guinness,spoof,1,24.727
1659,14627.wav,Alec Guinness,spoof,1,24.645063
2714,23557.wav,Alec Guinness,bona-fide,0,23.437062
712,6404.wav,Alec Guinness,spoof,1,22.530062
1208,10661.wav,Alec Guinness,spoof,1,22.126
1702,15026.wav,Alec Guinness,spoof,1,21.874
1909,16669.wav,Alec Guinness,spoof,1,20.028
2346,20505.wav,Alec Guinness,spoof,1,18.667
3365,29289.wav,Alec Guinness,spoof,1,18.566063
2272,19868.wav,Alec Guinness,spoof,1,18.541


In [20]:
# set the paths to the file you want to analyze, and the model
file_path = 'data_files/15563.wav'
model_path = 'models/1.pth'

# load the model
# map_location='cpu' lets a checkpoint saved from a GPU run load on a machine
# without CUDA; nothing in this cell moves the model or the inputs to a GPU.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model = Model(num_channels=1)
model.load_state_dict(torch.load(model_path, map_location='cpu'))

# load the file
# presumably one tensor per one-second window of the clip -- TODO confirm in data.import_data
file_tensors = load_file(file_path)

# run the clip through the model
# eval() puts the module in inference mode before predicting
model.eval()

# inference only, so gradient tracking is switched off
with torch.no_grad():
    # stack the per-window tensors into one batch and keep column 0 of the output,
    # which the rest of the notebook treats as the deepfake probability
    # (assumes the model returns a 2-D (windows, k) tensor -- TODO confirm)
    model_preds = model(torch.stack(file_tensors))[:, 0]


# get the average probability of deepfake
average_prob = model_preds.mean().item()

# sort the seconds by their deepfake probability, highest first;
# each entry is a (second_index, probability) tuple of plain Python values
probs_sorted = sorted(enumerate(model_preds.tolist()), key=lambda x: x[1], reverse=True)

In [21]:
# report every second of the clip, highest deepfake probability first
for ranked in probs_sorted:
    i, prob = ranked
    print(f'Second {i}, Probability: {prob}')

Second 2, Probability: 0.6347880959510803
Second 18, Probability: 0.521275520324707
Second 17, Probability: 0.41864562034606934
Second 9, Probability: 0.32806292176246643
Second 13, Probability: 0.29109397530555725
Second 0, Probability: 0.26003462076187134
Second 11, Probability: 0.16330033540725708
Second 5, Probability: 0.1355976164340973
Second 15, Probability: 0.12734848260879517
Second 1, Probability: 0.0777774304151535
Second 16, Probability: 0.07561053335666656
Second 12, Probability: 0.06700043380260468
Second 6, Probability: 0.024554025381803513
Second 7, Probability: 0.012015635147690773
Second 10, Probability: 0.00970432348549366
Second 8, Probability: 0.002494129352271557
Second 3, Probability: 0.0016823947662487626
Second 4, Probability: 0.00015428892220370471
Second 14, Probability: 0.00011853315663756803


## Display Results

In [22]:
# headline number: mean deepfake probability across the clip, shown as a percentage
print(f'This clip has an average deepfake probability of {round(average_prob * 100, 2)}%\n')

# the five entries at the front of the ranking are the ones reported and played back
top_seconds = probs_sorted[:5]

print('The most likely deepfake seconds are:')
for i, prob in top_seconds:
    print(f'    Second {i}: {prob:.2f}')
print()


# read the original audio so the flagged seconds can be listened to
audio = AudioSegment.from_file(file_path)

# one labelled inline player per flagged second
for i, prob in top_seconds:
    print(f'Second {i}:')
    display(play_second(audio, i))

This clip has an average deepfake probability of 16.59%

The most likely deepfake seconds are:
    Second 2: 0.63
    Second 18: 0.52
    Second 17: 0.42
    Second 9: 0.33
    Second 13: 0.29

Second 2:


Second 18:


Second 17:


Second 9:


Second 13:
