In [10]:
# import common libraries
import numpy as np
import math
import pandas as pd
import soundfile as sf
import librosa
import os
import pickle
import sys

# Defining a few paths

In [None]:

# Single switch between the original and the laundered dataset. The data,
# feature and score locations are all derived from it, so they can no longer
# get out of sync by (un)commenting only some of the path lines.
USE_LAUNDERED = True
_dataset_suffix = '_laundered' if USE_LAUNDERED else ''

# path where fake audio files are saved
data_path = '/data/Famous_Figures/AES_Data/aes_data' + _dataset_suffix

# path where features will be saved
feat_dir = '/data/Famous_Figures/AES_Features' + _dataset_suffix + '/'

# path where score files will be saved
score_dir = '/data/Famous_Figures/AES_Score_Files' + _dataset_suffix + '/'

# exist_ok=True replaces the separate existence check
os.makedirs(score_dir, exist_ok=True)

# extension of the audio files
audio_ext = '.wav'

# CQCC-GMM and LFCC-GMM 

In [None]:
from ASD_ML.gmm_asvspoof import scoring

# Front-end feature type and GMM size; both are part of the model directory
# name and of the score-file name below.
features = 'cqcc'
n_components = 512

model_dir = 'ASD_ML/gmm_' + str(n_components) + '_LA_' + features
bona_path = os.path.join(model_dir, 'bonafide', 'gmm_final.pkl')
spoof_path = os.path.join(model_dir, 'spoof', 'gmm_final.pkl')

# Pickled GMM locations, keyed the way scoring() is called with them here.
dict_file = {'bona': bona_path, 'spoof': spoof_path}

# Basenames (without extension) of the audio files in data_path.
# - os.path.splitext keeps names that contain dots intact
#   ('a.b.wav' -> 'a.b', whereas split('.')[0] would give 'a').
# - Entries that do not end in audio_ext are skipped.
# - Sorting makes the order independent of os.listdir, so score files from
#   different runs list the files in the same order.
# To score only a subset, assign an explicit list of basenames instead,
# e.g. files = ['Barack_Obama_StyleTTS2', 'Trump_Parrot_1'].
files = sorted(
    os.path.splitext(f)[0]
    for f in os.listdir(data_path)
    if f.endswith(audio_ext)
)

eval_folder = data_path

scores_file = os.path.join(
    score_dir, 'scores-' + features + '-gmm-' + str(n_components) + '.txt')

# scoring() comes from ASD_ML.gmm_asvspoof; its return value is kept for
# inspection in the next cell.
test_scores = scoring(scores_file=scores_file, dict_file=dict_file, features=features,
        eval_file_list=files, eval_folder=eval_folder, audio_ext=audio_ext,
        feat_dir=feat_dir, features_cached=True)

In [None]:
# Show the value returned by scoring() in the CQCC-GMM cell.
print(test_scores)

# OC-Softmax

In [None]:
############## imports #############

from tqdm import tqdm
import torch
import torch.nn.functional as F

sys.path.append("./AIR-ASVspoof/")
from process_LA_data import extract_lfcc

############## Paths and Variables ###############

audio_ext = '.wav'

# Basenames (without extension) of the audio files in data_path.
# - os.path.splitext keeps names that contain dots intact
#   ('a.b.wav' -> 'a.b', whereas split('.')[0] would give 'a').
# - Entries that do not end in audio_ext are skipped.
# - Sorting makes the order independent of os.listdir.
# To process only a subset, assign explicit lists instead, e.g.
# filelist = ['Barack_Obama_StyleTTS2', 'Trump_Parrot_1'] with matching labels.
files = os.listdir(data_path)
filelist = sorted(os.path.splitext(f)[0] for f in files if f.endswith(audio_ext))

# Every file is written to the score file with the key "spoof".
labels = ["spoof"] * len(filelist)

model_dir = "./AIR-ASVspoof/models/ocsoftmax"
model_path = os.path.join(model_dir, "anti-spoofing_lfcc_model.pt")
loss_model_path = os.path.join(model_dir, "anti-spoofing_loss_model.pt")

# Selects the scoring branch used in the "Generate Scores" part below.
add_loss = "ocsoftmax"

# Extracted LFCC features are cached as one pickle per audio file.
Feat_dir = os.path.join(feat_dir, 'lfcc_features_airasvspoof')
LFCC_sav_dir = os.path.join(Feat_dir, 'eval')

os.makedirs(LFCC_sav_dir, exist_ok=True)

#################### Extract Features ######################
for file in filelist:

    LFCC_filename = os.path.join(LFCC_sav_dir, str(file) + '.pkl')

    # Skip files whose features are already cached on disk.
    if os.path.exists(LFCC_filename):
        print("Feature file {} already extracted".format(file))
        continue

    audio_file = os.path.join(data_path, str(file) + audio_ext)

    # NOTE(review): without sr=, librosa.load resamples to its own default
    # rate, while the RawNet2 cell loads the same files with sr=16000.
    # Confirm which rate extract_lfcc / the LFCC model expect. Left unchanged
    # so that already-cached features stay consistent with new ones.
    x, fs = librosa.load(audio_file)

    lfcc_features = extract_lfcc(x, fs)

    print(lfcc_features.shape)

    with open(LFCC_filename, 'wb') as f:
        pickle.dump(lfcc_features, f)


#################### Generate Scores ######################

def repeat_padding(spec, ref_len):
    """Tile ``spec`` along dimension 1 and cut it to ``ref_len`` columns.

    In this notebook it is called with a 2-D torch tensor that has fewer than
    ``ref_len`` columns.

    Args:
        spec: tensor exposing ``.shape`` and ``.repeat(1, n)``.
        ref_len: number of columns the result is cut to.

    Returns:
        The tiled tensor restricted to its first ``ref_len`` columns.
    """
    n_cols = spec.shape[1]
    # Smallest number of copies whose combined width reaches ref_len.
    copies = int(np.ceil(ref_len / n_cols))
    tiled = spec.repeat(1, copies)
    return tiled[:, :ref_len]

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Map both checkpoints onto the device that is actually available; a
# hard-coded "cuda" fails on CPU-only machines even though `device` falls
# back to CPU.
# torch.load unpickles the files, so only load checkpoints from a trusted source.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled modules - confirm against the installed version.
model = torch.load(model_path, map_location=device)
model = model.to(device)
loss_model = torch.load(loss_model_path, map_location=device) if add_loss != "softmax" else None

model.eval()

# Number of feature frames (dimension 1) fed to the model per file.
feat_len = 750

# Seeded generator so the random crop of long files, and therefore the scores,
# are identical across runs.
crop_rng = np.random.RandomState(0)

scr = []

# Inference only, so autograd bookkeeping is disabled for the whole loop.
with open(os.path.join(score_dir, 'AES_Workshop_checkpoint_cm_score.txt'), 'w') as cm_score_file, torch.no_grad():

    for i, audio_fn in enumerate(tqdm(filelist)):

        LFCC_filename = os.path.join(LFCC_sav_dir, str(audio_fn) + '.pkl')

        # Features were pickled by the extraction loop of this notebook.
        with open(LFCC_filename, 'rb') as feature_handle:
            feat_mat = pickle.load(feature_handle)

        feat_mat = torch.from_numpy(feat_mat)
        this_feat_len = feat_mat.shape[1]
        if this_feat_len > feat_len:
            # Too long: keep a random window of feat_len frames.
            startp = crop_rng.randint(this_feat_len - feat_len)
            feat_mat = feat_mat[:, startp:startp + feat_len]
        elif this_feat_len < feat_len:
            # Too short: repeat the features until feat_len frames are reached.
            feat_mat = repeat_padding(feat_mat, feat_len)

        # Two leading singleton dimensions are added before the model call.
        lfcc_feat = feat_mat.unsqueeze(0).unsqueeze(0).float().to(device)

        label = labels[i]

        feats, lfcc_outputs = model(lfcc_feat)

        # NOTE(review): the loss models receive the whole `labels` list of
        # strings, not a per-item label tensor. Left as is because their
        # forward() is not visible here - confirm this is what they expect.
        if add_loss == "ocsoftmax":
            ang_isoloss, score = loss_model(feats, labels)
        elif add_loss == "amsoftmax":
            outputs, moutputs = loss_model(feats, labels)
            score = F.softmax(outputs, dim=1)[:, 0]
        else:
            # Explicit dim avoids the implicit-dimension deprecation warning.
            score = F.softmax(lfcc_outputs, dim=1)[:, 0]

        cm_score_file.write(
            '%s %s %s\n' % (audio_fn, label, score.item()))

        scr.append(score.item())

scores_df_ocsoftmax = pd.DataFrame({'files': filelist, 'scores': scr})


In [None]:
# Per-file OC-Softmax scores collected by the scoring loop above.
print(scores_df_ocsoftmax)

# RawNet2

In [None]:
############## imports ##########
import yaml
import librosa
from torch import Tensor

sys.path.append("./RawNet2/")
from model import RawNet

############# Paths and Variables #############

# Basenames (without extension) of the audio files in data_path.
# - os.path.splitext keeps names that contain dots intact
#   ('a.b.wav' -> 'a.b', whereas split('.')[0] would give 'a').
# - Entries that do not end in audio_ext are skipped.
# - Sorting makes the order independent of os.listdir.
# To evaluate only a subset, assign an explicit list of basenames instead.
files = os.listdir(data_path)
file_eval = sorted(os.path.splitext(f)[0] for f in files if f.endswith(audio_ext))

# One label per evaluated file. Sized from file_eval so this cell does not
# depend on `filelist`, which is only defined by the OC-Softmax cell.
labels = ["spoof"] * len(file_eval)

# The two pieces join to 'RawNet2__eval_CM_scores.txt' (double underscore),
# which is the name the score-comparison section reads.
eval_out = os.path.join(score_dir, 'RawNet2_' + '_eval_CM_scores.txt')
model_path = './RawNet2/models/pre_trained_DF_RawNet2.pth'

############# Black Box Code ############

dir_yaml = './RawNet2/model_config_RawNet.yaml'

# NOTE: yaml.Loader can construct arbitrary Python objects, so only load a
# trusted config file; yaml.safe_load is enough if the file is plain YAML.
with open(dir_yaml, 'r') as f_yaml:
    parser1 = yaml.load(f_yaml, yaml.Loader)


track = 'LA'
assert track in ['LA', 'PA','DF'], 'Invalid track given'

#GPU device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device: {}'.format(device))

#model
model = RawNet(parser1['model'], device)
# Total number of elements over all model parameters.
nb_params = sum(param.numel() for param in model.parameters())
model = model.to(device)

# Load the pre-trained weights onto the selected device.
if model_path:
    model.load_state_dict(torch.load(model_path, map_location=device))
    print('Model loaded : {}'.format(model_path))

print('no. of eval trials',len(file_eval))

model.eval()

# functions we may need
def pad(x, max_len=64600):
    """Return ``x`` cut or repeated to exactly ``max_len`` leading entries.

    Args:
        x: array whose first dimension is the sample axis.
        max_len: required length of the result.

    Returns:
        ``x[:max_len]`` when ``x`` already has at least ``max_len`` entries;
        otherwise ``x`` repeated end to end and cut to ``max_len``.
    """
    n_samples = x.shape[0]

    if n_samples < max_len:
        # One repetition more than the integer ratio always covers max_len.
        reps = int(max_len / n_samples) + 1
        tiled = np.tile(x, (1, reps))
        return tiled[0, :max_len]

    return x[:max_len]

score_list = []

# Inference only, so autograd bookkeeping is disabled for the whole loop.
with torch.no_grad():
    for audio_fn in file_eval:

        X, fs = librosa.load(os.path.join(data_path, str(audio_fn) + audio_ext), sr=16000)

        # Cut or repeat the waveform to exactly 64600 samples.
        X_pad = pad(X, 64600)

        # Add a leading batch dimension before the model call.
        x_inp = Tensor(X_pad).unsqueeze(0).float().to(device)

        score_out = model(x_inp)

        # Column 1 of the model output is used as the score.
        score_out = score_out[:, 1].cpu().numpy().ravel()

        # add outputs
        score_list.extend(score_out.tolist())

# Mode 'w' replaces the file on every run. With 'a+' each re-run of this cell
# appended a second copy of all rows, which the comparison section would then
# read back as duplicates.
with open(eval_out, 'w') as fh:
    for utt, cm in zip(file_eval, score_list):
        fh.write('{} {}\n'.format(utt, cm))
print('Scores saved to {}'.format(eval_out))

scores_df_rawnet2 = pd.DataFrame({'files': file_eval, 'scores': score_list})

In [None]:
# Per-file RawNet2 scores collected by the scoring loop above.
print(scores_df_rawnet2)

# Scores Comparison

In [None]:
from IPython.display import display_html

# Score directories compared below: first the run on the original audio,
# then the run on the laundered audio.
score_file_path, score_file_path_laundered = (
    '/data/Famous_Figures/AES_Score_Files/',
    '/data/Famous_Figures/AES_Score_Files_laundered/',
)

### Reading Score files of CQCC-GMM 

In [None]:
# Name of the score file written by the CQCC-GMM cell.
filename = 'scores-cqcc-gmm-512.txt'

# The same file name is looked up in the original and the laundered directory.
scores_fulfile, scores_fulfile_laundered = [
    os.path.join(folder, filename)
    for folder in (score_file_path, score_file_path_laundered)
]

# Space-separated rows; the column names are supplied here.
scores_df, scores_df_laundered = [
    pd.read_csv(path, sep=" ", names=["AUDIO_FILE_NAME", "Scores"])
    for path in (scores_fulfile, scores_fulfile_laundered)
]


In [None]:
# One captioned Styler per table, marked display:inline so the two tables can
# sit next to each other in the rendered output.
df1_styler, df2_styler = [
    frame.style.set_table_attributes("style='display:inline'").set_caption(caption)
    for frame, caption in (
        (scores_df, 'Before Laundering'),
        (scores_df_laundered, 'After Laundering'),
    )
]

# Concatenate both HTML fragments and hand them to IPython as raw HTML.
display_html(df1_styler._repr_html_() + df2_styler._repr_html_(), raw=True)

### Reading Score files of OC-Softmax

In [None]:
# Name of the score file written by the OC-Softmax cell.
filename = 'AES_Workshop_checkpoint_cm_score.txt'

# The same file name is looked up in the original and the laundered directory.
scores_fulfile, scores_fulfile_laundered = [
    os.path.join(folder, filename)
    for folder in (score_file_path, score_file_path_laundered)
]

# Space-separated rows with three fields (file name, key, score); the column
# names are supplied here.
scores_df, scores_df_laundered = [
    pd.read_csv(path, sep=" ", names=["AUDIO_FILE_NAME", "Key", "Scores"])
    for path in (scores_fulfile, scores_fulfile_laundered)
]

In [None]:
# One captioned Styler per table, marked display:inline so the two tables can
# sit next to each other in the rendered output.
df1_styler, df2_styler = [
    frame.style.set_table_attributes("style='display:inline'").set_caption(caption)
    for frame, caption in (
        (scores_df, 'Before Laundering'),
        (scores_df_laundered, 'After Laundering'),
    )
]

# Concatenate both HTML fragments and hand them to IPython as raw HTML.
display_html(df1_styler._repr_html_() + df2_styler._repr_html_(), raw=True)

### Reading Score files of RawNet2

In [None]:
# Name of the score file written by the RawNet2 cell (double underscore).
filename = 'RawNet2__eval_CM_scores.txt'

# The same file name is looked up in the original and the laundered directory.
scores_fulfile, scores_fulfile_laundered = [
    os.path.join(folder, filename)
    for folder in (score_file_path, score_file_path_laundered)
]

# Space-separated rows; the column names are supplied here.
scores_df, scores_df_laundered = [
    pd.read_csv(path, sep=" ", names=["AUDIO_FILE_NAME", "Scores"])
    for path in (scores_fulfile, scores_fulfile_laundered)
]

In [None]:
# One captioned Styler per table, marked display:inline so the two tables can
# sit next to each other in the rendered output.
df1_styler, df2_styler = [
    frame.style.set_table_attributes("style='display:inline'").set_caption(caption)
    for frame, caption in (
        (scores_df, 'Before Laundering'),
        (scores_df_laundered, 'After Laundering'),
    )
]

# Concatenate both HTML fragments and hand them to IPython as raw HTML.
display_html(df1_styler._repr_html_() + df2_styler._repr_html_(), raw=True)