# Combined Set 3:
Train Set: ASVspoof train (all samples) + prev datasets (90% of train set)

Dev set: ASVspoof dev (all samples) + prev datasets (10% of train set)

Test set: ASVspoof eval (all samples) + prev datasets (all samples of test set)

In [1]:
!export CUDA_VISIBLE_DEVICES=0

In [2]:
!echo $CUDA_VISIBLE_DEVICES

0


In [3]:
import argparse
import json
import os
import sys
import warnings
from importlib import import_module
from pathlib import Path
from shutil import copy
from typing import Dict, List, Union

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchcontrib.optim import SWA

from data_utils import (Dataset_ASVspoof2019_train,
                        Dataset_ASVspoof2019_devNeval, genSpoof_list_prevDbs, genSpoof_list,
                        Dataset_train_prevDbs, Dataset_PrevDbs, Dataset_cs2, Dataset_train_cs2)
from evaluation import calculate_tDCF_EER
from utils import create_optimizer, seed_worker, set_seed, str_to_bool

warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
# Experiment configuration for the combined-set-3 run.
# Boolean-like options are stored as strings; the cells that read them
# ("eval_all_best", "freq_aug") convert them with str_to_bool.
config = {
    # Root directory used as base_dir for both the train and dev datasets.
    "database_path": '/data/nfsshare/rishith/datasets/smaller_dbs/restructured/combinedSet3/',
#     "asv_score_path": "ASVspoof2019_LA_asv_scores/ASVspoof2019.LA.asv.eval.gi.trl.scores.txt",
    # Overwritten further down with the best checkpoint of this run before the
    # weights are loaded for testing.
    "model_path": "./models/weights/AASIST-L.pth",
    # Batch size of the train/dev/eval DataLoaders; also part of the experiment tag.
    "batch_size": 24,
    # Number of training epochs; copied into optim_config["epochs"].
    "num_epochs": 100,
    # NOTE(review): "loss" and "track" are not read by any visible cell.
    "loss": "CCE",
    "track": "LA",
    # When true, the eval set is scored every time the dev EER improves.
    "eval_all_best": "True",
    # File name (inside the experiment directory) of the eval score file.
    "eval_output": "eval_scores_cs3_cs3.txt",
    # NOTE(review): presumably consumed by set_seed(1234, config) - confirm in utils.
    "cudnn_deterministic_toggle": "True",
    "cudnn_benchmark_toggle": "False",
    # Passed unchanged to the model constructor; "architecture" selects the
    # module models.<architecture> that provides the Model class.
    "model_config": {
        "architecture": "AASIST",
        "nb_samp": 64600,
        "first_conv": 128,
        "filts": [70, [1, 32], [32, 32], [32, 24], [24, 24]],
        "gat_dims": [24, 32],
        "pool_ratios": [0.4, 0.5, 0.7, 0.5],
        "temperatures": [2.0, 2.0, 100.0, 100.0]
    },
    # Passed to create_optimizer after "epochs" and "steps_per_epoch" are added.
    # train_epoch steps the scheduler per batch when "scheduler" is
    # "cosine" or "keras_decay".
    "optim_config": {
        "optimizer": "adam", 
        "amsgrad": "False",
        "base_lr": 0.0001,
        "lr_min": 0.000005,
        "betas": [0.9, 0.999],
        "weight_decay": 0.0001,
        "scheduler": "cosine"
    }
}


In [5]:
model_config = config["model_config"]
optim_config = config["optim_config"]
optim_config["epochs"] = config["num_epochs"]

In [6]:
# Fill in optional settings without overriding values that are already present.
config.setdefault("eval_all_best", "True")
config.setdefault("freq_aug", "False")

In [7]:
set_seed(1234, config)

# Creating combined set 3
Combining prev databases with entire ASVspoof data to make combined set 3

In [8]:
# import random 
 
# cs1 = open("/DATA/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/combined_train_protocol.txt", 'r')
# asv_train_protocol = open("/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt", 'r')
# asv_dev_protocol = open("/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt", 'r')
# newTrain = open('./protocols/combinedSet3_train.txt', 'w')
# newDev = open('./protocols/combinedSet3_dev.txt', 'w')

# # print(len(cs1.readlines()))
# # print(len(reduced_trainset8.readlines()))
# # print(len(reduced_devSet.readlines()))

# for line in asv_train_protocol:
#     _, name, _, _, label = line.split(" ")
#     newTrain.write('- ' + name+'.flac' + ' - - ' + label)
# for line in asv_dev_protocol:
#     _, name, _, _, label = line.split(" ")
#     newDev.write('- ' + name +'.flac' + ' - - ' + label)
# for line in cs1:
#     name, label = line.split(" ")
# #     print('- ' + name + ' - - ' + label)
#     r = random.random()
#     if r < 0.90:
#         newTrain.write('- ' + name + '.wav' + ' - - ' + label)
#     else:
#         newDev.write('- ' + name + '.wav' + ' - - ' + label)
    
# cs1.close()
# asv_train_protocol.close()
# asv_dev_protocol.close()
# newTrain.close()
# newDev.close()

In [9]:
# import shutil
# import os

# asv_train_path = '/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_train/flac/'
# asv_dev_path = '/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_dev/flac/'

# cs1_train_path = '/DATA/nfsshare/rishith/datasets/smaller_dbs/restructured/combined/Train/'

# destination_path = './combinedSet3/'

# trainProtocol = open('./protocols/combinedSet3_train.txt', 'r')
# for line in trainProtocol:
#     _, name, _, _, _ = line.split(" ")
#     source_file = asv_train_path + name
#     destination_file = destination_path + name
#     if(os.path.isfile(source_file)):
#         shutil.copy(source_file, destination_file)
#     source_file = cs1_train_path + name
#     destination_file = destination_path + name 
#     if(os.path.isfile(source_file)):
#         shutil.copy(source_file, destination_file)
    
# devProtocol = open('./protocols/combinedSet3_dev.txt', 'r')
# for line in devProtocol:
#     _, name, _, _, _ = line.split(" ")
#     source_file = asv_dev_path + name 
#     destination_file = destination_path + name
#     if(os.path.isfile(source_file)):
#         shutil.copy(source_file, destination_file)
#     source_file = cs1_train_path + name
#     destination_file = destination_path + name
#     if(os.path.isfile(source_file)):
#         shutil.copy(source_file, destination_file)

        
# trainProtocol.close()
# devProtocol.close()

In [8]:
config["database_path"]

'/data/nfsshare/rishith/datasets/smaller_dbs/restructured/combinedSet3/'

In [9]:
output_dir = Path('./exp_result')
database_path = Path(config["database_path"])
dev_trial_path = Path('./protocols/combinedSet3_dev.txt')
# eval_trial_path = Path('/DATA/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/')

In [10]:
# Derive every experiment artefact path from a single tag:
# <output_dir>/<set>_<model>_ep<epochs>_bs<batch size>
experiment_name = "{}_{}_ep{}_bs{}".format(
    'CS3', 'AASIST-L', config["num_epochs"], config["batch_size"])
model_tag = output_dir / experiment_name
model_save_path = model_tag / "weights"
eval_score_path = model_tag / config["eval_output"]

# TensorBoard events go to the experiment directory; checkpoints to weights/.
writer = SummaryWriter(model_tag)
os.makedirs(model_save_path, exist_ok=True)

In [11]:
def get_model(model_config: Dict, device: torch.device):
    """Build the network named in ``model_config`` and move it to ``device``.

    The class ``Model`` is looked up in the module
    ``models.<model_config["architecture"]>`` and instantiated with the whole
    ``model_config`` dict. The total parameter count is printed.

    Returns:
        The instantiated model, already moved to ``device``.
    """
    arch_module = import_module("models.{}".format(model_config["architecture"]))
    model = arch_module.Model(model_config).to(device)

    # Total number of scalar entries over all parameter tensors.
    nb_params = sum(weights.numel() for weights in model.parameters())
    print("no. model params:{}".format(nb_params))

    return model

In [12]:
# set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device: {}".format(device))
if device == "cpu":
    raise ValueError("GPU not detected!")

# define model architecture
model = get_model(model_config, device)

Device: cuda
no. model params:85306


In [13]:
database_path

PosixPath('/data/nfsshare/rishith/datasets/smaller_dbs/restructured/combinedSet3')

In [14]:
"""Make PyTorch DataLoaders for train / developement / evaluation"""

# Train and dev audio are both read from the combined-set directory.
trn_database_path = dev_database_path = database_path

trn_list_path = Path('./protocols/combinedSet3_train.txt')
dev_trial_path = './protocols/combinedSet3_dev.txt'

# ---- training split: labelled file list, dataset and shuffled loader ----
d_label_trn, file_train = genSpoof_list(
    dir_meta=trn_list_path, is_train=True, is_eval=False)
print("no. training files:", len(file_train))

train_set = Dataset_train_cs2(
    list_IDs=file_train, labels=d_label_trn, base_dir=trn_database_path)

# Dedicated generator so the shuffling order is seeded.
gen = torch.Generator()
gen.manual_seed(1234)
trn_loader = DataLoader(
    train_set,
    batch_size=config["batch_size"],
    shuffle=True,
    drop_last=True,
    pin_memory=True,
    worker_init_fn=seed_worker,
    generator=gen,
)

# ---- development split: file list only, kept in protocol order ----
_, file_dev = genSpoof_list(
    dir_meta=dev_trial_path, is_train=False, is_eval=False)
print("no. validation files:", len(file_dev))

dev_set = Dataset_cs2(list_IDs=file_dev, base_dir=dev_database_path)
dev_loader = DataLoader(
    dev_set,
    batch_size=config["batch_size"],
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)


no. training files: 27848
no. validation files: 25146


In [15]:
eval_trial_path = Path('/data/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt')
eval_database_path = Path('/data/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_eval/')

def genSpoof_list1(dir_meta, is_train=False, is_eval=False):
    """Parse a five-column protocol file into utterance keys and labels.

    Every non-blank line must have exactly five whitespace-separated fields;
    the second is the utterance key and the fifth is the label.

    Args:
        dir_meta: Path of the protocol file.
        is_train: When true, labels are returned together with the keys.
        is_eval: When true (and ``is_train`` is false), only keys are returned.

    Returns:
        ``file_list`` when ``is_eval`` is true and ``is_train`` is false,
        otherwise ``(d_meta, file_list)``. ``d_meta`` maps each key to 1 for
        the label ``"bonafide"`` and to 0 for any other label.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields.
    """
    d_meta = {}
    file_list = []
    with open(dir_meta, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at end of file) are skipped.
                continue
            _, key, _, _, label = fields
            file_list.append(key)
            d_meta[key] = 1 if label == "bonafide" else 0

    # The train branch takes precedence over the eval branch.
    if is_eval and not is_train:
        return file_list
    return d_meta, file_list
    

In [16]:
# Keys of the ASVspoof 2019 LA eval trials, in protocol order.
file_eval = genSpoof_list1(dir_meta=eval_trial_path, is_train=False, is_eval=True)

eval_set = Dataset_ASVspoof2019_devNeval(
    list_IDs=file_eval, base_dir=eval_database_path)

# Order must be preserved: scores are later matched line-by-line to the protocol.
eval_loader = DataLoader(
    eval_set,
    batch_size=config["batch_size"],
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

In [17]:
# get optimizer and scheduler
# steps_per_epoch is added to optim_config before it is handed to create_optimizer.
optim_config["steps_per_epoch"] = len(trn_loader)
optimizer, scheduler = create_optimizer(model.parameters(), optim_config)
# SWA wrapper around the optimizer; the training loop calls update_swa() on it.
optimizer_swa = SWA(optimizer)

# Running bests used by the training loop.
# NOTE(review): getEER returns a percentage, so starting best_dev_eer at 1.
# means no checkpoint is saved until the dev EER drops to 1% or below -
# confirm this threshold is intended for this dataset.
best_dev_eer = 1.
best_eval_eer = 100.
# NOTE(review): best_dev_tdcf is only logged to TensorBoard and best_eval_tdcf
# is not used by any visible cell; neither is ever updated.
best_dev_tdcf = 0.05
best_eval_tdcf = 1.
n_swa_update = 0  # number of snapshots of model to use in SWA
# Metric log, opened in append mode and kept open for the training loop.
# NOTE(review): the handle is never closed in the visible cells.
f_log = open(model_tag / "metric_log.txt", "a")
f_log.write("=" * 5 + "\n")

6

In [18]:
model_tag

PosixPath('exp_result/CS3_AASIST-L_ep100_bs24')

In [24]:
# make directory for metric logging
metric_path = model_tag / "metrics"
os.makedirs(metric_path, exist_ok=True)

In [19]:
def produce_evaluation_file(
    data_loader: DataLoader,
    model,
    device: torch.device,
    save_path: str,
    trial_path: str) -> None:
    """Score every batch of ``data_loader`` and write one line per trial.

    The model is switched to eval mode. For each utterance, column 1 of the
    model's second output is taken as the score. Each output line is
    ``"<utt_id> <key> <score> "``, where ``utt_id`` and ``key`` are the second
    and fifth space-separated fields of the matching line in ``trial_path``.

    Raises:
        AssertionError: If the number of trials and scored utterances differ,
            or an utterance id does not match the protocol line at the same
            position.
    """
    model.eval()
    with open(trial_path, "r") as f_trl:
        trial_lines = f_trl.readlines()

    fname_list, score_list = [], []
    # Inference only: no autograd graph is needed for any batch.
    with torch.no_grad():
        for batch_x, utt_id in data_loader:
            batch_x = batch_x.to(device)
            _, batch_out = model(batch_x)
            batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
            fname_list.extend(utt_id)
            score_list.extend(batch_score.tolist())

    assert len(trial_lines) == len(fname_list) == len(score_list)
    with open(save_path, "w") as fh:
        for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
            _, utt_id, _, _, key = trial_line.strip().split(' ')
            # Loader order must follow protocol order.
            assert scored_name == utt_id
            fh.write("{} {} {} \n".format(utt_id, key, score))
    print("Scores saved to {}".format(save_path))


def train_epoch(
    trn_loader: DataLoader,
    model,
    optim: Union[torch.optim.SGD, torch.optim.Adam],
    device: torch.device,
    scheduler: torch.optim.lr_scheduler,
    config: Dict):
    """Train the model for one epoch.

    Args:
        trn_loader: Loader yielding ``(batch_x, batch_y)`` pairs.
        model: Network called as ``model(batch_x, Freq_aug=...)``; its second
            return value is used as the logits.
        optim: Optimizer stepped once per batch.
        device: Device the batches and class weights are moved to.
        scheduler: Stepped once per batch when
            ``config["optim_config"]["scheduler"]`` is ``"cosine"`` or
            ``"keras_decay"``; otherwise it must be ``None``.
        config: Mapping providing ``"freq_aug"`` and
            ``"optim_config"["scheduler"]``.

    Returns:
        The sample-weighted mean training loss over the epoch.

    Raises:
        ValueError: If the scheduler name is not one of the per-batch
            schedulers and ``scheduler`` is not ``None``.
        ZeroDivisionError: If ``trn_loader`` yields no batches.
    """
    running_loss = 0
    num_total = 0.0
    model.train()

    # set objective (Loss) functions
    weight = torch.FloatTensor([0.1, 0.9]).to(device)
    criterion = nn.CrossEntropyLoss(weight=weight)

    # These do not change during the epoch, so resolve them once up front.
    freq_aug = str_to_bool(config["freq_aug"])
    step_per_batch = config["optim_config"]["scheduler"] in ["cosine", "keras_decay"]

    for batch_x, batch_y in trn_loader:
        batch_size = batch_x.size(0)
        num_total += batch_size
        batch_x = batch_x.to(device)
        batch_y = batch_y.view(-1).type(torch.int64).to(device)
        _, batch_out = model(batch_x, Freq_aug=freq_aug)
        batch_loss = criterion(batch_out, batch_y)
        # Weight by batch size so the final value is a per-sample mean.
        running_loss += batch_loss.item() * batch_size
        optim.zero_grad()
        batch_loss.backward()
        optim.step()

        if step_per_batch:
            scheduler.step()
        elif scheduler is None:
            pass
        else:
            raise ValueError("scheduler error, got:{}".format(scheduler))

    running_loss /= num_total
    return running_loss

In [20]:
import numpy as np
from evaluation import compute_eer
def getEER(score_path, key_col=1, score_col=2):
    """Compute the equal error rate (in percent) from a score file.

    The file is read as whitespace-separated string columns. Rows whose key
    column equals ``"bonafide"`` / ``"spoof"`` form the two score populations
    passed to ``compute_eer``.

    Args:
        score_path: Path of the score file.
        key_col: Zero-based column holding the ``bonafide``/``spoof`` key.
            The default matches the three-column ``<utt> <key> <score>`` files.
        score_col: Zero-based column holding the numeric score.

    Returns:
        The first value returned by ``compute_eer``, multiplied by 100.
    """
    # atleast_2d keeps column indexing valid when the file has a single row,
    # for which genfromtxt returns a 1-D array.
    cm_data = np.atleast_2d(np.genfromtxt(score_path, dtype=str))
    cm_keys = cm_data[:, key_col]
    cm_scores = cm_data[:, score_col].astype(float)
    bona_cm = cm_scores[cm_keys == 'bonafide']
    spoof_cm = cm_scores[cm_keys == 'spoof']
    eer_cm = compute_eer(bona_cm, spoof_cm)[0]
    return eer_cm * 100

In [21]:
!echo $CUDA_VISIBLE_DEVICES

0


In [29]:
# Training
for epoch in range(config["num_epochs"]):
    print("Start training epoch{:03d}".format(epoch))
    running_loss = train_epoch(trn_loader, model, optimizer, device,
                               scheduler, config)

    # Score the dev split and derive its EER (in percent) from the score file.
    produce_evaluation_file(dev_loader, model, device,
                            metric_path/"dev_score.txt", dev_trial_path)
    dev_eer = getEER(metric_path/"dev_score.txt")
    print("DONE.\nLoss:{:.5f}, dev_eer: {:.3f}".format(
        running_loss, dev_eer))
    writer.add_scalar("loss", running_loss, epoch)
    writer.add_scalar("dev_eer", dev_eer, epoch)

    if best_dev_eer >= dev_eer:
        print("best model find at epoch", epoch)
        best_dev_eer = dev_eer
        torch.save(model.state_dict(),
                   model_save_path / "epoch_{}_{:03.3f}.pth".format(epoch, dev_eer))

        # do evaluation whenever best model is renewed
        if str_to_bool(config["eval_all_best"]):
            produce_evaluation_file(eval_loader, model, device,
                                    eval_score_path, eval_trial_path)
            # dev_eer is not recomputed here: the dev score file has not
            # changed since it was read above.
            eval_eer = getEER(eval_score_path)

            log_text = "epoch{:03d}, ".format(epoch)
            if eval_eer < best_eval_eer:
                log_text += "best eer, {:.4f}%".format(eval_eer)
                best_eval_eer = eval_eer
                torch.save(model.state_dict(),
                           model_save_path / "best.pth")
            print(log_text)
            f_log.write(log_text + "\n")
            # Flush so the entry reaches disk even if a later epoch crashes.
            f_log.flush()

        print("Saving epoch {} for swa".format(epoch))
        optimizer_swa.update_swa()
        n_swa_update += 1
    writer.add_scalar("best_dev_eer", best_dev_eer, epoch)
    writer.add_scalar("best_dev_tdcf", best_dev_tdcf, epoch)

Start training epoch000


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.48 GiB (GPU 0; 10.76 GiB total capacity; 8.45 GiB already allocated; 383.06 MiB free; 9.36 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Testing best model after 100 epochs

In [22]:
eval_eer = getEER('./exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_cs3.txt')
print(eval_eer)

1.5775338329330904


In [23]:
config["model_path"] = './exp_result/CS3_AASIST-L_ep100_bs24/weights/best.pth'

In [24]:
model.load_state_dict(
            torch.load(config["model_path"], map_location=device))

<All keys matched successfully>

In [25]:
print("Model loaded : {}".format(config["model_path"]))

Model loaded : ./exp_result/CS3_AASIST-L_ep100_bs24/weights/best.pth


# LJ

In [26]:
# list of dataset partitions
SET_PARTITION = ["trn", "eval"]

# list of countermeasure(CM) protocols
SET_CM_PROTOCOL = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/lj_train_protocol.txt",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/lj_test_protocol.txt",
}

# directories of each dataset partition
SET_DIR = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/LJ/train/",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/LJ/test/",
}

In [27]:
# Dataloader
meta_lines = open(SET_CM_PROTOCOL["eval"], "r").readlines()
utt_list = []
for line in meta_lines:
    tmp = line.strip().split(" ")

    utt = tmp[0]
    utt_list.append(utt)

base_dir = SET_DIR['eval']
dataset = Dataset_PrevDbs(utt_list, Path(base_dir))
lj_loader = DataLoader(
        dataset, batch_size=24, shuffle=False, drop_last=False, pin_memory=True
    )

In [28]:
model.eval()

Model(
  (conv_time): CONV()
  (first_bn): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop): Dropout(p=0.5, inplace=True)
  (drop_way): Dropout(p=0.2, inplace=True)
  (selu): SELU(inplace=True)
  (encoder): Sequential(
    (0): Sequential(
      (0): Residual_block(
        (conv1): Conv2d(1, 32, kernel_size=(2, 3), stride=(1, 1), padding=(1, 1))
        (selu): SELU(inplace=True)
        (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(32, 32, kernel_size=(2, 3), stride=(1, 1), padding=(0, 1))
        (conv_downsample): Conv2d(1, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
        (mp): MaxPool2d(kernel_size=(1, 3), stride=(1, 3), padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): Sequential(
      (0): Residual_block(
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(32, 32, kernel_size=(2, 3),

In [29]:
eval_score_path = Path('exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_lj.txt')

In [30]:
# Score the LJ test utterances and write "<utt_id> <key> <score>" lines.
trial_path = SET_CM_PROTOCOL['eval']
with open(trial_path, "r") as f_trl:
    trial_lines = f_trl.readlines()

fname_list, score_list = [], []
# Inference only: no autograd graph is needed for any batch.
with torch.no_grad():
    for batch_x, utt_id in lj_loader:
        batch_x = batch_x.to(device)
        _, batch_out = model(batch_x)
        # Column 1 of the model's second output is used as the score.
        batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
        fname_list.extend(utt_id)
        score_list.extend(batch_score.tolist())

assert len(trial_lines) == len(fname_list) == len(score_list)
with open(eval_score_path, "w") as fh:
    for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
        utt_id, key = trial_line.strip().split(' ')
        # Loader order must follow protocol order.
        assert scored_name == utt_id
        fh.write("{} {} {}\n".format(utt_id, key, score))

In [31]:
eer_lj = getEER(eval_score_path)
print('LJ EER: ', eer_lj)

LJ EER:  2.0408163265306123


# Libri

In [33]:
# list of dataset partitions
SET_PARTITION = ["trn", "eval"]

# list of countermeasure(CM) protocols
SET_CM_PROTOCOL = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/libri_train_protocol.txt",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/libri_test_protocol.txt",
}

# directories of each dataset partition
SET_DIR = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/Libri/train/",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/Libri/test/",
}

In [34]:
# Dataloader
meta_lines = open(SET_CM_PROTOCOL["eval"], "r").readlines()
utt_list = []
for line in meta_lines:
    tmp = line.strip().split(" ")

    utt = tmp[0]
    utt_list.append(utt)

base_dir = SET_DIR['eval']
dataset = Dataset_PrevDbs(utt_list, Path(base_dir))
libri_loader = DataLoader(
        dataset, batch_size=24, shuffle=False, drop_last=False, pin_memory=True
    )

In [35]:
eval_score_path = Path('exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_libri')

In [36]:
# Score the Libri test utterances and write "<utt_id> <key> <score>" lines.
trial_path = SET_CM_PROTOCOL['eval']
with open(trial_path, "r") as f_trl:
    trial_lines = f_trl.readlines()

fname_list, score_list = [], []
# Inference only: no autograd graph is needed for any batch.
with torch.no_grad():
    for batch_x, utt_id in libri_loader:
        batch_x = batch_x.to(device)
        _, batch_out = model(batch_x)
        # Column 1 of the model's second output is used as the score.
        batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
        fname_list.extend(utt_id)
        score_list.extend(batch_score.tolist())

assert len(trial_lines) == len(fname_list) == len(score_list)
with open(eval_score_path, "w") as fh:
    for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
        utt_id, key = trial_line.strip().split(' ')
        # Loader order must follow protocol order.
        assert scored_name == utt_id
        fh.write("{} {} {}\n".format(utt_id, key, score))

In [37]:
eer_libri = getEER(eval_score_path)
print('Libri EER: ', eer_libri)

Libri EER:  4.761904761904762


# CMU

In [39]:
# list of dataset partitions
SET_PARTITION = ["trn", "eval"]

# list of countermeasure(CM) protocols
SET_CM_PROTOCOL = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/cmu_train_protocol.txt",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/protocols/cmu_test_protocol.txt",
}

# directories of each dataset partition
SET_DIR = {
    "trn": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/CMU/train/",
    "eval": "/data/nfsshare/rishith/datasets/smaller_dbs/restructured/CMU/test/",
}

In [40]:
# Dataloader
meta_lines = open(SET_CM_PROTOCOL["eval"], "r").readlines()
utt_list = []
for line in meta_lines:
    tmp = line.strip().split(" ")

    utt = tmp[0]
    utt_list.append(utt)

base_dir = SET_DIR['eval']
dataset = Dataset_PrevDbs(utt_list, Path(base_dir))
cmu_loader = DataLoader(
        dataset, batch_size=24, shuffle=False, drop_last=False, pin_memory=True
    )

In [41]:
eval_score_path = Path('exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_cmu.txt')

In [42]:
# Score the CMU test utterances and write "<utt_id> <key> <score>" lines.
trial_path = SET_CM_PROTOCOL['eval']
with open(trial_path, "r") as f_trl:
    trial_lines = f_trl.readlines()

fname_list, score_list = [], []
# Inference only: no autograd graph is needed for any batch.
with torch.no_grad():
    for batch_x, utt_id in cmu_loader:
        batch_x = batch_x.to(device)
        _, batch_out = model(batch_x)
        # Column 1 of the model's second output is used as the score.
        batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
        fname_list.extend(utt_id)
        score_list.extend(batch_score.tolist())

assert len(trial_lines) == len(fname_list) == len(score_list)
with open(eval_score_path, "w") as fh:
    for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
        utt_id, key = trial_line.strip().split(' ')
        # Loader order must follow protocol order.
        assert scored_name == utt_id
        fh.write("{} {} {}\n".format(utt_id, key, score))

In [43]:
eer_cmu = getEER(eval_score_path)
print('CMU EER: ', eer_cmu)

CMU EER:  0.0


# ASVspoof 2019

In [47]:
eval_trial_path = Path('/data/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt')
eval_database_path = Path('/data/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/ASVspoof2019_LA_eval/')

In [48]:
def genSpoof_list1(dir_meta, is_train=False, is_eval=False):
    """Parse a five-column protocol file into utterance keys and labels.

    NOTE(review): a function with this name is already defined earlier in the
    notebook; this later definition replaces it in the kernel namespace.

    Every non-blank line must have exactly five whitespace-separated fields;
    the second is the utterance key and the fifth is the label.

    Args:
        dir_meta: Path of the protocol file.
        is_train: When true, labels are returned together with the keys.
        is_eval: When true (and ``is_train`` is false), only keys are returned.

    Returns:
        ``file_list`` when ``is_eval`` is true and ``is_train`` is false,
        otherwise ``(d_meta, file_list)``. ``d_meta`` maps each key to 1 for
        the label ``"bonafide"`` and to 0 for any other label.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields.
    """
    d_meta = {}
    file_list = []
    with open(dir_meta, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at end of file) are skipped.
                continue
            _, key, _, _, label = fields
            file_list.append(key)
            d_meta[key] = 1 if label == "bonafide" else 0

    # The train branch takes precedence over the eval branch.
    if is_eval and not is_train:
        return file_list
    return d_meta, file_list

In [49]:
# Keys of the ASVspoof 2019 LA eval trials, in protocol order.
file_eval = genSpoof_list1(dir_meta=eval_trial_path, is_train=False, is_eval=True)

eval_set = Dataset_ASVspoof2019_devNeval(
    list_IDs=file_eval, base_dir=eval_database_path)

# Order must be preserved: scores are later matched line-by-line to the protocol.
eval_loader = DataLoader(
    eval_set,
    batch_size=config["batch_size"],
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)

In [50]:
eval_set[0]

(tensor([0.0009, 0.0008, 0.0007,  ..., 0.1776, 0.1562, 0.1233]),
 'LA_E_2834763')

In [51]:
eval_score_path = Path('exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_asv')

In [52]:
# Score the ASVspoof 2019 eval trials and write
# "<utt_id> <src> <key> <score>" lines (four columns).
trial_path = eval_trial_path
with open(trial_path, "r") as f_trl:
    trial_lines = f_trl.readlines()

fname_list, score_list = [], []
# Inference only: no autograd graph is needed for any batch.
with torch.no_grad():
    for batch_x, utt_id in eval_loader:
        batch_x = batch_x.to(device)
        _, batch_out = model(batch_x)
        # Column 1 of the model's second output is used as the score.
        batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
        fname_list.extend(utt_id)
        score_list.extend(batch_score.tolist())
assert len(trial_lines) == len(fname_list) == len(score_list)

with open(eval_score_path, "w") as fh:
    for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
        _, utt_id, _, src, key = trial_line.strip().split(' ')
        # Loader order must follow protocol order.
        assert scored_name == utt_id
        fh.write("{} {} {} {}\n".format(utt_id, src, key, score))
print("Scores saved to {}".format(eval_score_path))

Scores saved to exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_asv


In [53]:
len(trial_lines) == len(fname_list) == len(score_list)

True

In [54]:
def getEER1(score_path):
    """Compute the EER (in percent) from a four-column score file.

    Column 2 holds the ``bonafide``/``spoof`` key and column 3 the score
    (zero-based), i.e. ``<utt_id> <src> <key> <score>`` lines.
    """
    rows = np.genfromtxt(score_path, dtype=str)
    labels = rows[:, 2]
    scores = rows[:, 3].astype(float)
    # Split the scores into the two populations expected by compute_eer.
    eer_cm = compute_eer(scores[labels == 'bonafide'], scores[labels == 'spoof'])[0]
    return eer_cm * 100

In [55]:
eer_asv = getEER1(eval_score_path)
print('ASV EER: ', eer_asv)

ASV EER:  1.729799510743923


In [56]:
def produce_evaluation_file(
    data_loader: DataLoader,
    model,
    device: torch.device,
    save_path: str,
    trial_path: str) -> None:
    """Score every batch of ``data_loader`` and write four-column score lines.

    NOTE(review): this redefines the function of the same name declared
    earlier in the notebook, which writes three columns. After this cell runs,
    the training loop's ``getEER`` (which reads columns 1 and 2) no longer
    matches the files this version produces.

    The model is switched to eval mode. For each utterance, column 1 of the
    model's second output is taken as the score. Each output line is
    ``"<utt_id> <src> <key> <score>"``, where the three labels are the second,
    fourth and fifth space-separated fields of the matching line in
    ``trial_path``.

    Raises:
        AssertionError: If the number of trials and scored utterances differ,
            or an utterance id does not match the protocol line at the same
            position.
    """
    model.eval()
    with open(trial_path, "r") as f_trl:
        trial_lines = f_trl.readlines()

    fname_list, score_list = [], []
    # Inference only: no autograd graph is needed for any batch.
    with torch.no_grad():
        for batch_x, utt_id in data_loader:
            batch_x = batch_x.to(device)
            _, batch_out = model(batch_x)
            batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
            fname_list.extend(utt_id)
            score_list.extend(batch_score.tolist())

    assert len(trial_lines) == len(fname_list) == len(score_list)
    with open(save_path, "w") as fh:
        for scored_name, score, trial_line in zip(fname_list, score_list, trial_lines):
            _, utt_id, _, src, key = trial_line.strip().split(' ')
            # Loader order must follow protocol order.
            assert scored_name == utt_id
            fh.write("{} {} {} {}\n".format(utt_id, src, key, score))
    print("Scores saved to {}".format(save_path))

In [57]:
produce_evaluation_file(eval_loader, model, device, eval_score_path,
                            eval_trial_path)

Scores saved to exp_result/CS3_AASIST-L_ep100_bs24/eval_scores_cs3_asv


In [58]:
eer_asv = getEER1(eval_score_path)
print('ASV EER: ', eer_asv)

ASV EER:  1.729799510743923
