In [1]:
# source: https://stackoverflow.com/questions/28339746/equal-error-rate-in-python
from scipy.optimize import brentq
from scipy.interpolate import interp1d
from sklearn.metrics import roc_curve

def cal_eer(y, y_score):
    """Compute the equal error rate (EER) of a binary scorer and its threshold.

    The ROC curve comes from ``roc_curve`` with ``pos_label=1`` and is
    linearly interpolated. The EER is the false-positive rate ``x`` at which
    ``1 - x`` equals the interpolated true-positive rate, located with
    ``brentq`` on the interval ``[0, 1]``.

    Args:
        y: Ground-truth binary labels; 1 marks the positive class.
        y_score: One score per label.

    Returns:
        tuple: ``(eer, thresh)``. ``eer`` is the root returned by ``brentq``;
        ``thresh`` is the decision threshold linearly interpolated at that
        false-positive rate (a 0-d array).
    """
    fpr, tpr, thresholds = roc_curve(y, y_score, pos_label=1)
    # Build each interpolator once, outside the root-finder callback, so it
    # is not reconstructed on every function evaluation.
    tpr_at = interp1d(fpr, tpr)
    thresh_at = interp1d(fpr, thresholds)
    eer = brentq(lambda x: 1. - x - tpr_at(x), 0., 1.)
    # NOTE(review): recent scikit-learn releases appear to report the first
    # threshold as infinity; if the EER falls in the first ROC segment the
    # interpolated threshold may then be inf/nan -- confirm against the
    # installed version.
    thresh = thresh_at(eer)
    return eer, thresh


# Toy example used to sanity-check the function.
y = [1, 1, 0, 0, 1]
y_score = [0.3, 0.1, 0.4, 0.8, 0.9]
print(cal_eer(y, y_score))

(0.6666666666666665, array(0.56666667))


In [2]:
# # NOTE: Uncomment this cell to convert the .m4a files under hindi/valid/audio to .wav files in hindi/valid/wav

# # converting m4a to wav and saving to new directory
# from pydub import AudioSegment
# from pathlib import Path
# from tqdm.auto import tqdm

# audio_path = Path("../../data/kb_data_clean_m4a/hindi/valid/audio")
# wav_path = Path("../../data/kb_data_clean_m4a/hindi/valid/wav").mkdir(parents=True, exist_ok=True)

# # print(list(audio_path.glob("*.m4a"))[:5])
# all_m4a_files = list(audio_path.glob("*.m4a"))
# for audio_file in tqdm(all_m4a_files):
#     audio = AudioSegment.from_file(str(audio_file))
#     audio.export("../../data/kb_data_clean_m4a/hindi/valid/wav/"+str(audio_file).split("/")[-1].replace('m4a','wav'), format='wav')

In [3]:
# Read the VoxCeleb verification scores into a DataFrame and preview it.
import pandas as pd

voxceleb_scores_csv = "/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/downstreams/speaker_verification/verification_scores_vox_batch_128.csv"
df_voxceleb = pd.read_csv(voxceleb_scores_csv)
df_voxceleb.head()

Unnamed: 0,label,person1,person2,ecapa_tdnn,hubert_large,wavlm_base_plus,wavlm_large
0,1,id10001/Y8hIVOBuels/00001.wav,id10001/utrA-v8pPm4/00001.wav,0.958375,0.990806,0.986654,0.991306
1,0,id10001/Y8hIVOBuels/00001.wav,id10341/rX4LkvzySSM/00014.wav,0.98657,0.992295,0.987697,0.991324
2,1,id10001/Y8hIVOBuels/00001.wav,id10001/zELwAz2W6hM/00010.wav,0.983272,0.99181,0.986436,0.989566
3,0,id10001/Y8hIVOBuels/00001.wav,id10341/5DAommAsxmE/00007.wav,0.990401,0.990836,0.985041,0.989352
4,1,id10001/Y8hIVOBuels/00002.wav,id10001/zELwAz2W6hM/00005.wav,0.988819,0.993271,0.989406,0.9922


In [4]:
# Per-model EER on the VoxCeleb trial scores
labels = df_voxceleb['label']
scores_model_dict = {
    name: df_voxceleb[name]
    for name in ("ecapa_tdnn", "hubert_large", "wavlm_base_plus", "wavlm_large")
}

for model in scores_model_dict:
    # map scores (assumed to lie in (-1, 1)) onto (0, 1)
    scores = (scores_model_dict[model] + 1) / 2
    eer, thresh = cal_eer(labels, scores)
    print(f"Model: {model}, EER: {eer}")

Model: ecapa_tdnn, EER: 0.41272242894583616
Model: hubert_large, EER: 0.4788619852968679
Model: wavlm_base_plus, EER: 0.4765510117467778
Model: wavlm_large, EER: 0.4735674803044982


In [5]:
# Read the Hindi verification scores into a DataFrame and preview it.
import pandas as pd

hindi_scores_csv = "/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/downstreams/speaker_verification/verification_scores_hindi_batch_128.csv"
df_hindi = pd.read_csv(hindi_scores_csv)
df_hindi.head()

Unnamed: 0,label,person1,person2,ecapa_tdnn,hubert_large,wavlm_base_plus,wavlm_large
0,0,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,0.982102,0.975679,0.979226,0.982783
1,1,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,0.993843,0.986117,0.984444,0.984283
2,1,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,0.98932,0.988016,0.986069,0.983059
3,0,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,0.979724,0.984716,0.986421,0.985397
4,1,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb...,0.901027,0.97238,0.978996,0.971999


In [6]:
# Per-model EER on the Hindi trial scores
labels = df_hindi['label']
scores_model_dict = {
    name: df_hindi[name]
    for name in ("ecapa_tdnn", "hubert_large", "wavlm_base_plus", "wavlm_large")
}

for model in scores_model_dict:
    # map scores (assumed to lie in (-1, 1)) onto (0, 1)
    scores = (scores_model_dict[model] + 1) / 2
    eer, thresh = cal_eer(labels, scores)
    print(f"Model: {model}, EER: {eer}")

Model: ecapa_tdnn, EER: 0.363457681508223
Model: hubert_large, EER: 0.38815317112192127
Model: wavlm_base_plus, EER: 0.42317510969336003
Model: wavlm_large, EER: 0.41407942238246065


In [7]:
from torch.utils.data import Dataset
from torchaudio.sox_effects import apply_effects_file
import os

# Effect chain passed to apply_effects_file for every utterance.
# Each entry is one effect name followed by its string arguments.
# Two further entries are kept disabled:
#   ["channels", "1"],
#   ["rate", "16000"],
EFFECTS = [
    # presumably a -3 dB gain adjustment -- confirm against the SoX docs
    ["gain", "-3.0"],
    # presumably trims silence from the audio -- confirm against the SoX docs
    ["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
]

class SpeakerVerifi_test(Dataset):
    """Dataset of labelled utterance pairs for speaker-verification testing.

    Each item loads both waveforms of a pair through ``apply_effects_file``
    using the module-level ``EFFECTS`` chain.

    Args:
        vad_config: Stored as ``self.vad_c``; not otherwise used by this class.
        file_path: Directory holding the audio files. Only the last
            ``/``-separated component of each path listed in the metadata is
            kept and joined onto this directory.
        meta_data: Path to a text file with one whitespace-separated trial per
            line, in the form ``<label> <path1> <path2>``.
    """

    def __init__(self, vad_config, file_path, meta_data):
        self.root = file_path
        self.meta_data = meta_data
        self.necessary_dict = self.processing()
        self.vad_c = vad_config
        self.dataset = self.necessary_dict['pair_table']

    def processing(self):
        """Parse the metadata file into a table of ``[label, path1, path2]``.

        Blank or whitespace-only lines are skipped so that stray empty lines
        in the file do not raise ``IndexError``.

        Returns:
            dict: ``"pair_table"`` holds the parsed rows; ``"spk_paths"`` and
            ``"total_spk_num"`` are always ``None``.

        Raises:
            IndexError: If a non-blank line has fewer than three fields.
        """
        pair_table = []
        with open(self.meta_data, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Nothing to parse on an empty / whitespace-only line.
                    continue
                # Re-root both files under self.root, keeping only the file name.
                first_path = os.path.join(self.root, fields[1].split("/")[-1])
                second_path = os.path.join(self.root, fields[2].split("/")[-1])
                pair_table.append([fields[0], first_path, second_path])
        return {
            "spk_paths": None,
            "total_spk_num": None,
            "pair_table": pair_table
        }

    def __len__(self):
        """Return the number of trial pairs."""
        return len(self.necessary_dict['pair_table'])

    def __getitem__(self, idx):
        """Load one trial pair.

        Returns:
            tuple: ``(wav1, wav2, x1_name, x2_name, label)``. The waveforms
            are NumPy arrays, the names are the full file paths of the two
            utterances, and ``label`` is an ``int``.
        """
        y_label, x1_path, x2_path = self.dataset[idx]

        # The second return value of apply_effects_file is discarded.
        wav1, _ = apply_effects_file(x1_path, EFFECTS)
        wav2, _ = apply_effects_file(x2_path, EFFECTS)

        # squeeze(0) drops the leading dimension when it has size 1.
        wav1 = wav1.squeeze(0)
        wav2 = wav2.squeeze(0)

        # Only the first character of the label field is converted to int.
        return wav1.numpy(), wav2.numpy(), x1_path, x2_path, int(y_label[0])

    def collate_fn(self, data_sample):
        """Merge a list of items into one batch.

        Returns:
            tuple: ``(all_wavs, all_names, ylabels)``. ``all_wavs`` and
            ``all_names`` list every first utterance of the batch followed by
            every second utterance; ``ylabels`` has one label per pair.
        """
        wavs1, wavs2, x1_names, x2_names, ylabels = zip(*data_sample)
        all_wavs = wavs1 + wavs2
        all_names = x1_names + x2_names
        return all_wavs, all_names, ylabels

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [8]:
# Build the dataset for the Hindi `valid` split; no VAD configuration is passed.
hindi_dataset = SpeakerVerifi_test(
    vad_config=None,
    file_path="/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb_data_clean_m4a/hindi/valid/wav",
    meta_data="/DATA1/bikash_dutta/CS/SP/A2/UniSpeech/data/kb_data_clean_m4a/meta_data/hindi/valid_data.txt",
)