In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn.functional as F

In [2]:
# Both CSV index files sit in the same embeddings directory; each one is read
# with pandas and lists a `_path` and a `spk_id` per row.
_embd_dir = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/embeddings_10'
path_embd_test = f'{_embd_dir}/test_samples.csv'
path_embd_enroll = f'{_embd_dir}/enroll_samples.csv'

In [3]:
def load_enroll(path, map_location='cpu'):
    """Load one enrollment embedding per speaker from a CSV index.

    The CSV must provide a ``_path`` column (the file handed to
    ``torch.load``) and a ``spk_id`` column (used as the dictionary key).

    Args:
        path: Location of the CSV index, passed to ``pd.read_csv``.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so
            that tensors saved from a GPU session still load on a CPU-only
            machine; pass ``None`` to get torch's default placement.

    Returns:
        dict mapping each ``spk_id`` to the object loaded from its ``_path``.
        When a speaker id appears in several rows, the last row wins.

    Raises:
        KeyError: if the CSV lacks the ``_path`` or ``spk_id`` column.
    """
    index = pd.read_csv(path)

    # NOTE(security): torch.load relies on pickle; only point this at
    # embedding files that come from a trusted source.
    return {
        spk: torch.load(embd_path, map_location=map_location)
        for embd_path, spk in zip(index['_path'], index['spk_id'])
    }

def load_test(path, map_location='cpu'):
    """Load every test embedding listed in a CSV index.

    The CSV must provide a ``_path`` column (the file handed to
    ``torch.load``) and a ``spk_id`` column (the label of that sample).

    Args:
        path: Location of the CSV index, passed to ``pd.read_csv``.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so
            that tensors saved from a GPU session still load on a CPU-only
            machine; pass ``None`` to get torch's default placement.

    Returns:
        list of ``(embedding, spk_id)`` tuples, one per CSV row and in CSV
        row order.

    Raises:
        KeyError: if the CSV lacks the ``_path`` or ``spk_id`` column.
    """
    index = pd.read_csv(path)

    # NOTE(security): torch.load relies on pickle; only point this at
    # embedding files that come from a trusted source.
    return [
        (torch.load(embd_path, map_location=map_location), spk)
        for embd_path, spk in zip(index['_path'], index['spk_id'])
    ]

In [4]:
# Enrollment embeddings keyed by speaker id; `users` freezes the key order in
# which enrolled speakers are visited during scoring.
enroll_embd = load_enroll(path_embd_enroll)
users = [*enroll_embd]

In [5]:
# Test set as a list of (embedding, speaker id) pairs, in CSV row order.
test_embd = load_test(path_embd_test)

In [10]:
# Closed-set speaker identification: every test embedding is compared with
# every enrolled speaker and assigned to the closest one, once by cosine
# similarity (higher is better) and once by squared distance (lower is better).

# NOTE(review): `track_matrices` and `metrics` are initialised but never
# updated in this cell; they are kept in case other cells read them.
track_matrices = {}
metrics = {'TP':0, 'FP':0, 'FN': 0, 'TN': 0}

# Counts (not ratios) of test samples whose predicted speaker is the true one.
acc_cosine = 0
acc_mse = 0

DB_1_rows = []  # [true speaker, predicted speaker, best cosine similarity]
DB_2_rows = []  # [true speaker, predicted speaker, smallest squared distance]

for to_test, test_user in test_embd:

    # Reset BOTH winners for every test sample so that a prediction can never
    # leak over from the previous sample (and so an empty `users` list yields
    # None instead of a NameError).
    max_score = float('-inf')
    min_score = float('inf')
    best_spk = None
    best_mse = None

    for enroll_user in users:
        reference = enroll_embd[enroll_user]

        # Only the first similarity value is used.
        # assumes embeddings are shaped (1, dim) - TODO confirm
        # .item() yields a plain Python float regardless of device/grad state.
        score = F.cosine_similarity(reference, to_test)[0].item()

        # Despite the name this is the SUM of squared differences (no mean).
        mse = ((to_test - reference) ** 2).sum().item()

        # Strict comparisons: on ties the first enrolled speaker wins.
        if score > max_score:
            best_spk = enroll_user
            max_score = score

        if mse < min_score:
            best_mse = enroll_user
            min_score = mse

    if test_user == best_spk:
        acc_cosine += 1

    if test_user == best_mse:
        acc_mse += 1

    DB_1_rows.append([test_user, best_spk, max_score])
    DB_2_rows.append([test_user, best_mse, min_score])


# One row per test sample; `score` is the winning similarity / distance.
DB_cosine = pd.DataFrame(DB_1_rows, columns=['true', 'pred', 'score'])
DB_mse = pd.DataFrame(DB_2_rows, columns=['true', 'pred', 'score'])

In [11]:
def analyze(db):
    """Print accuracy and mean scores for a results table and locate the misses.

    Args:
        db: DataFrame with ``true``, ``pred`` and ``score`` columns, one row
            per evaluated sample.

    Returns:
        The tuple produced by ``np.where`` holding the positional indices of
        the rows where ``pred`` differs from ``true``.

    Notes:
        An empty table prints ``nan`` for the accuracy instead of raising
        ``ZeroDivisionError``. A mean over zero rows is reported by pandas
        as ``nan`` as well.
    """
    # Build the two row masks once and reuse them for every statistic.
    is_correct = db['true'] == db['pred']
    is_wrong = db['pred'] != db['true']

    n_rows = len(db)
    acc = int(is_correct.sum()) / n_rows if n_rows else float('nan')

    avg_correct = db.loc[is_correct, 'score'].mean()
    avg_wrong = db.loc[is_wrong, 'score'].mean()

    print(f'Accuracy: {acc}')
    print(f'Average score CORRECT: {avg_correct}')
    print(f'Average score WRONG: {avg_wrong}')

    idx_wrong = np.where(is_wrong)

    return idx_wrong

In [12]:
# Report identification results when the enrolled speaker with the highest
# cosine similarity is taken as the prediction.
print('== Cosine ==\n')
# idx_c: np.where tuple with the positions of the misidentified test samples.
idx_c = analyze(DB_cosine)

== Cosine ==

Accuracy: 0.8958974358974359
Average score CORRECT: 0.9939929807124713
Average score WRONG: 0.9866936594394627


In [13]:
# Report identification results when the enrolled speaker with the smallest
# squared distance is taken as the prediction.
# NOTE: the 'score' column of DB_mse is a summed squared difference (it is not
# divided by the embedding size), so lower means closer.
print('== MSE ==\n')
# idx_m: np.where tuple with the positions of the misidentified test samples.
idx_m = analyze(DB_mse)

== MSE ==

Accuracy: 0.8958974358974359
Average score CORRECT: 1.2014076307151544
Average score WRONG: 2.661263000788947
