In [2]:
import os
import plotly.express as px
import plotly.io as pio
import pandas as pd

# The notebook expects to run from the project root. When the current
# directory has a different name, step up exactly one level.
_start_dir = os.getcwd()
if os.path.basename(_start_dir) != 'in-the-wild-verification':
    os.chdir(os.path.dirname(_start_dir))

# Show the directory that the relative paths below are resolved against.
print(os.getcwd())

/Users/Bianka/Desktop/szkoła/biometria/in-the-wild-verification


In [None]:
def plot_line(x, y, x_label, y_label):
    """Show an interactive Plotly line chart of ``y`` against ``x``.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        x_label: Axis title displayed for ``x``.
        y_label: Axis title displayed for ``y``.

    Returns:
        None. The figure is displayed, not returned.
    """
    axis_titles = {'x': x_label, 'y': y_label}
    figure = px.line(x=x, y=y, labels=axis_titles)

    figure.update_layout(
        height=400,
        width=800,
        showlegend=False,
        font=dict(family="CMU Serif", size=14),
    )

    # Settings for the mode bar's "download plot" button
    # ('format' may be one of png, svg, jpeg, webp).
    download_options = {
        'format': 'png',
        'height': 600,
        'width': 900,
        'scale': 6,
    }
    figure.show(config={'toImageButtonOptions': download_options})
    # A static copy could instead be written with pio.write_image(...).

In [None]:
# Read the tab-separated statistics file stored next to the checkpoints.
# index_col=False stops pandas from using the first column as the index.
results = pd.read_csv(
    "speech_id_checkpoint/Stats",
    sep='\t',
    index_col=False,
)

# Preview the first rows.
results.head()

In [None]:
# Logged total loss, one value per row of the stats table.
tloss = results['Total loss'].tolist()

# Average every consecutive group of 6 rows (the divisor is always 6, even
# when the final group is shorter), then discard that final group.
tloss_epoch = [
    sum(tloss[start:start + 6]) / 6
    for start in range(0, len(tloss), 6)
]
del tloss_epoch[-1:]

# Horizontal axis: one index per averaged group, plotted as an epoch.
epochs = range(int((len(results) - 1) / 6))

plot_line(x=epochs, y=tloss_epoch, x_label='Epoch', y_label='Total loss')

In [3]:
from speech_embedder_net import SpeechEmbedder
from speech_model.utils import mfccs_and_spec
import torch

class SpekerVerifier:
    """Load a trained ``SpeechEmbedder`` and compute speech embeddings with it.

    The model is switched to evaluation mode on construction, and embeddings
    are computed with autograd disabled, so returned tensors carry no
    computation graph.
    """

    def __init__(
            self,
            model_path: str = 'speech_id_checkpoint/ckpt_epoch_180_batch_id_191.pth'
            ) -> None:
        """Create the network and restore its weights.

        Args:
            model_path: Path to a checkpoint file containing a ``state_dict``
                that ``SpeechEmbedder.load_state_dict`` accepts.
        """
        self.model = SpeechEmbedder()
        # map_location='cpu' lets a checkpoint saved from a GPU be restored on
        # a machine without CUDA. load_state_dict copies the values into the
        # model's existing parameters, so the model's own device is unchanged.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        state_dict = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict(state_dict)
        self.model.eval()

    def compute_embedding(self, audio_input):
        """Return the model output for a single utterance.

        Args:
            audio_input: Passed unchanged to ``mfccs_and_spec`` together with
                ``wav_process=True``.

        Returns:
            The tensor produced by the model for a batch holding this one
            utterance. It does not require gradients.
        """
        # Only the second of the three returned values is used.
        _, mel_db, _ = mfccs_and_spec(audio_input, wav_process=True)
        mel_db = torch.as_tensor(mel_db, dtype=torch.float32)
        # Prepend a batch dimension of size 1: (d0, d1) -> (1, d0, d1).
        enrollment_batch = mel_db.reshape(1, mel_db.size(0), mel_db.size(1))
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            embedding = self.model(enrollment_batch)
        return embedding


# Correctly spelled alias; the original class name is kept for existing callers.
SpeakerVerifier = SpekerVerifier

In [4]:
# Reference utterance, a second utterance from the same speaker directory
# (id10001), and one from a different speaker directory (id10006).
sample_file = 'split_data/train/id10001/00001.wav'
sample_file_same = 'split_data/train/id10001/00002.wav'
sample_file_diff = 'split_data/train/id10006/00002.wav'

model = SpekerVerifier()

# Embed the three files in order: reference, same speaker, different speaker.
embedding_org, embedding_same, embedding_diff = (
    model.compute_embedding(wav_path)
    for wav_path in (sample_file, sample_file_same, sample_file_diff)
)

In [5]:
import torch.nn.functional as F

# Cosine similarity between the reference embedding and the embedding of the
# second id10001 file (dim=1 is the function's default, spelled out here).
F.cosine_similarity(embedding_org, embedding_same, dim=1)

tensor([0.8931], grad_fn=<SumBackward1>)

In [6]:
# Cosine similarity between the reference embedding and the embedding of the
# id10006 file (dim=1 is the function's default, spelled out here).
F.cosine_similarity(embedding_org, embedding_diff, dim=1)

tensor([0.7422], grad_fn=<SumBackward1>)