For the speaker embedding models (SE and SE + D), we linearly project the speaker embeddings to a lower-dimensional space (32 dimensions).
In this notebook, we investigate whether the speaker information is preserved after this projection
by looking at the 2-dimensional $t$-SNE projection of the speaker embeddings.

In [None]:
# Move one directory up so that the relative `data/` and `output/` paths and the
# `train` import used below resolve (presumably from the repository root — confirm).
# NOTE: not idempotent — re-running this cell moves up another level; run it once per kernel.
%cd ..

In [None]:
import random

from sklearn.manifold import TSNE

from matplotlib import pyplot as plt

import numpy as np

import seaborn as sns

import torch

from train import (
    DATASET_PARAMETERS,
    MODELS,
)

In [None]:
def plot_proj_emb(embeddings_path, model_path, model_name="bjorn", max_feats=5_000, seed=None):
    """Scatter-plot the t-SNE embedding of linearly projected speaker embeddings.

    The speaker embeddings stored at `embeddings_path` are passed through the
    `linear` layer of a trained model, reduced with t-SNE, and drawn on the
    current matplotlib figure with one colour (and legend entry) per speaker.

    Parameters
    ----------
    embeddings_path : str or path-like
        `.npz` archive with the arrays `feats` (one embedding per row) and
        `ids`; the speaker is taken as the second whitespace-separated token
        of each id.
    model_path : str or path-like
        Checkpoint holding the state dict that is loaded into the model.
    model_name : str, optional
        Key into `MODELS` selecting the architecture to instantiate. Defaults
        to "bjorn", which is what this function always used before.
    max_feats : int or None, optional
        Upper bound on the number of embeddings that are plotted; a random
        subset is drawn when there are more. `None` disables subsampling.
    seed : int or None, optional
        When given, seeds both the subsampling and t-SNE so that the figure is
        reproducible. When `None`, the global `random` state and an unseeded
        t-SNE are used, as before.
    """
    model = MODELS[model_name](DATASET_PARAMETERS["grid"])
    # Force CPU placement: the checkpoint may have been written from a GPU run,
    # and everything below (including `.numpy()`) operates on CPU tensors.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()

    # `np.load` on an .npz keeps the archive open; close it once the arrays are read.
    with np.load(embeddings_path) as data_emb:
        feats = data_emb["feats"]
        speakers = [i.split()[1] for i in data_emb["ids"]]

    num_feats = len(feats)
    if max_feats is not None and num_feats > max_feats:
        # Fall back to the module-level generator when no seed is requested so
        # that a globally seeded `random` keeps behaving as it did.
        rng = random if seed is None else random.Random(seed)
        idxs = rng.sample(range(num_feats), max_feats)
        feats = feats[idxs]
        speakers = [speakers[i] for i in idxs]

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        X = model.linear(torch.tensor(feats).float()).numpy()

    Y = TSNE(random_state=seed).fit_transform(X)

    speakers_uniq = sorted(set(speakers))
    speakers_arr = np.asarray(speakers)
    sns.set_palette("hls", len(speakers_uniq))

    for s in speakers_uniq:
        mask = speakers_arr == s
        plt.scatter(Y[mask, 0], Y[mask, 1], label=s)

    # Anchor the legend outside the axes, to the right, vertically centred.
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

In [None]:
def get_embeddings_path(split):
    """Return the path of the speaker-embedding archive for the given split."""
    return f"data/grid/speaker-embeddings/multi-speaker-{split}.npz"


def get_model_path(name):
    """Return the path of the checkpoint file for the model called `name`."""
    return f"output/models/grid_multi-speaker_{name}_best.pth"


# One figure per (model, split) combination.
# NOTE(review): plot_proj_emb is called without a model name, so it instantiates
# MODELS["bjorn"] for both checkpoints — confirm the state dicts are compatible.
for m in ("bjorn", "bjorn_dispel"):
    for s in ("train", "valid", "test"):
        plt.figure()
        plot_proj_emb(get_embeddings_path(s), get_model_path(m))
        plt.title(f"{m} {s}")