In [None]:
# Change the working directory to the parent folder; the relative paths used
# below ("output/...", "data/...") are presumably relative to it — TODO confirm.
# NOTE(review): not idempotent — re-running this cell moves up one more level.
%cd ..

In [None]:
import os

import seaborn as sns

import matplotlib
from matplotlib import pyplot as plt

from sklearn.manifold import TSNE

from scripts import extract_speaker_embeddings as ese

In [None]:
# Global seaborn look for every figure in this notebook.
sns.set_context(context="talk")
sns.set_style(style="white")

In [None]:
def load_paths(base_path, filelist_path):
    """Build the list of `.wav` paths described by a filelist.

    Every non-blank line of the filelist must contain exactly two
    whitespace-separated fields: a file name (without extension) followed by
    a sub-directory name. Each such line yields the path
    `<base_path>/<sub-directory>/<file name>.wav`.

    Parameters
    ----------
    base_path : str
        Directory that contains the per-line sub-directories.
    filelist_path : str
        Path of the text file listing the entries.

    Returns
    -------
    list of str
        One path per non-blank line, in the order of the filelist.

    Raises
    ------
    ValueError
        If a non-blank line does not contain exactly two fields.
    """
    paths = []
    with open(filelist_path, "r") as filelist:
        for line in filelist:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines carry no entry; skipping them
                # avoids an unpacking error on, e.g., a trailing empty line.
                continue
            name, subdir = fields
            paths.append(os.path.join(base_path, subdir, name + ".wav"))
    return paths

In [None]:
# Reuse the extraction script's own argument parser and feed it an explicit
# argv list instead of reading the notebook's command line.
parser = ese.get_arg_parser()
args = parser.parse_args(["--dataset", "grid"])

## Experiments on seen speakers

In [None]:
# Collect the .wav paths named in the multi-speaker test filelist and run the
# feature extraction from `extract_speaker_embeddings` on them.
paths = load_paths(
    base_path="output/synth-samples/grid-multi-test-magnus",
    filelist_path="data/grid/filelists/multi-speaker-test.txt",
)
feats = ese.extract_features(paths, args)

In [None]:
# The parent directory of each file acts as its subject label.
subjects = list(map(os.path.dirname, paths))
num_subjects = len(set(subjects))
print(num_subjects)

In [None]:
# Embed the extracted features with t-SNE (default settings). The fixed
# `random_state` makes the layout reproducible across kernel restarts.
tsne = TSNE(random_state=0)
Y = tsne.fit_transform(feats)

In [None]:
# Scatter plot of the t-SNE coordinates, one colour per subject.
plt.figure(figsize=(4, 4))
sns.set_palette("hls", num_subjects)

# Iterate in sorted order so that each subject is assigned the same colour on
# every run; the iteration order of a plain `set` of strings is not guaranteed
# to be stable between kernel sessions.
for subject in sorted(set(subjects)):
    mask = [subject == other for other in subjects]
    plt.scatter(Y[mask, 0], Y[mask, 1])

# t-SNE axes have no interpretable scale, so the ticks are hidden.
plt.xticks([])
plt.yticks([])

# NOTE(review): absolute, user-specific output path; this cell fails on any
# machine where the directory does not exist. Consider a configurable
# output directory.
plt.savefig("/home/doneata/papers/2020-interspeech-xts/imgs/speaker-embeddings-baseline-seen.pdf", bbox_inches='tight')

## Experiments on seen speakers – fixed embedding
Run the following commands beforehand to prepare the data:
```bash
python predict.py -m magnus-multi-speaker --model-path output/models/grid_multi-speaker_magnus-multi-speaker.pth -d grid --filelist multi-speaker -v -e mean -o output/predictions/grid-multi-test-magnus-multi-speaker-mean-emb.npz
python synthesize_spectro.py ~/work/xts/output/predictions/grid-multi-test-magnus-multi-speaker-mean-emb.npz
```

In [None]:
# Same filelist as the seen-speakers experiment, but reading the samples from
# the "mean-emb" output directory.
paths = load_paths(
    base_path="output/synth-samples/grid-multi-test-magnus-multi-speaker-mean-emb",
    filelist_path="data/grid/filelists/multi-speaker-test.txt",
)
feats = ese.extract_features(paths, args)

In [None]:
# The parent directory of each file acts as its subject label.
subjects = list(map(os.path.dirname, paths))
num_subjects = len(set(subjects))
print(num_subjects)

In [None]:
# Embed the extracted features with t-SNE (default settings). The fixed
# `random_state` makes the layout reproducible across kernel restarts.
tsne = TSNE(random_state=0)
Y = tsne.fit_transform(feats)

In [None]:
# Scatter plot of the t-SNE coordinates, one colour per subject.
plt.figure(figsize=(4, 4))
sns.set_palette("hls", num_subjects)

# Iterate in sorted order so that each subject is assigned the same colour on
# every run; the iteration order of a plain `set` of strings is not guaranteed
# to be stable between kernel sessions.
for subject in sorted(set(subjects)):
    mask = [subject == other for other in subjects]
    plt.scatter(Y[mask, 0], Y[mask, 1])

# t-SNE axes have no interpretable scale, so the ticks are hidden.
plt.xticks([])
plt.yticks([])

# NOTE(review): absolute, user-specific output path; this cell fails on any
# machine where the directory does not exist. Consider a configurable
# output directory.
plt.savefig("/home/doneata/papers/2020-interspeech-xts/imgs/speaker-embeddings-baseline-seen-fixed-emb.pdf", bbox_inches='tight')

## Experiments on unseen speakers

In [None]:
# Collect the .wav paths named in the unseen-speakers filelist, keep every
# 10th entry, and extract features from that subset.
paths = load_paths(
    base_path="output/synth-samples/unseen-k-test-magnus/",
    filelist_path="data/grid/filelists/unseen-k-test.txt",
)[::10]
feats = ese.extract_features(paths, args)

In [None]:
# The parent directory of each file acts as its subject label.
subjects = list(map(os.path.dirname, paths))
num_subjects = len(set(subjects))
print(num_subjects)

In [None]:
# Embed the extracted features with t-SNE (default settings). The fixed
# `random_state` makes the layout reproducible across kernel restarts.
tsne = TSNE(random_state=0)
Y = tsne.fit_transform(feats)

In [None]:
# Scatter plot of the t-SNE coordinates, one colour per subject.
plt.figure(figsize=(4, 4))
sns.set_palette("hls", num_subjects)

# Iterate in sorted order so that each subject is assigned the same colour on
# every run; the iteration order of a plain `set` of strings is not guaranteed
# to be stable between kernel sessions.
for subject in sorted(set(subjects)):
    mask = [subject == other for other in subjects]
    plt.scatter(Y[mask, 0], Y[mask, 1])

# t-SNE axes have no interpretable scale, so the ticks are hidden.
plt.xticks([])
plt.yticks([])

# NOTE(review): absolute, user-specific output path; this cell fails on any
# machine where the directory does not exist. Consider a configurable
# output directory.
plt.savefig("/home/doneata/papers/2020-interspeech-xts/imgs/speaker-embeddings-baseline-unseen.pdf", bbox_inches='tight')