In [None]:
#| eval: false

# !pip install pytorch_lightning

In [None]:
#| eval: false

import logging
from collections import OrderedDict

import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
# -

import numpy as np
np.random.seed(0)



  warn(f"Failed to load image Python extension: {e}")


In [None]:
#| eval: false

from tqdm import tqdm
from glob import glob
from random import choices, seed
seed(1991)
N = 100000

class TableDistanceDataset(torch.utils.data.Dataset):
    def __init__(self, embeddings_files, labels):
        self.X1 = []
        self.X2 = []
        self.dist = []
        files = {}
        for e in embeddings_files:
            word = e.split('/')[-2]
            files[word] = files.get(word, []) + [e]
        n_w = len(files.keys())
        for _ in tqdm(range(N)):
            c = choices(list(files.keys()), k=1)[0]
            chosen_file_x1 = choices(files[c], k=1)[0]
            weights = [0.5/(n_w-1) if k != c else 0.5 for k in files.keys()]
            chosen_word = choices(list(files.keys()), k=1, weights=weights)[0]
            chosen_file_x2 = choices(files[chosen_word], k=1)[0]
            self.X1.append(chosen_file_x1)
            self.X2.append(chosen_file_x2)
            self.dist.append(0 if c == chosen_word else 1)
        self.dist = torch.Tensor(self.dist)


    def __len__(self):
        return len(self.X1)

    def __getitem__(self, index):
        x1 = torch.load(self.X1[index])
        x2 = torch.load(self.X2[index])
        return F.pad(x1, (0, 0, 0, 49-x1.shape[0]), 'constant', 0)[None, :, :], F.pad(x2, (0, 0, 0, 49-x2.shape[0]), 'constant', 0)[None, :, :], self.dist[index]

embeddings_files = choices(glob('embeddings_base/*/*.pt'), k=800)
labels = [e.split('/')[-2] for e in embeddings_files]
tableDistanceDataset = TableDistanceDataset(embeddings_files, labels)
print(f"{N} == {len(tableDistanceDataset)}")

BATCH_SIZE = 32

100%|██████████| 100000/100000 [00:01<00:00, 50336.83it/s]

100000 == 100000





In [None]:
#| eval: false

import re


ixs = [ix for ix, f in enumerate(tableDistanceDataset.X1) if re.findall('beef_0_1655590825-SIP-A90CCE12F2CF-00003f00-chunk3', f)]
for i in ixs:
    if 'drink' in tableDistanceDataset.X2[i]:
        print(tableDistanceDataset.X1[i])
        print(tableDistanceDataset.X2[i])
        print(tableDistanceDataset.dist[i])
        print()

embeddings_base/beef/beef_0_1655590825-SIP-A90CCE12F2CF-00003f00-chunk3.pt
embeddings_base/drink/drink_0_1655668162-SIP-A90CCE12F2CF-00004093-chunk7.pt
tensor(1.)



In [None]:
#| eval: false

from collections import Counter

in_vocab_words = [f.split('/')[-2] for f in embeddings_files]
out_vocab_words = [f.split('/')[-2] for f in glob('embeddings_base/*/*.pt') if f.split('/')[-2] not in in_vocab_words]
print(f"in_vocab_words: {len(set(in_vocab_words))} , out_vocab_words: {len(set(out_vocab_words))}")
print(f"10 in vocab with most audios per word {Counter(in_vocab_words).most_common(10)}")
print(f"10 out vocab with most audios per word {Counter(out_vocab_words).most_common(10)}")

in_vocab_words: 84 , out_vocab_words: 69
10 in vocab with most audios per word [('chicken', 91), ('orange', 63), ('can', 63), ('rice', 56), ('mein', 45), ('chow', 37), ('plate', 37), ('beef', 25), ('two', 22), ('bowl', 20)]
10 out vocab with most audios per word [('bowls', 22), ('sprite', 22), ('brown', 17), ('noodles', 16), ('fry', 15), ('rangoons', 13), ('iced', 9), ('juice', 8), ('come', 7), ('vegetables', 6)]


In [None]:
#| eval: false

class Table2Representation(pl.LightningModule):
    def __init__(self):
        super().__init__()

        self.nhid = 32

        # build model
        self.__build_model()
    
    def __build_model(self):
        self.fc1 = nn.Conv2d(1, 20, 20, stride=2)
        self.do1 = nn.Dropout(0.2)
        self.out = nn.Conv2d(20, 1, 14, stride=2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.do1(x)
        x = self.out(x)
        return x.squeeze()


In [None]:
#| eval: false

# Based upon https://github.com/PyTorchLightning/Siamese-Neural-Networks/blob/master/model.py
class TableDistanceModule(pl.LightningModule):
    def __init__(self, tableDistanceDataset):
        super().__init__()

        self.tableDistanceDataset = tableDistanceDataset
        self.datatrain, self.dataval, self.datatest = \
        torch.utils.data.random_split(self.tableDistanceDataset,
                                      [round(N*0.8),
                                       round(N*0.1),
                                       round(N*0.1)])

        self.table2Representation = Table2Representation()

        # build model
        self.__build_model()
    
    def __build_model(self):
        pass

    def forward(self, x1, x2):
        z1 = self.table2Representation.forward(x1)
        z2 = self.table2Representation.forward(x2)
        dis = torch.mean(torch.abs(z1 - z2), axis=1)
        return dis

    def loss(self, pred_dists, true_dists):
        loss_val = F.mse_loss(pred_dists, true_dists)
        return loss_val
    
    def _step(self, batch, batch_idx, name, training_step=False):
        X1, X2, dist = batch
        pred = self.forward(X1, X2)
        loss_val = self.loss(pred, dist)
        tqdm_dict = OrderedDict({name: loss_val})
        self.log_dict(tqdm_dict)
        if training_step:
            return OrderedDict({
                'loss': loss_val,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
        else:
            return tqdm_dict
        
    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train_loss", training_step=True)
    def validation_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="val_loss", training_step=False)
    def test_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="test_loss", training_step=False)

    def _epoch_end(self, outputs, name):
        # With DP training I think you have to average the things individually? Not sure
        # Look at the pytorch lightning siamese network code
        #if self.trainer.use_dp or self.trainer.use_ddp2:
        #    val_acc = torch.mean(val_acc)
        avg_loss = torch.stack([x[name] for x in outputs]).mean()
        tqdm_dict = {name: avg_loss}
        self.log_dict(tqdm_dict)
        result = OrderedDict({name: avg_loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict})
        return result
        
    def validation_epoch_end(self, outputs):
        result = self._epoch_end(outputs, name="val_loss")
        self.log_dict(result)
        return result
    def test_epoch_end(self, outputs):
        result = self._epoch_end(outputs, name="test_loss")
        self.log_dict(result)
        return result
        
    # ---------------------
    # TRAINING SETUP
    # ---------------------
    def configure_optimizers(self):
        """
        return whatever optimizers we want here
        :return: list of optimizers
        """
        optimizer = optim.SGD(self.parameters(),
                             lr=0.01, momentum=0.90)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=10)
        return [optimizer], [scheduler]

    def __dataloader(self, train, dataset):
        # when using multi-node (ddp) we need to add the  datasampler
        train_sampler = None
        batch_size = BATCH_SIZE

        should_shuffle = train and train_sampler is None
        loader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=should_shuffle,
            sampler=train_sampler,
            num_workers=0,
            drop_last=True
        )

        return loader

    def train_dataloader(self):
        logging.info('training data loader called')
        return self.__dataloader(train=True, dataset=self.datatrain)

    def val_dataloader(self):
        logging.info('val data loader called')
        return self.__dataloader(train=False, dataset=self.dataval)

    def test_dataloader(self):
        logging.info('val data loader called')
        return self.__dataloader(train=False, dataset=self.datatest)

In [None]:
#| eval: false

model_gpu = TableDistanceModule(tableDistanceDataset)
trainer_gpu = Trainer(max_epochs=15, gpus=-1)
for i, (x, y, d) in enumerate(model_gpu.datatrain):
    print(x.device)
    print(y.device)
    print(d.device)
    break
for p in model_gpu.parameters():
    print(p.device)
trainer_gpu.fit(model_gpu)



  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /workspaces/wav2keyword/data/panda/lightning_logs


cpu
cpu
cpu
cpu
cpu
cpu
cpu


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                 | Type                 | Params
--------------------------------------------------------------
0 | table2Representation | Table2Representation | 11.9 K
--------------------------------------------------------------
11.9 K    Trainable params
0         Non-trainable params
11.9 K    Total params
0.048     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn(


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


In [None]:
#| eval: false

%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

In [None]:
#| eval: false

trainer_gpu.test()

  rank_zero_warn(
Restoring states from the checkpoint path at /workspaces/wav2keyword/data/panda/lightning_logs/version_0/checkpoints/epoch=14-step=37500.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /workspaces/wav2keyword/data/panda/lightning_logs/version_0/checkpoints/epoch=14-step=37500.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      log:test_loss         0.14371803402900696
 progress_bar:test_loss     0.14371803402900696
        test_loss           0.14371803402900696
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.14371803402900696,
  'progress_bar': {'test_loss': tensor(0.1437, device='cuda:0')},
  'log': {'test_loss': tensor(0.1437, device='cuda:0')}}]

In [None]:
#| eval: false

pos_ratio_test = []

for i, (x, y, d) in tqdm(enumerate(model_gpu.datatest), total=len(model_gpu.datatest)):
    pos_ratio_test.append(d.item())

sum(pos_ratio_test)


100%|██████████| 10000/10000 [00:24<00:00, 408.57it/s]


4999.0

In [None]:
#| eval: false

sum(pos_ratio_test)/len(pos_ratio_test)

0.4999

In [None]:
#| eval: false

model = TableDistanceModule.load_from_checkpoint('lightning_logs/version_0/checkpoints/epoch=14-step=37500.ckpt', tableDistanceDataset=tableDistanceDataset)
model_gpu = TableDistanceModule(tableDistanceDataset)

In [None]:
#| eval: false

import time 


# disable randomness, dropout, etc...
model.eval()

# predict with the model
pos_diff = []
neg_diff = []
pos_acc = []
neg_acc = []
for ix, (x, y, d) in enumerate(iter(model_gpu.test_dataloader())):
    non_trivial = [ix for ix, (a, b) in enumerate(zip(x, y)) if ~torch.equal(a, b)]
    x = x[non_trivial]
    y = y[non_trivial]
    d_hat = model(x, y)
    indices = torch.nonzero(d)
    nonzero_diff = torch.sum(torch.abs(d_hat[indices] - d[indices]), dim=0).item()
    zero_diff = torch.sum(torch.abs(d_hat[d == 0] - d[d == 0]), dim=0).item()
    pos_acc.append((sum([(0 if i.item() < 0.6 else 1) == t for i, t in zip(d_hat[indices], d[indices])]).item(),
                    len(d[indices])))
    neg_acc.append((sum([(0 if i.item() < 0.6 else 1) == t for i, t in zip(d_hat[d == 0], d[d == 0])]).item(),
                    len(d[d == 0])))
    pos_diff.append((nonzero_diff, len(d[indices])))
    neg_diff.append((zero_diff, len(d[d == 0])))
    if ix % 10 == 0:
        print(f"------{ix}----------")
        print(f"positive diff: {round(sum([s for s, _ in pos_diff]) / sum([n for _, n in pos_diff]), 2)}")
        print(f"positive acc: {round(sum([s for s, _ in pos_acc])/sum([l for _, l in pos_acc]), 2)}")
        print(f"Negative diff: {round(sum([s for s, _ in neg_diff]) / sum([n for _, n in neg_diff]), 2)}")
        print(f"negative acc: {round(sum([s for s, _ in neg_acc])/sum([l for _, l in neg_acc]), 2)}")
        print(f"Overall diff: {round(sum([s1 + s2 for (s1, _), (s2, _) in zip(pos_diff, neg_diff)]) / sum([n1+n2 for (_, n1), (_, n2) in zip(pos_diff, neg_diff)]), 2)}")


------0----------
positive diff: 0.31
positive acc: 0.78
Negative diff: 0.45
negative acc: 0.64
Overall diff: 0.37
------10----------
positive diff: 0.29
positive acc: 0.85
Negative diff: 0.34
negative acc: 0.75
Overall diff: 0.31
------20----------
positive diff: 0.28
positive acc: 0.85
Negative diff: 0.31
negative acc: 0.77
Overall diff: 0.3
------30----------
positive diff: 0.28
positive acc: 0.85
Negative diff: 0.32
negative acc: 0.77
Overall diff: 0.3
------40----------
positive diff: 0.28
positive acc: 0.83
Negative diff: 0.31
negative acc: 0.77
Overall diff: 0.3
------50----------
positive diff: 0.28
positive acc: 0.84
Negative diff: 0.31
negative acc: 0.76
Overall diff: 0.29
------60----------
positive diff: 0.28
positive acc: 0.85
Negative diff: 0.31
negative acc: 0.76
Overall diff: 0.29
------70----------
positive diff: 0.27
positive acc: 0.85
Negative diff: 0.31
negative acc: 0.76
Overall diff: 0.29
------80----------
positive diff: 0.28
positive acc: 0.85
Negative diff: 0.3

In [None]:
#| eval: false

test_indices = model_gpu.datatest.indices
x1 = [x for ix, x in enumerate(model_gpu.datatest.dataset.X1) if ix in test_indices]
x2 = [x for ix, x in enumerate(model_gpu.datatest.dataset.X2) if ix in test_indices]
dists = [x for ix, x in enumerate(model_gpu.datatest.dataset.dist) if ix in test_indices]

In [None]:
#| eval: false

from pathlib import Path
    
import shutil
pos_hits = 0
pos_errs = 0
pos_n = 0
neg_hits = 0
neg_errs = 0
neg_n = 0

for ix, i in tqdm(enumerate(range(32, len(x1), 32)), total=int(len(x1)/32)):
    x, y, label = x1[i-32:i], x2[i-32:i], dists[i-32:i]
    ex = [torch.load(e) for e in x]
    ey = [torch.load(e) for e in y]
    ex= [F.pad(e[None], (0, 0, 0, 49-e.shape[0]), 'constant', 0) for e in ex]
    ey= [F.pad(e[None], (0, 0, 0, 49-e.shape[0]), 'constant', 0) for e in ey]
    ex = torch.stack(ex)
    ey = torch.stack(ey)
    pred = model(ex, ey)
    for jx, (t, d, xf, yf) in enumerate(zip(label, pred, x, y)):
        d = 0 if d < 0.6 else 1
        neg_n += 1 if t == 0 else 0
        neg_hits += 1 if (t == 0) and (d == 0) else 0
        neg_errs += 1 if (t == 0) and (d == 1) else 0
        pos_n += 1 if t == 1 else 0
        pos_hits += 1 if (t == 1) and (d == 1) else 0
        pos_errs += 1 if (t == 1) and (d == 0) else 0

        if xf == yf:
            continue
        xn = Path(xf).name
        yn = Path(yf).name
        xtrans = xn.split('_')[0]
        ytrans = yn.split('_')[0]
        if (xtrans != ytrans) and (t == 0):
            print(xn, yn, xtrans, ytrans, t, d)
            break
        if (xtrans == ytrans) and (t == 1):
            print(xn, yn, xtrans, ytrans, t, d)
            break
        if d == 0 and t == 0:
            Path(f"results/emb_sim/correct_same/{xtrans}").mkdir(parents=True, exist_ok=True)
            shutil.copy(f"{Path.home()}/.cache/panda/audio_slices/{yn.replace('.pt', '.wav')}", f"results/emb_sim/correct_same/{xtrans}/{ix}_{jx}_{yn.replace('.pt', '.wav')}")
        if d == 1 and t == 0:
            Path(f"results/emb_sim/incorrect_dif/{xtrans}").mkdir(parents=True, exist_ok=True)
            shutil.copy(f"{Path.home()}/.cache/panda/audio_slices/{yn.replace('.pt', '.wav')}", f"results/emb_sim/incorrect_dif/{xtrans}/{ix}_{jx}_{yn.replace('.pt', '.wav')}")
        if d == 0 and t == 1:
            Path(f"results/emb_sim/incorrect_same/{xtrans}").mkdir(parents=True, exist_ok=True)
            shutil.copy(f"{Path.home()}/.cache/panda/audio_slices/{yn.replace('.pt', '.wav')}", f"results/emb_sim/incorrect_same/{xtrans}/{ix}_{jx}_{yn.replace('.pt', '.wav')}")
        if d == 1 and t == 1:
            Path(f"results/emb_sim/correct_dif/{xtrans}").mkdir(parents=True, exist_ok=True)
            shutil.copy(f"{Path.home()}/.cache/panda/audio_slices/{yn.replace('.pt', '.wav')}", f"results/emb_sim/correct_dif/{xtrans}/{ix}_{jx}_{yn.replace('.pt', '.wav')}")

100%|██████████| 312/312 [03:30<00:00,  1.48it/s]


In [None]:
#| eval: false

print(pos_n)
print(neg_n)

5005
4979


In [None]:
#| eval: false

print(f"{round(pos_hits/pos_n, 2)*100}")
print(f"{round(neg_hits/neg_n, 2)*100}")

86.0
76.0


In [None]:
#| eval: false

for ix, i in tqdm(enumerate(range(32, len(x1), 32))):
    x, y, label = x1[i-32:i], x2[i-32:i], dists[i-32:i]
    transx = [Path(j).name.split('_')[0] for j in x]
    transy = [Path(j).name.split('_')[0] for j in y]
    for jx, jy, l in zip(transx, transy, label):
        if (jx != jy) and l == 0:
            print(jx, jy, l)
        if (jx == jy) and l == 1:
            print(jx, jy, l)

print(jx, jy, l)

312it [00:00, 2232.65it/s]

green green tensor(0.)



