In [1]:
from segmentation_dataset import SegmentationDataset
from model import Model
from baseline import Baseline
from metrics import pk, windowdiff
import io
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.autograd import Variable
from tqdm import tqdm
import sys
import mmap
import numpy as np
from run import load_vectors, validate

In [2]:
# Load the word embeddings from the local .vec file using run.load_vectors.
# This is the slowest cell here: the recorded run took about 2m45s to read
# 999,995 entries, so avoid re-executing it unless the kernel was restarted.
word2vecModel = load_vectors('wiki-news-300d-1M-subword.vec')

Loading word2vec embeddings...


Progress: 100%|██████████| 999995/999995 [02:45<00:00, 6051.09it/s]


In [3]:
# Restore the saved model object from disk.
# map_location='cpu' makes the load work on machines without the device the
# model was saved from; the evaluation cells below call .numpy() on the model
# outputs, which requires CPU tensors anyway.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints that come from a trusted source.
model_save_path = 'saved_model'
model = torch.load(model_save_path, map_location='cpu')

In [4]:
# Build the dev dataset from the 'wiki_50' path, passing in the loaded
# embeddings. The recorded run logs three stages (reading raw data, converting
# documents to embeddings, preprocessing), each over 50 items.
dev_path = 'wiki_50'
dev_dataset = SegmentationDataset(dev_path, word2vecModel)

Progress: 100%|██████████| 50/50 [00:00<00:00, 4007.40it/s]
Progress: 100%|██████████| 50/50 [00:00<00:00, 7130.50it/s]
Progress:   0%|          | 0/50 [00:00<?, ?it/s]


Reading raw data...

Converting documents to embeddings...
Preprocessing data...


Progress: 100%|██████████| 50/50 [00:13<00:00,  3.61it/s]


In [None]:
# Accumulate Pk and WindowDiff over the dev documents whose position is listed
# in `indices`.
# NOTE(review): `indices` is not defined anywhere in this notebook. Define it
# (a list/set/range of document positions) before running this cell.
model.eval()
total_pk = 0.0
total_windowdiff = 0.0
# no_grad: this is inference only, so skip building the autograd graph.
# The progress total follows `indices` instead of a hard-coded 5.
with torch.no_grad(), tqdm(desc='Validating', total=len(indices)) as pbar:
    for i, data in enumerate(dev_dataset):
        if i not in indices:
            continue
        pbar.update()
        # Merge the first two dimensions of the stored tensors before scoring.
        target = torch.flatten(data['target'], start_dim=0, end_dim=1).long()
        output = model(torch.flatten(data['sentences'], start_dim=0, end_dim=1))
        # Predicted label per row = arg-max over the class dimension.
        output_softmax = F.softmax(output, 1)
        output_argmax = torch.argmax(output_softmax, dim=1)
        target_labels = target.detach().numpy()
        predicted_labels = output_argmax.detach().numpy()
        total_pk += pk(target_labels, predicted_labels)
        # Both metrics compare label sequences. Passing the softmax
        # probabilities here (as before) makes `== boundary` match almost
        # nothing, which corrupts the WindowDiff score.
        total_windowdiff += windowdiff(target_labels, predicted_labels)

In [None]:
# Run the project's validate() helper on the dev set built above.
# `val_dataset` was never defined in this notebook; the dataset that exists
# here is `dev_dataset`.
total_pk, total_windowdiff = validate(model, dev_dataset)

In [50]:
import importlib
import metrics
import baseline
# Re-execute the project modules so edits to metrics.py / baseline.py are
# picked up without restarting the kernel.
importlib.reload(metrics)
importlib.reload(baseline)
# importlib.reload() does not update names that were bound earlier with
# `from module import name`, so rebind them to the freshly loaded objects.
# Without this, pk / windowdiff / Baseline would still be the stale versions.
from metrics import pk, windowdiff
from baseline import Baseline
# Display the module so the cell still shows which file was reloaded.
baseline

<module 'baseline' from '/Users/genetanaka/podcast-segmentation/baseline.py'>

In [51]:
# Score the baseline on the dev dataset with threshold 0.0.
# NOTE(review): in the recorded outputs Pk and Window Diff are identical to
# every digit for all thresholds; check that Baseline.evaluate() is not
# returning the same metric twice.
baseline_threshold = 0.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:12<00:00,  3.89it/s]

Baseline Pk: 0.1459458282339274, Baseline Window Diff: 0.1459458282339274





In [52]:
# Score the baseline on the dev dataset with threshold 1.0.
baseline_threshold = 1.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:11<00:00,  4.38it/s]

Baseline Pk: 0.1459458282339274, Baseline Window Diff: 0.1459458282339274





In [53]:
# Score the baseline on the dev dataset with threshold 2.0.
baseline_threshold = 2.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:12<00:00,  4.13it/s]

Baseline Pk: 0.14535529299862945, Baseline Window Diff: 0.14535529299862945





In [54]:
# Score the baseline on the dev dataset with threshold 3.0.
baseline_threshold = 3.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:11<00:00,  4.35it/s]

Baseline Pk: 0.14284588202466034, Baseline Window Diff: 0.14284588202466034





In [55]:
# Score the baseline on the dev dataset with threshold 4.0.
baseline_threshold = 4.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s]

Baseline Pk: 0.13030347263437625, Baseline Window Diff: 0.13030347263437625





In [56]:
# Score the baseline on the dev dataset with threshold 5.0.
baseline_threshold = 5.0
# Named `baseline_model` so the instance does not shadow the imported
# `baseline` module.
baseline_model = Baseline(dev_dataset, baseline_threshold)
base_pk, base_windowdiff = baseline_model.evaluate()
print("Baseline Pk: {}, Baseline Window Diff: {}".format(base_pk, base_windowdiff))

Validating Baseline:   0%|          | 0/50 [00:00<?, ?it/s]




Validating Baseline: 100%|██████████| 50/50 [00:11<00:00,  4.41it/s]

Baseline Pk: 0.11583660303029159, Baseline Window Diff: 0.11583660303029159





In [60]:
# Toy 24-element label sequences: s2 is s1 plus one extra boundary (the 1 at
# index 6). Each character of the string becomes one integer label.
s1 = np.fromiter(map(int, "000000000100000000000000"), dtype=int)
s2 = np.fromiter(map(int, "000000100100000000000000"), dtype=int)

In [61]:
# Pk for the 24-element toy pair (s1 as reference, s2 as hypothesis), with k
# left at its default.
pk(s1, s2)

0.0

In [62]:
# WindowDiff for the same 24-element toy pair, with k left at its default.
windowdiff(s1, s2)

0.5384615384615384

In [2]:
def pk(ref: np.array, hyp: np.array, k: int = None, boundary: int = 1):
    """
    Compute the Pk metric for a pair of segmentations.

    A segmentation is a 1-D sequence over a vocabulary of two items
    (e.g. 0 and 1), where the specified boundary value marks the edge of a
    segment. A window of width ``k`` is slid over both sequences, and the
    score is the fraction of window positions where exactly one of the two
    sequences contains a boundary.

    :param ref: reference segmentation (array-like of labels)
    :param hyp: hypothesis segmentation, same length as ``ref``
    :param k: window width; when None it is set to half the reference length
        divided by the number of reference boundaries (at least 1)
    :param boundary: label value that marks a boundary
    :return: Pk as a float in [0, 1]
    :raises ValueError: if the lengths differ, if ``k`` is None and ``ref``
        has no boundary, or if ``k`` is not in ``1..len(ref)``

    >>> ref = np.array([0, 1, 0, 0] * 100)
    >>> '%.2f' % pk(ref, np.ones(400, dtype=int), 2)
    '0.50'
    >>> '%.2f' % pk(ref, np.zeros(400, dtype=int), 2)
    '0.50'
    >>> '%.2f' % pk(ref, ref, 2)
    '0.00'
    """
    ref = np.asarray(ref)
    hyp = np.asarray(hyp)
    n = ref.shape[0]
    if n != hyp.shape[0]:
        raise ValueError("Segmentations have unequal length")

    if k is None:
        n_boundaries = np.count_nonzero(ref == boundary)
        if n_boundaries == 0:
            # Previously this surfaced as an opaque ZeroDivisionError.
            raise ValueError(
                "Cannot infer window width k: reference has no boundaries"
            )
        # Clamp to 1: a rounded-down width of 0 would make every window empty
        # and the score trivially 0.
        k = max(1, int(round(n / (n_boundaries * 2.0))))
    if k < 1:
        raise ValueError("Window width k must be at least 1")
    if k > n:
        raise ValueError(
            "Window width k should be smaller or equal than segmentation lengths"
        )

    # Compare against `boundary` once instead of once per window.
    ref_is_boundary = ref == boundary
    hyp_is_boundary = hyp == boundary

    n_windows = n - k + 1
    err = 0
    for i in range(n_windows):
        r = bool(ref_is_boundary[i : i + k].any())
        h = bool(hyp_is_boundary[i : i + k].any())
        if r != h:
            err += 1
    return err / float(n_windows)

In [3]:
def windowdiff(ref: np.array, hyp: np.array, k: int = None, boundary: int = 1, weighted: bool = False):
    """
    Compute the windowdiff score for a pair of segmentations.

    A segmentation is a 1-D sequence over a vocabulary of two items
    (e.g. 0 and 1), where the specified boundary value marks the edge of a
    segment. A window of width ``k`` is slid over both sequences and the
    number of boundaries inside it is compared.

    :param ref: reference segmentation (array-like of labels)
    :param hyp: hypothesis segmentation, same length as ``ref``
    :param k: window width; when None it is set to half the reference length
        divided by the number of reference boundaries (at least 1)
    :param boundary: label value that marks a boundary
    :param weighted: if True, each window contributes the absolute difference
        in boundary counts; otherwise it contributes at most 1
    :return: the accumulated penalty divided by the number of windows
    :raises ValueError: if the lengths differ, if ``k`` is None and ``ref``
        has no boundary, or if ``k`` is not in ``1..len(ref)``

        >>> s1 = np.array([int(ch) for ch in "000100000010"])
        >>> s2 = np.array([int(ch) for ch in "000010000100"])
        >>> s3 = np.array([int(ch) for ch in "100000010000"])
        >>> '%.2f' % windowdiff(s1, s1, 3)
        '0.00'
        >>> '%.2f' % windowdiff(s1, s2, 3)
        '0.30'
        >>> '%.2f' % windowdiff(s2, s3, 3)
        '0.80'
    """
    ref = np.asarray(ref)
    hyp = np.asarray(hyp)
    n = ref.shape[0]
    if n != hyp.shape[0]:
        raise ValueError("Segmentations have unequal length")

    if k is None:
        n_boundaries = np.count_nonzero(ref == boundary)
        if n_boundaries == 0:
            # Previously this surfaced as an opaque ZeroDivisionError.
            raise ValueError(
                "Cannot infer window width k: reference has no boundaries"
            )
        # Clamp to 1: a rounded-down width of 0 would make every window empty
        # and the score trivially 0.
        k = max(1, int(round(n / (n_boundaries * 2.0))))
    if k < 1:
        raise ValueError("Window width k must be at least 1")
    if k > n:
        raise ValueError(
            "Window width k should be smaller or equal than segmentation lengths"
        )

    # Compare against `boundary` once instead of once per window.
    ref_is_boundary = ref == boundary
    hyp_is_boundary = hyp == boundary

    n_windows = n - k + 1
    wd = 0.0
    for i in range(n_windows):
        ndiff = abs(
            np.count_nonzero(ref_is_boundary[i : i + k])
            - np.count_nonzero(hyp_is_boundary[i : i + k])
        )
        if weighted:
            wd += ndiff
        else:
            wd += min(1, ndiff)
    return wd / float(n_windows)

In [87]:
# Shorter 12-element version of the toy pair: s2 is s1 plus one extra boundary
# (the 1 at index 6). Each character of the string becomes one integer label.
s1 = np.fromiter(map(int, "000000000100"), dtype=int)
s2 = np.fromiter(map(int, "000000100100"), dtype=int)

In [88]:
# Pk for the 12-element toy pair (s1 as reference, s2 as hypothesis); k is not
# passed, so pk derives it from the boundary count of s1.
pk(s1, s2)

0.42857142857142855

In [89]:
# WindowDiff for the same 12-element toy pair; k is not passed, so windowdiff
# derives it from the boundary count of s1.
windowdiff(s1, s2)

0.8571428571428571