In [43]:
from pathlib import Path
import numpy as np

# Features are expected under <feat_dir>/<MODEL>/<layer>/ as .npy files whose stem
# ends in "_<integer>" (a later cell uses that integer as the alignment word_id).
feat_dir = Path("my-features")
model = "wavlm_base"
layer = 12

feature_dir = feat_dir / model.upper() / str(layer)

# rglob() already searches recursively, so a leading "**/" in the pattern is redundant.
# The primary sort key is the trailing integer of the stem; the full path breaks ties so
# that files sharing the same integer come out in the same order on every run instead of
# in filesystem traversal order.
paths = sorted(
    feature_dir.rglob("*.npy"),
    key=lambda x: (int(x.stem.split("_")[-1]), x.as_posix()),
)
sample_size = len(paths)

print(f"Loading {sample_size} Features..", flush=True)
# features[i] is the array stored at paths[i]; the two lists stay index-aligned.
features = [np.load(path) for path in paths]

Loading 65840 Features..


In [44]:
import pandas as pd

# Alignment table; later cells filter it on "filename" and "word_id" and read "text".
alignments_csv = "librispeech/alignments/alignments.csv"
align_df = pd.read_csv(alignments_csv)

In [None]:
# Codes stripped from both ends of a sequence by default. The original variable name
# (`sil`) suggests these are silence units -- presumably specific to the codebook used
# in this notebook; confirm before reusing with another model/layer.
_DEFAULT_SIL_CODES = frozenset(
    {
        0, 2, 8, 9, 11, 18, 19, 34,
        39, 42, 43, 44, 45, 49, 50, 52,
        61, 63, 65, 66, 71, 72, 75, 78,
        88, 90, 92, 93, 94, 95, 96, 97,
    }
)


def clean_feature(feature, sil=None):
    """Strip leading and trailing silence codes from a code sequence.

    Only the edges are trimmed; silence codes that occur between two
    non-silence codes are kept.

    Args:
        feature: A sliceable 1-D sequence of codes (e.g. a list or a 1-D
            NumPy integer array). Elements must be hashable and compare
            equal to plain ints.
        sil: Optional iterable of codes to treat as silence. Defaults to the
            notebook's built-in set.

    Returns:
        ``feature[start:end]`` with the silence codes removed from both ends
        (empty if every element is a silence code). The result has the same
        type as slicing ``feature`` normally produces.
    """
    sil_codes = _DEFAULT_SIL_CODES if sil is None else frozenset(sil)

    # Move two indices inwards and slice once, instead of re-slicing the
    # sequence one element at a time.
    start, end = 0, len(feature)
    while start < end and feature[start] in sil_codes:
        start += 1
    while end > start and feature[end - 1] in sil_codes:
        end -= 1

    return feature[start:end]

In [63]:
import numpy as np
import itertools
import editdistance


def normalized_edit_distance(sequences):
    """Mean pairwise edit distance, normalised by the longer sequence length.

    For every unordered pair ``(a, b)`` the distance is
    ``editdistance.eval(a, b) / max(len(a), len(b))``.

    Args:
        sequences: Iterable of sequences (anything with ``len`` that
            ``editdistance.eval`` accepts).

    Returns:
        The mean of the pairwise normalised distances, or ``0.0`` when there
        are fewer than two sequences.
    """
    dists = []
    # Iterate the pairs lazily; there is no need to hold all of them in memory.
    for a, b in itertools.combinations(sequences, 2):
        longest = max(len(a), len(b))
        if longest == 0:
            # Two empty sequences are identical; avoid dividing by zero.
            dists.append(0.0)
            continue
        dists.append(editdistance.eval(a, b) / longest)

    if not dists:
        return 0.0
    return np.mean(dists)


In [None]:
import numpy as np


def lcs_length(a, b):
    """Return the length of the longest common subsequence of ``a`` and ``b``.

    Elements are compared with ``==``. Standard dynamic programme, keeping
    only the previous and the current row of the table.
    """
    # `above` is the table row for the prefix of `a` consumed so far;
    # for the empty prefix every entry is zero.
    above = [0] * (len(b) + 1)
    for item_a in a:
        current = [0]
        for col, item_b in enumerate(b, start=1):
            if item_a == item_b:
                # Extend the best subsequence that ends before both items.
                current.append(above[col - 1] + 1)
            else:
                # Drop the last item of either `a` or `b`, whichever is better.
                current.append(max(above[col], current[col - 1]))
        above = current
    return above[-1]


def normalized_lcs_distance(sequences):
    """Mean pairwise LCS-based distance between sequences.

    For every unordered pair ``(a, b)`` the distance is
    ``1 - lcs_length(a, b) / max(len(a), len(b))``.

    Args:
        sequences: Iterable of sequences supporting ``len`` and iteration.

    Returns:
        The mean of the pairwise distances, or ``0.0`` when there are fewer
        than two sequences.
    """
    dists = []
    # Iterate the pairs lazily; there is no need to hold all of them in memory.
    for a, b in itertools.combinations(sequences, 2):
        longest = max(len(a), len(b))
        if longest == 0:
            # Two empty sequences are identical; avoid dividing by zero.
            dists.append(0.0)
            continue
        dists.append(1 - (lcs_length(a, b) / longest))

    if not dists:
        return 0.0
    return np.mean(dists)

In [101]:
TARGET_WORD = "wine"  # only feature files aligned to this word are collected
MAX_EXAMPLES = 20  # stop after this many matching examples

# (filename, word_id) -> text, built once so the loop does a dictionary lookup
# instead of scanning the whole alignment table for every feature file.
# drop_duplicates keeps the first row per key, i.e. the row `.iloc[0]` would pick
# after filtering on filename and word_id.
text_by_key = (
    align_df.drop_duplicates(subset=["filename", "word_id"])
    .set_index(["filename", "word_id"])["text"]
    .to_dict()
)

count = 0
codes = []
for path, feature in zip(paths, features):
    # The stem is split on "_": first part is matched against "filename",
    # last part (as int) against "word_id".
    stem_parts = path.stem.split("_")
    key = (stem_parts[0], int(stem_parts[-1]))
    if key not in text_by_key:
        # No alignment row for this feature file; skip it rather than crash.
        continue
    text = text_by_key[key]
    if str(text) != TARGET_WORD:
        continue
    count += 1
    code = clean_feature(feature)
    print(f"{text:<15} | {code}", flush=True)
    codes.append(code)
    if count == MAX_EXAMPLES:
        break


wine            | [54  7 25 77 37  3 23 39 65]
wine            | [54  7 25 77 37 68 23 90]
wine            | [78 54  7 25 77 37 45 23]
wine            | [54  7 25 77 37 68 67 65]
wine            | [93 54  7 35 25 77 37  3 23 61]
wine            | [54  7 25 77 37  3 67]
wine            | [43 49 54  7 25 77 37 67 65]
wine            | [54  7 35 25 77 37  3 45 23 67 65]
wine            | [43 54  7 25 77 37  3 67  2]
wine            | [43 54  7 35 25 77 37  3 67]
wine            | [54  7 25 77 37  3 45 23 90]
wine            | [11 54  7 35 25 77 37  3  8 23 63]
wine            | [54  7 35 25 77 37  3 67 65 97]
wine            | [54  7 77 37  3 23 90]


In [None]:
# Pairwise dissimilarity of the code sequences collected in `codes`.
# The label names the word those sequences were collected for ("wine").
ned = normalized_edit_distance(codes)
print(f"NED for 'wine': {ned * 100:.3f}%")

ned_lcs = normalized_lcs_distance(codes)
print(f"Normalized LCS Distance: {ned_lcs * 100:.3f}%")

NED for 'what': 37.244%
Normalized LCS Distance: 32.539%
