In [None]:
from main import *
from tqdm import tqdm
from typing import Any, Callable, Dict, List, Tuple
import argparse
import collections
import kneed
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import scipy.interpolate
import scipy.stats
import seaborn as sns
import sys
import tempfile
import zipfile

# Apply seaborn's "ticks" style and "colorblind" palette to the figures drawn below.
sns.set(style="ticks", palette="colorblind")

In [None]:
# Load the ten template sets stored as "1_templates.npy" ... "10_templates.npy".
all_templates = [np.load("{}_templates.npy".format(number)) for number in range(1, 11)]

# Turn every template (one row of a template set) into a cubic interpolator
# defined on [0, 1], so it can be evaluated at arbitrary relative positions.
all_expanded_templates = list()
for template_set in all_templates:
    support = np.linspace(0, 1, num=template_set.shape[1])
    all_expanded_templates.append(
        [
            scipy.interpolate.interp1d(support, template_row, kind="cubic")
            for template_row in template_set
        ]
    )

# One subplot per template set; every template of a set is drawn as a curve.
n_template_sets = len(all_expanded_templates)
fig, axarr = plt.subplots(n_template_sets, 1, figsize=(3, 2 * n_template_sets))
x = np.linspace(0, 1, 100)
for axis, interpolators in zip(axarr, all_expanded_templates):
    for interpolator in interpolators:
        axis.plot(x, interpolator(x))
    axis.set_xlim(0, 1)
    axis.set_ylim(-1.5, 1.5)

In [None]:
def fit_values(values, template):
    """Assign the given values to template slots so that they follow ``template``.

    Every input position defines one template slot whose target value is
    ``template(position)``. The values are assigned one-to-one to the slots in
    two stages:

    1. A binary search over all pairwise deviations finds the smallest
       threshold for which a perfect matching exists that only uses
       (value, slot) pairs whose deviation does not exceed the threshold,
       i.e. the largest single deviation is minimised.
    2. Among the pairs allowed by that threshold, the perfect matching with
       the smallest total deviation is selected.

    Args:
        values: Sequence of ``(position, value)`` pairs. ``position`` must be
            accepted by ``template``; ``value`` must be numeric.
        template: Callable that maps a position to the expected value.

    Returns:
        A list of indices: the argsort of the original positions of the values
        assigned to slot 0, 1, ..., n-1. Element ``k`` is therefore the index
        of the slot that received the value with the k-th smallest original
        position. An empty input yields an empty list.
    """
    if len(values) == 0:
        return []

    positions, values = zip(*values)
    n = len(values)

    # The template only has to be evaluated once per slot, not once per pair.
    targets = np.array([float(template(position)) for position in positions])
    distances = np.abs(np.asarray(values, dtype=float)[:, None] - targets[None, :])

    # Distinct tuple labels keep the two sides of the bipartite graph apart
    # and make membership tests on the matching dictionaries unambiguous.
    items = [("item", i) for i in range(n)]
    slots = [("slot", j) for j in range(n)]

    def threshold_graph(limit):
        # Bipartite graph holding every (value, slot) pair within ``limit``.
        # All nodes are added explicitly so that nodes without any admissible
        # edge are still known to the matching algorithms.
        graph = nx.Graph()
        graph.add_nodes_from(items)
        graph.add_nodes_from(slots)
        graph.add_weighted_edges_from(
            (items[i], slots[j], distances[i, j])
            for i in range(n)
            for j in range(n)
            if distances[i, j] <= limit
        )
        return graph

    # binary search to find smallest deviation matching
    candidates = np.sort(distances.flatten())
    min_idx = 0
    max_idx = len(candidates) - 1  # the complete graph always has a perfect matching
    while min_idx != max_idx:
        pivot = min_idx + (max_idx - min_idx) // 2
        # Passing ``top_nodes`` keeps disconnected threshold graphs well
        # defined instead of raising ``nx.AmbiguousSolution``.
        matching = nx.bipartite.maximum_matching(
            threshold_graph(candidates[pivot]), top_nodes=items
        )
        if all(item in matching for item in items):
            max_idx = pivot
        else:
            min_idx = pivot + 1

    # clean up smallest deviation matching
    matching = nx.bipartite.minimum_weight_full_matching(
        threshold_graph(candidates[max_idx]), top_nodes=items
    )
    # matching[slot] is ("item", i); look up the original position of value i.
    return list(np.argsort([float(positions[matching[slot][1]]) for slot in slots]))

In [None]:
(training, validation, test), stats = load_data("fma_albums_with_echonest.json")

# For every template set: for each validation album take the best Spearman
# correlation between the original track order and the order produced by
# fit_values over all templates of the set, then average over the albums.
best_mean_tpl_scores = list()
for tpls in all_expanded_templates:
    best_tpl_scores = list()
    for album in validation:
        best_tpl_scores.append(
            np.max(
                [
                    scipy.stats.spearmanr(
                        range(len(album)), fit_values(album, tpl)
                    ).correlation
                    for tpl in tpls
                ]
            )
        )
    best_mean_tpl_scores.append(np.mean(best_tpl_scores))

# The first template set is left out of the knee search. The x values are the
# real indices (1, 2, ...) of the remaining sets, so that kneedle.knee can be
# used directly as an index into all_templates / all_expanded_templates.
kneedle = kneed.KneeLocator(
    np.arange(1, len(best_mean_tpl_scores)),
    np.array(best_mean_tpl_scores[1:]),
    curve="concave",
    direction="increasing",
)
if kneedle.knee is None:
    raise RuntimeError("KneeLocator found no knee in the validation scores")
print(all_templates[kneedle.knee])

In [None]:
# Evaluate the template set selected on the validation data against the test
# albums and compare it with randomly shuffled track orders.
tpls = all_expanded_templates[kneedle.knee]
best_tpl_scores = list()
best_random_scores = list()
for album in test:
    track_order = range(len(album))

    # Best correlation any template of the selected set achieves on this album.
    template_correlations = [
        scipy.stats.spearmanr(track_order, fit_values(album, tpl)).correlation
        for tpl in tpls
    ]
    best_tpl_scores.append(np.max(template_correlations))

    # Baseline: best of as many random permutations as there are templates.
    random_correlations = [
        scipy.stats.spearmanr(
            track_order,
            sorted(track_order, key=lambda _: np.random.rand()),
        ).correlation
        for _unused_tpl in tpls
    ]
    best_random_scores.append(np.max(random_correlations))

# Report mean +- standard error of the mean for both approaches.
tpl_mean = np.mean(best_tpl_scores)
tpl_sem = np.std(best_tpl_scores) / np.sqrt(len(best_tpl_scores))
print("Templates: {0:.4f}+-{1:.4f}".format(tpl_mean, tpl_sem))

random_mean = np.mean(best_random_scores)
random_sem = np.std(best_random_scores) / np.sqrt(len(best_random_scores))
print("Random: {0:.4f}+-{1:.4f}".format(random_mean, random_sem))

In [None]:
# Two-sample t-test between the template scores and the random baseline.
# The two-sided p-value is halved and combined with the sign of t to obtain a
# one-sided test of "templates score higher than random".
t, two_sided_p = scipy.stats.ttest_ind(best_tpl_scores, best_random_scores)
p = two_sided_p / 2
templates_beat_random = (t > 0) and (p < 0.01)
if templates_beat_random:
    print("Templates > Random (p < 0.01)")