# Heuristics Visualization
This notebook demonstrates how the elbow-finding heuristics behave on different datasets.
It is intended for experimentation and for deciding whether any adjustments are needed to handle cases such as an S-curve along the expected linear descent line.

In [None]:
import numpy as np
import sklearn.metrics
from top2vec.elbow_finding import find_elbow_index, get_distances_from_line

%matplotlib inline
import matplotlib.pyplot as plt


def plot_heuristic(values, figure_num="1", derivatives="distance", figsize=(8, 3)):
    """Run the elbow heuristic on ``values`` and plot the result.

    The values are sorted in descending order, a straight line is drawn from
    the first to the last sorted value, and the index returned by
    ``find_elbow_index`` is marked on the curve via ``plot_figure``.

    Args:
        values: Sequence of numbers to analyse (order does not matter).
        figure_num: Identifier of the matplotlib figure to draw into.
        derivatives: Which series to show in the derivative panels.
            ``"distance"`` uses ``get_distances_from_line`` with its default
            metric, ``"values"`` uses the sorted values themselves, and any
            falsy value disables the derivative panels.
        figsize: Figure size forwarded to matplotlib.

    Raises:
        ValueError: If ``derivatives`` is truthy but not a known option.
    """
    descending = -np.sort(-np.array(values))
    count = descending.size

    # Straight line joining the largest and smallest value.
    slope = (descending[-1] - descending[0]) / (count - 1)
    intercept = descending[0]
    positions = np.arange(count).reshape(-1, 1)
    reference_line = positions * slope + intercept

    # Original author's note: a uniform (absolute-valued) distance cannot
    # reveal an inflection, hence the "raw-y" metric for this panel.
    raw_y_offsets = get_distances_from_line(
        descending, slope, intercept, metric="raw-y"
    )
    elbow_index = find_elbow_index(descending)

    show_derivatives = bool(derivatives)
    if not show_derivatives:
        derivative_source = []
    elif derivatives == "values":
        derivative_source = descending
    elif derivatives == "distance":
        derivative_source = get_distances_from_line(descending, slope, intercept)
    else:
        raise ValueError("Unknown derivatives requested.")

    plot_figure(
        sorted_vals=descending,
        distances=derivative_source,
        elbow=elbow_index,
        line=reference_line,
        figure_num=figure_num,
        with_derivatives=show_derivatives,
        y_distances=raw_y_offsets,
        figsize=figsize,
    )


def _difference_series(values):
    """Return the zero-padded first and second differences of ``values``.

    Entry ``i`` of the first list is ``values[i] - values[i - 1]`` (0 at
    index 0). Entry ``i`` of the second list is ``first[i] - first[i - 1]``
    (0 at indices 0 and 1), so both lists line up with ``values`` by index.
    For fewer than two values only the padding zeros are returned.
    """
    first = [0]
    first.extend(cur - prev for prev, cur in zip(values[:-1], values[1:]))
    # Start at index 2: first[0] is padding, so first[1] - first[0] would not
    # be a genuine second difference.
    second = [0, 0]
    second.extend(cur - prev for prev, cur in zip(first[1:-1], first[2:]))
    return first, second


def plot_figure(
    sorted_vals,
    distances,
    elbow,
    line,
    figure_num="1",
    with_derivatives=True,
    y_distances=None,
    figsize=(8, 3),
):
    """Draw the sorted values, the reference line and the elbow marker.

    Layout is a 3-row grid. The left column holds the main plot and, when
    ``y_distances`` is given, a panel of those distances beneath it. When
    ``with_derivatives`` is true a right column shows ``distances`` and its
    first and second differences.

    Args:
        sorted_vals: Values to plot; indexed by ``elbow`` to place the marker.
        distances: Series whose differences are shown in the right column.
            Ignored when ``with_derivatives`` is false.
        elbow: Index into ``sorted_vals`` of the detected elbow.
        line: Reference line plotted on the main axes.
        figure_num: Identifier of the matplotlib figure; it is cleared first.
        with_derivatives: Whether to add the right-hand derivative column.
        y_distances: Optional series plotted below the main plot.
        figsize: Figure size forwarded to matplotlib.
    """
    fig = plt.figure(num=figure_num, clear=True, figsize=figsize)
    # Build the grid up front so every branch below can use it, including
    # with_derivatives=True together with y_distances=None.
    gs = fig.add_gridspec(nrows=3, ncols=2 if with_derivatives else 1)

    if y_distances is not None:
        ax = fig.add_subplot(gs[:2, 0])
        ax_y = fig.add_subplot(gs[2, 0], sharex=ax)
        ax_y.axhline(0, color="black")
        ax_y.plot(y_distances)
    else:
        ax = fig.add_subplot(gs[:, 0])

    if with_derivatives:
        distances_prime, distances_prime_prime = _difference_series(distances)

        ax_d = fig.add_subplot(gs[0, 1])
        ax_d.plot(distances)
        ax_d.axhline(0, color="black")
        ax_d.xaxis.set_ticklabels([])

        ax_d_prime = fig.add_subplot(gs[1, 1], sharex=ax_d)
        ax_d_prime.plot(distances_prime)
        ax_d_prime.axhline(0, color="black")

        ax_d_prime_prime = fig.add_subplot(gs[2, 1])
        ax_d_prime_prime.plot(distances_prime_prime)
        ax_d_prime_prime.axhline(0, color="black")

    # Now the stuff for everyone
    ax.plot(line)
    ax.scatter([elbow], [sorted_vals[elbow]])
    ax.plot(sorted_vals)

In [None]:
# Query vector that every embedding row is compared against.
vector = np.array([[2, 1]])

# Small embedding used for the single-elbow demonstration.
test_embedding = np.array(
    [
        [0, 1],
        [2, 1],
        [1, 0.5],
        [1, 0],
        [4, 2],
        [8, 4],
        [1, -1],
        [-1, 2],  # Orthogonal to `vector`
    ]
)

# Same rows plus a cluster pointing (roughly) the opposite way from `vector`.
test_embedding_multiple_elbows = np.concatenate(
    [
        test_embedding,
        np.array(
            [
                [-2, y]
                for y in (-0.8, -0.9, -0.95, -1, -1.05, -1.1, -1.2, -1.5)
            ]
        ),
    ],
    axis=0,
)

# Cosine similarity (1 - cosine distance) of `vector` to each embedding row.
sims, sims_multiple_elbows = (
    1 - sklearn.metrics.pairwise_distances(vector, embedding, metric="cosine")
    for embedding in (test_embedding, test_embedding_multiple_elbows)
)

## Showing heuristic performance on a simple embedding - one elbow

In [None]:
# The current elbow-finding heuristic on the single-elbow similarities.
# The right-hand column shows the distance from the line and its first and
# second differences.
plot_heuristic(sims[0], derivatives="distance", figsize=(18, 8))

In [None]:
# What if we wanted to use a derivative of the raw values as a cutoff?
# Here the right-hand column shows the sorted values themselves and their
# first and second differences.
plot_heuristic(sims[0], figure_num=2, derivatives="values", figsize=(18, 8))

In [None]:
# Just the elbow finding: no derivative column, only the main plot with the
# raw-y distance panel beneath it.
plot_heuristic(sims[0], figure_num=3, derivatives=None, figsize=(18, 5))

## Showing heuristic performance with multiple elbows in graph

In [None]:
# The current elbow-finding heuristic on the multiple-elbow similarities,
# also showing changes in distance from the line.
plot_heuristic(sims_multiple_elbows[0], derivatives="distance", figsize=(18, 8))

In [None]:
# The same multiple-elbow data, but with the right-hand column showing the
# sorted raw values and their first and second differences.
plot_heuristic(sims_multiple_elbows[0], derivatives="values", figsize=(18, 8))