# KDE histogram comparison


This notebook compares a kernel density estimate (KDE) to a regular histogram, using 1D and 2D binnings of a toy dataset at several sample sizes.

In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import proplot as pplt
import torch

import mentflow as mf

In [None]:
# Notebook-wide proplot style overrides, applied in order through the rc
# mapping interface.
rc_overrides = {
    "cmap.discrete": False,
    "cmap.sequential": "viridis",
    "figure.facecolor": "white",
    "grid": False,
}
for rc_key, rc_value in rc_overrides.items():
    pplt.rc[rc_key] = rc_value

## Settings

In [None]:
# Shared settings read by both the 1D and 2D comparison cells.
data_name = "swissroll"  # name passed to mf.data.toy.gen_data
xmax = 3.0  # bin edges span [-xmax, xmax]
n_bins = 100  # number of bins along each histogram axis
# Sample sizes 1e3 .. 1e6, kept as floats; they are cast with int() where a
# count is needed and formatted in scientific notation for plot titles.
sizes = [float(10**exponent) for exponent in range(3, 7)]

## 1D

In [None]:
# 1D comparison: one panel per sample size, overlaying the diagnostic output
# with kde=0 and kde=1 (labelled "hist" and "kde" respectively).
bin_edges = torch.linspace(-xmax, xmax, n_bins + 1)
bin_coords = (bin_edges[1:] + bin_edges[:-1]) / 2  # bin centers
diagnostic = mf.diagnostics.Histogram1D(axis=0, bin_edges=bin_edges)

labels = ["hist", "kde"]
colors = ["blue8", "red8"]
centers = bin_coords.numpy()  # converted once; reused for every curve

fig, axs = pplt.subplots(ncols=len(sizes), figsize=(6.0, 1.25))
for ax, size in zip(axs, sizes):
    # Draw a fresh sample of the requested size and cast it to float32.
    samples = mf.data.toy.gen_data(data_name, size=int(size))
    X = torch.tensor(samples).to(torch.float32)
    for i, (label, color) in enumerate(zip(labels, colors)):
        hist = diagnostic(X, kde=i)
        ax.plot(centers, hist.numpy(), label=label, color=color)
    ax.format(title=f"n = {size:0.2e}")

# A single legend on the last panel serves the whole row.
axs[-1].legend(loc="r", ncols=1, framealpha=0.0, handlelength=1.5)
plt.show()

## 2D

In [None]:
# 2D comparison: top row shows the diagnostic output with kde=0 ("hist"),
# bottom row with kde=1 ("kde"); one column per sample size.
#
# `sizes` is read from the Settings cell rather than re-declared here, so that
# changing it there updates the 1D and 2D figures consistently.
bin_edges = 2 * [torch.linspace(-xmax, xmax, n_bins + 1)]
diagnostic = mf.diagnostics.Histogram2D(axis=(0, 1), bin_edges=bin_edges)

fig, axs = pplt.subplots(ncols=len(sizes), figwidth=6.0, nrows=2)
for j, size in enumerate(sizes):
    # Draw a fresh sample of the requested size and cast it to float32.
    X = torch.tensor(mf.data.toy.gen_data(data_name, size=int(size)))
    X = X.type(torch.float32)
    for i in range(2):
        hist = diagnostic(X, kde=i)
        # pcolormesh takes C with rows along y and columns along x; the
        # transpose presumably maps an [x, y]-indexed histogram onto that
        # layout (confirm against Histogram2D).
        axs[i, j].pcolormesh(
            bin_edges[0].numpy(),
            bin_edges[1].numpy(),
            hist.numpy().T,
        )
    axs[0, j].format(title=f"n = {size:0.2e}")
axs.format(leftlabels=["hist", "kde"])
plt.show()