In [1]:
# %pip install bqplot

In [2]:
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt
import bqplot.pyplot as bqplt
import ipywidgets as w

%matplotlib inline
%config InlineBackend.figure_format='retina'

In [3]:
# Distribution and sampling settings. NOTE: the readme text shown in the app
# quotes the shape parameter and the sample count, so keep them in sync.
LOGNORM_SHAPE = 5
N_SAMPLES = 1000
# Fixed seed so that a fresh kernel run reproduces the same samples and figures.
SEED = 42

samples = lognorm.rvs(LOGNORM_SHAPE, size=N_SAMPLES, random_state=SEED)

def draw_matplotlib_fig(samples, percentiles=(100, 99, 95, 75, 50), bins=30):
    """Draw histograms of ``samples`` truncated at several percentiles, plus a log10 view.

    One panel is drawn per entry of ``percentiles``. Panel ``i`` shows the
    histogram of the samples that are less than or equal to the value of
    ``percentiles[i]``, and marks with dashed vertical lines the values of the
    percentiles that follow it in the sequence. A final panel shows the
    histogram of ``log10(samples)`` with every percentile value marked.

    Samples equal to the cut-off are kept, so a "p100" panel contains every
    sample (including the maximum).

    Parameters
    ----------
    samples : array-like
        Data to plot; converted with ``np.asarray``. ``np.log10`` is applied to
        it for the last panel, so non-positive values will not plot cleanly.
    percentiles : sequence of float, optional
        Percentiles (0-100) at which to truncate, one panel each. Any length
        is accepted, including an empty sequence.
    bins : optional
        Forwarded unchanged to ``Axes.hist`` for every panel.

    Returns
    -------
    fig
        The figure returned by ``plt.subplots``.
    ax : numpy.ndarray
        Flattened array of all axes in the grid (three columns, as many rows
        as needed). Axes beyond the last used panel are hidden.
    """
    samples = np.asarray(samples)
    percentiles = tuple(percentiles)
    # Compute each cut-off once instead of inside the nested plotting loops.
    cutoffs = [np.percentile(samples, pct) for pct in percentiles]

    # One panel per percentile plus the log10 panel, laid out three per row.
    n_panels = len(percentiles) + 1
    ncols = 3
    nrows = -(-n_panels // ncols)  # ceiling division
    # 4 inches of height per row keeps the default layout at (13, 8).
    fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=(13, 4 * nrows))
    ax = np.asarray(ax).reshape(-1)

    for i, (percent, cutoff) in enumerate(zip(percentiles, cutoffs)):
        ax[i].hist(samples[samples <= cutoff], bins=bins)
        ax[i].set_title(f"truncate at p{percent}")
        for later_cutoff in cutoffs[i + 1:]:
            ax[i].axvline(later_cutoff, linestyle='--')

    # Index the log panel explicitly rather than through the leaked loop variable,
    # which is undefined when ``percentiles`` is empty.
    log_ax = ax[len(percentiles)]
    log_ax.hist(np.log10(samples), bins=bins)
    log_ax.set_title("log10 transform")
    for cutoff in cutoffs:
        log_ax.axvline(np.log10(cutoff), linestyle='--')

    # Hide grid cells left over when the panel count is not a multiple of three.
    for unused in ax[n_panels:]:
        unused.set_visible(False)

    return fig, ax

In [4]:
# Disable pyplot's interactive mode; presumably so the figure is rendered only
# where it is explicitly displayed below (NOTE(review): confirm for your backend).
plt.ioff()

# Capture the static matplotlib figure inside an Output widget so it can be
# placed in a tab of the app.
out_static = w.Output()
with out_static:
    # Distinct names: a later cell binds `fig` to the bqplot figure.
    static_fig, static_axes = draw_matplotlib_fig(samples)
    display(static_fig)

# The rendered output is already captured by the widget; close the figure so
# it does not stay registered with pyplot.
plt.close(static_fig)

In [5]:
# Explanatory text for the app, rendered as HTML in the "Readme" tab.
readme = w.HTML(
    value="""
This app demonstrates the pitfalls of visualizing a distribution (with a histogram) that has a long tail. 
<br>
Indeed, some heavy-tailed distributions make it hard to see the main body of the distribution when the tail is included. To fix it, one can truncate the tail or transform the data. 
<br>
This app allows you to visualize the effect of truncation and log10-transformation on a heavy-tailed distribution.
<br>
Data is generated by sampling a <a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html">log-normal distribution with shape parameter 5</a>.
<br>
The number of samples is 1000.
"""
)

In [6]:
# Figure hosting the interactive histogram (placed in the first tab below).
fig = bqplt.figure()

# np.histogram returns (counts, bin_edges) and bin_edges has one more entry
# than counts. Plot each count at the centre of its bin so that the bar mark
# receives x and y arrays of the same length.
counts, edges = np.histogram(samples, bins=30)
centers = (edges[:-1] + edges[1:]) / 2
bar = bqplt.bar(centers, counts)

def on_change(_):
    """Recompute the interactive histogram from the current widget values.

    Registered as the observer of both ``truncate_p`` and ``scale_transform``.
    The change notification passed by the widget is ignored; the current
    values are read directly from the widgets instead.

    Samples less than or equal to the selected percentile are kept, so a
    slider value of 100 shows every sample. When the 'log' scale is selected
    the kept samples are log10-transformed before binning.
    """
    cutoff = np.percentile(samples, truncate_p.value)
    kept = samples[samples <= cutoff]
    if scale_transform.value == 'log':
        kept = np.log10(kept)

    # np.histogram returns one more edge than counts; use bin centres so the
    # bar mark gets x and y arrays of equal length.
    counts, edges = np.histogram(kept, bins=30)
    bar.x = (edges[:-1] + edges[1:]) / 2
    bar.y = counts

# Controls read by on_change: the percentile used to truncate the samples
# (starts at 100) and the choice between linear and log scale.
truncate_p = w.FloatSlider(value=100, min=0, max=100, description="Truncate pct")
scale_transform = w.ToggleButtons(options=['linear', 'log'])

# Either control changing its value triggers a redraw of the histogram.
for control in (truncate_p, scale_transform):
    control.observe(on_change, names="value")

# Three tabs: the interactive plot with its controls, the static matplotlib
# output, and the explanatory text.
tabs = w.Tab(children=[
    w.VBox(children=[truncate_p, scale_transform, fig]),
    out_static,
    readme,
])
for index, title in enumerate(("Interactive", "Static", "Readme")):
    tabs.set_title(index, title)

tabs

Tab(children=(VBox(children=(FloatSlider(value=100.0, description='Truncate pct'), ToggleButtons(options=('lin…