In [1]:
import math

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly import subplots

# Seed NumPy's global random generator so that the np.random.randn /
# np.random.choice draws used to build the demo data are reproducible.
np.random.seed(1337)

In [2]:
# Synthetic demo data: `n` rows, three base columns, then `k` numeric and
# `k` label columns. The order of the np.random calls is kept fixed so a
# seeded run always yields the same frame.
n = 1000
k = 10

row_names = [f"id_{row:03d}" for row in range(n)]
x_values = np.random.randn(n)
category_values = np.random.choice(["foo", "bar", "baz"], n)
d_plot = pd.DataFrame(
    {"name": row_names, "x": x_values, "category": category_values}
)

# Numeric columns: column i is normal noise scaled by (i + 1) and shifted by i.
for i in range(k):
    noise = np.random.randn(n)
    d_plot[f"column_name_y_{i:02d}"] = i + (i + 1) * noise

# Label columns: column i draws uniformly from i + 1 distinct labels.
for i in range(k):
    label_pool = [f"foo_{j:02d}" for j in range(i + 1)]
    d_plot[f"column_name_z_{i:02d}"] = np.random.choice(label_pool, n)

In [3]:
# Distribution plot for one variable
#
# Draws a single histogram of `d_plot[val_name]`. Numeric columns are binned
# along the x-axis with counts on y; text-like columns (object, string or
# categorical dtype) are put on the y-axis with counts on x.

# --- Inputs ---
d_plot = d_plot  # no-op placeholder; rebind here to plot a different frame
val_name = "x"
bucket_size = None  # bin width passed to Plotly; None lets Plotly choose

values = d_plot[val_name]
# Decide the orientation once, using pandas' dtype helpers instead of
# `dtype == "object"`: that comparison misses pandas' dedicated string dtype
# (the default for text columns from pandas 3.0 on) and categorical columns.
is_categorical = (
    pd.api.types.is_object_dtype(values.dtype)
    or pd.api.types.is_string_dtype(values.dtype)
    or isinstance(values.dtype, pd.CategoricalDtype)
)

fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=None if is_categorical else values,
        y=values if is_categorical else None,
        name="",  # empty name keeps the trace label out of the hover box
        nbinsx=None,
        xbins=dict(size=bucket_size, start=None, end=None),
        marker=dict(color="cornflowerblue"),
        # The value sits on y for text-like data and on x otherwise.
        hovertemplate=(
            "value: %{y}<br>count: %{x}"
            if is_categorical
            else "value: %{x}<br>count: %{y}"
        ),
    )
)
fig.update_layout(
    template="plotly_white",
    width=1000,
    height=500,
    title=f"{val_name.capitalize()} distribution",
    legend_title="Legend",
    showlegend=False,
)

# Axis settings are the same for both orientations; only which axis receives
# them is swapped.
value_axis = dict(title_text=val_name.capitalize(), range=None, dtick=bucket_size)
count_axis = dict(title_text="Count", range=None, dtick=None)
if is_categorical:
    fig.update_xaxes(**count_axis)
    fig.update_yaxes(**value_axis)
else:
    fig.update_xaxes(**value_axis)
    fig.update_yaxes(**count_axis)
fig.show()

In [4]:
# Distribution plot of a set of variables
#
# Lays out one histogram per entry of `col_names` on a grid with
# `n_plot_cols` columns. Numeric columns are binned along x; text-like
# columns (object, string or categorical dtype) are put on y with counts on x.

# --- Inputs ---
d_plot = d_plot  # no-op placeholder; rebind here to plot a different frame
col_names = [
    "column_name_y_00",
    "column_name_z_04",
    "column_name_y_02",
    "column_name_y_03",
    "column_name_y_04",
    "column_name_z_08",
    "column_name_y_06",
    "column_name_y_07",
]
n_plots = len(col_names)
n_plot_cols = 3
n_plot_rows = math.ceil(n_plots / n_plot_cols)

fig = subplots.make_subplots(
    rows=n_plot_rows,
    cols=n_plot_cols,
    subplot_titles=[c.capitalize() for c in col_names],
    horizontal_spacing=0.1,
)
for i, val_name in enumerate(col_names):
    values = d_plot[val_name]
    # Use pandas' dtype helpers instead of `dtype == "object"`: that
    # comparison misses pandas' dedicated string dtype (the default for text
    # columns from pandas 3.0 on) and categorical columns.
    is_categorical = (
        pd.api.types.is_object_dtype(values.dtype)
        or pd.api.types.is_string_dtype(values.dtype)
        or isinstance(values.dtype, pd.CategoricalDtype)
    )
    # Fill the grid row by row; Plotly subplot indices are 1-based.
    row_offset, col_offset = divmod(i, n_plot_cols)
    grid_row, grid_col = row_offset + 1, col_offset + 1

    fig.add_trace(
        go.Histogram(
            x=None if is_categorical else values,
            y=values if is_categorical else None,
            name="",  # empty name keeps the trace label out of the hover box
            nbinsx=None,
            xbins=dict(size=None, start=None, end=None),
            marker=dict(color="cornflowerblue"),
            # Label the hover per trace so horizontal and vertical subplots
            # both read "value" then "count".
            hovertemplate=(
                "value: %{y}<br>count: %{x}"
                if is_categorical
                else "value: %{x}<br>count: %{y}"
            ),
        ),
        row=grid_row,
        col=grid_col,
    )
    # Only the count axis is titled; the subplot title names the variable.
    if is_categorical:
        fig.update_xaxes(title_text="Count", row=grid_row, col=grid_col)
    else:
        fig.update_yaxes(title_text="Count", row=grid_row, col=grid_col)
fig.update_layout(
    template="plotly_white",
    width=1200,
    height=80 + 250 * n_plot_rows,  # fixed header allowance plus 250 px per row
    title="Distributions",
    showlegend=False,
)
# Render explicitly, like the other plotting cells, instead of relying on the
# notebook echoing the value returned by update_layout.
fig.show()

In [5]:
# Grouped distribution plot with multiple categories
#
# Draws one histogram trace of `d_plot[val_name]` per entry of `cats`,
# selecting rows where `d_plot[cat_name]` equals that entry. Numeric value
# columns are binned along x; text-like value columns (object, string or
# categorical dtype) are put on y with counts on x.

# --- Inputs ---
d_plot = d_plot  # no-op placeholder; rebind here to plot a different frame
val_name = "x"
cat_name = "category"
cats = ["foo", "bar", "baz"]
colors = ["cornflowerblue", "coral", "mediumseagreen"]
bucket_size = None  # bin width passed to Plotly; None lets Plotly choose

# Decide the orientation once, using pandas' dtype helpers instead of
# `dtype == "object"`: that comparison misses pandas' dedicated string dtype
# (the default for text columns from pandas 3.0 on) and categorical columns.
value_dtype = d_plot[val_name].dtype
is_categorical = (
    pd.api.types.is_object_dtype(value_dtype)
    or pd.api.types.is_string_dtype(value_dtype)
    or isinstance(value_dtype, pd.CategoricalDtype)
)

fig = go.Figure()
for i, category in enumerate(cats):
    # Select this category's values in one .loc step.
    group_values = d_plot.loc[d_plot[cat_name] == category, val_name]
    fig.add_trace(
        go.Histogram(
            x=None if is_categorical else group_values,
            y=group_values if is_categorical else None,
            name=category,
            showlegend=True,
            nbinsx=None,
            xbins=dict(size=bucket_size, start=None, end=None),
            # Cycle through the palette so having more categories than
            # colors reuses colors instead of raising IndexError.
            marker=dict(color=colors[i % len(colors)]),
            # The value sits on y for text-like data and on x otherwise.
            hovertemplate=(
                "value: %{y}<br>count: %{x}"
                if is_categorical
                else "value: %{x}<br>count: %{y}"
            ),
        )
    )
fig.update_layout(
    template="plotly_white",
    width=1000,
    height=500,
    title=f"{val_name.capitalize()} distribution by {cat_name}",
    legend_title=cat_name.capitalize(),
    showlegend=True,
)

# Axis settings are the same for both orientations; only which axis receives
# them is swapped.
value_axis = dict(title_text=val_name.capitalize(), range=None, dtick=bucket_size)
count_axis = dict(title_text="Count", range=None, dtick=None)
if is_categorical:
    fig.update_xaxes(**count_axis)
    fig.update_yaxes(**value_axis)
else:
    fig.update_xaxes(**value_axis)
    fig.update_yaxes(**count_axis)
fig.show()