In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Seed NumPy's global random state so the np.random draws in this notebook are repeatable.
np.random.seed(1337)

In [2]:
# Synthetic demo data: n rows, each tagged with one of k categories and one of
# two subcategories, plus a numeric column "x".
n = 1000
k = 20

row_names = [f"id_{row:03d}" for row in range(n)]
category_labels = [f"category_{idx:02d}" for idx in range(k)]

# The three draws are made in this order (x, category, subcategory) so the
# sequence of values taken from the global NumPy random state is fixed.
x_draws = 5 + np.random.randn(n)
category_draws = np.random.choice(category_labels, n)
subcategory_draws = np.random.choice(["hello", "world"], n)

d_plot = pd.DataFrame(
    {
        "name": row_names,
        "x": x_draws,
        "category": category_draws,
        "subcategory": subcategory_draws,
    }
)

In [3]:
# Basic vertical bar chart
#
# Sums `val_name` per `cat_name` over `d_plot`, keeps the `top_n` categories
# with the largest sums and draws one vertical bar per category. Each bar is
# labelled with its sum and its percentage of the sum over all rows of `d_plot`.

val_name = "x"
cat_name = "category"
top_n = 20  # number of categories to keep
max_bars = 30  # upper limit on bars; only reachable when top_n is raised above it

# Aggregate once; the result provides both the category list and the plot data.
e = (
    d_plot.groupby(cat_name)[[val_name]]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=False)
    .head(top_n)
)
cat_values = e[cat_name]
total_sum = d_plot[val_name].sum()
if len(e) > max_bars:
    raise ValueError("Too many values to plot")

fig = go.Figure()
# One single-bar trace per category, added from largest to smallest sum.
for category, value in zip(e[cat_name], e[val_name]):
    # The share is computed from the unrounded sum so that rounding the
    # displayed value cannot shift the displayed percentage.
    share = round(100 * value / total_sum, 1)
    fig.add_trace(
        go.Bar(
            x=[category],
            y=[value],
            text=[f"{round(value, 1)} ({share}%)"],
            name="",
            marker=dict(color="cornflowerblue"),
        )
    )
fig.update_traces(hovertemplate="%{y}")
fig.update_layout(
    template="plotly_white",
    # Figure width scales with the number of bars.
    width=50 + len(fig.data) * 40,
    height=600,
    title=f"{val_name.capitalize()} by {cat_name}",
    showlegend=False,
)
fig.update_xaxes(title=cat_name.capitalize(), tickangle=90)
fig.update_yaxes(title=val_name.capitalize(), range=None, dtick=None)
fig.show()

In [4]:
# Basic horizontal bar chart
#
# Sums `val_name` per `cat_name` over `d_plot`, keeps the `top_n` categories
# with the largest sums and draws one horizontal bar per category. Each bar is
# labelled with its sum and its percentage of the sum over all rows of `d_plot`.

val_name = "x"
cat_name = "category"
top_n = 20  # number of categories to keep
max_bars = 30  # upper limit on bars; only reachable when top_n is raised above it

# Aggregate once; the result provides both the category list and the plot data.
top_categories = (
    d_plot.groupby(cat_name)[[val_name]]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=False)
    .head(top_n)
)
cat_values = top_categories[cat_name]
total_sum = d_plot[val_name].sum()
# Ascending order: traces are added from the smallest sum to the largest.
e = top_categories.sort_values(val_name, ascending=True)
if len(e) > max_bars:
    raise ValueError("Too many values to plot")

fig = go.Figure()
# One single-bar trace per category.
for category, value in zip(e[cat_name], e[val_name]):
    # The share is computed from the unrounded sum so that rounding the
    # displayed value cannot shift the displayed percentage.
    share = round(100 * value / total_sum, 1)
    fig.add_trace(
        go.Bar(
            x=[value],
            y=[category],
            text=[f"{round(value, 1)} ({share}%)"],
            orientation="h",
            name="",
            marker=dict(color="cornflowerblue"),
        )
    )
fig.update_traces(hovertemplate="%{x}")
fig.update_layout(
    template="plotly_white",
    width=1000,
    # Figure height scales with the number of bars.
    height=50 + len(fig.data) * 40,
    title=f"{val_name.capitalize()} by {cat_name}",
    showlegend=False,
)
fig.update_xaxes(title=val_name.capitalize(), range=None, dtick=None)
fig.update_yaxes(title=cat_name.capitalize())
fig.show()

In [5]:
# Grouped vertical bar chart with multiple categories
#
# Keeps the `top_n` values of `cat_name` with the largest sum of `val_name`,
# then sums `val_name` per (category, subcategory) pair and draws one trace per
# entry of `subcat_values`, grouped side by side along the category axis.

val_name = "x"
cat_name = "category"
top_n = 10  # number of categories to keep
max_bars = 30  # upper limit on the total number of bars
cat_values = (
    d_plot.groupby([cat_name])[val_name]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=False)[cat_name]
    .head(top_n)
)
subcat_name = "subcategory"
subcat_values = ["hello", "world"]
colors = ["cornflowerblue", "coral"]
e = (
    d_plot[
        (d_plot[cat_name].isin(cat_values)) & (d_plot[subcat_name].isin(subcat_values))
    ]
    .groupby([cat_name, subcat_name])[[val_name]]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=True)
)
if len(e) > max_bars:
    raise ValueError("Too many values to plot")

fig = go.Figure()
for i, subcategory in enumerate(subcat_values):
    # `e` already contains only the selected categories, so filtering on the
    # subcategory alone is enough; do it once and reuse the slice.
    subcat_rows = e[e[subcat_name] == subcategory]
    fig.add_trace(
        go.Bar(
            x=subcat_rows[cat_name],
            y=subcat_rows[val_name],
            text=subcat_rows[val_name].round(1),
            name=subcategory,
            # Cycle through the palette so that listing more subcategories
            # than colors does not raise an IndexError.
            marker=dict(color=colors[i % len(colors)]),
        )
    )
fig.update_traces(hovertemplate="%{y}")
fig.update_layout(
    barmode="group",
    template="plotly_white",
    # Figure width scales with the number of bars (categories x subcategories).
    width=50 + len(cat_values) * len(subcat_values) * 40,
    height=600,
    title=f"{val_name.capitalize()} by {cat_name} and {subcat_name}",
    legend_title=subcat_name.capitalize(),
    showlegend=True,
)
fig.update_xaxes(
    title=cat_name.capitalize(),
    tickangle=90,
    # Category axis order follows `cat_values` (largest total first).
    categoryorder="array",
    categoryarray=cat_values,
)
fig.update_yaxes(title=val_name.capitalize(), range=None, dtick=None)
fig.show()

In [6]:
# Grouped horizontal bar chart with multiple categories
#
# Keeps the `top_n` values of `cat_name` with the largest sum of `val_name`,
# then sums `val_name` per (category, subcategory) pair and draws one
# horizontal trace per entry of `subcat_values`, grouped along the category axis.

val_name = "x"
cat_name = "category"
top_n = 10  # number of categories to keep
max_bars = 30  # upper limit on the total number of bars
cat_values = (
    d_plot.groupby([cat_name])[val_name]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=False)[cat_name]
    .head(top_n)
)
subcat_name = "subcategory"
subcat_values = ["hello", "world"]
colors = ["cornflowerblue", "coral"]
e = (
    d_plot[
        (d_plot[cat_name].isin(cat_values)) & (d_plot[subcat_name].isin(subcat_values))
    ]
    .groupby([cat_name, subcat_name])[[val_name]]
    .sum()
    .reset_index()
    .sort_values(val_name, ascending=True)
)
if len(e) > max_bars:
    raise ValueError("Too many values to plot")

fig = go.Figure()
for i, subcategory in enumerate(subcat_values):
    # `e` already contains only the selected categories, so filtering on the
    # subcategory alone is enough; do it once and reuse the slice.
    subcat_rows = e[e[subcat_name] == subcategory]
    fig.add_trace(
        go.Bar(
            x=subcat_rows[val_name],
            y=subcat_rows[cat_name],
            text=subcat_rows[val_name].round(1),
            # Set the orientation when the trace is built instead of patching
            # it onto every trace afterwards.
            orientation="h",
            name=subcategory,
            # Cycle through the palette so that listing more subcategories
            # than colors does not raise an IndexError.
            marker=dict(color=colors[i % len(colors)]),
        )
    )
fig.update_traces(hovertemplate="%{x}")
fig.update_layout(
    barmode="group",
    template="plotly_white",
    width=900,
    # Figure height scales with the number of bars (categories x subcategories).
    height=50 + len(cat_values) * len(subcat_values) * 40,
    title=f"{val_name.capitalize()} by {cat_name} and {subcat_name}",
    legend_title=subcat_name.capitalize(),
    showlegend=True,
)
fig.update_xaxes(title=val_name.capitalize(), range=None, dtick=None)
fig.update_yaxes(
    title=cat_name.capitalize(),
    # Category axis order follows `cat_values`, with the axis direction reversed.
    autorange="reversed",
    categoryorder="array",
    categoryarray=cat_values,
)
fig.show()