
# Data vs model with stacked components

Plot data and a model with stacked components.


In [None]:
from plothist import get_dummy_data
from plothist import make_hist
from plothist import get_color_palette
from plothist import plot_data_model_comparison
from plothist import plot_hist
from plothist import add_luminosity

Load dummy data.

In [None]:
# Load the example dataset shipped with plothist.
# NOTE(review): presumably a pandas DataFrame (the cells below index it by column name) — confirm.
df = get_dummy_data()

# Bare expression as the last statement of the cell: the notebook displays its value.
df

Define and fill signal, data, and background histograms.

In [None]:
# Column to histogram, the x-axis range shared by all histograms, and the column
# holding the category label used to split the dataset.
key = "variable_1"
# Named x_range (not `range`) so the Python builtin range() is not shadowed
# for the rest of the notebook session.
x_range = [-9, 12]
category = "category"

# Define some masks to separate the dataset into signal (1 category), background (3 categories) and data (1 category)
signal_mask = df[category] == 7
data_mask = df[category] == 8

background_categories = [0, 1, 2]
background_categories_labels = [f"c{i}" for i in background_categories]
background_categories_colors = get_color_palette("cubehelix", len(background_categories))

# One boolean mask per background category, in the same order as the labels and colors
background_masks = [df[category] == p for p in background_categories]

# Create the histograms using the masks defined above
# (same binning and range everywhere so the histograms can be summed and compared)
data_hist = make_hist(df[key][data_mask], bins=50, range=x_range, weights=1)
background_hists = [make_hist(df[key][mask], bins=50, range=x_range, weights=1) for mask in background_masks]
signal_hist = make_hist(df[key][signal_mask], bins=50, range=x_range, weights=1)

# Optional: scale to data
# boost_histogram.Histogram objects are easy to manipulate.
# Here, we multiply them by a scalar to scale them, and their variance is correctly scaled as well.
# The total background yield is normalised to the total data yield.
background_scaling_factor = data_hist.sum().value / sum(background_hists).sum().value
background_hists = [background_scaling_factor * h for h in background_hists]

# The signal is independently normalised to the total data yield (scaled in place).
signal_scaling_factor = data_hist.sum().value / signal_hist.sum().value
signal_hist *= signal_scaling_factor

Plot the data/model comparison and overlay the signal histogram.

In [None]:
# Draw the data against the model built from the stacked background histograms.
# The call hands back the figure and two axes (named here for the main panel
# and the comparison panel).
fig, ax_main, ax_comparison = plot_data_model_comparison(
    data_hist=data_hist,
    stacked_components=background_hists,
    stacked_colors=background_categories_colors,
    stacked_labels=background_categories_labels,
    ylabel="Entries",
    xlabel=key,
)

# Overlay the signal on the main axes only: it is not part of the model,
# so it is not included in the comparison.
plot_hist(signal_hist, ax=ax_main, histtype="step", color="red", label="Signal")

# Refresh the legend after adding the signal histogram
ax_main.legend()

# Annotate the main axes with the integrated luminosity
add_luminosity(
    collaboration="plothist",
    ax=ax_main,
    lumi=3,
    lumi_unit="zb",
    preliminary=True,
)