
# Data vs histogram model

Plot data and a model with stacked histogram components.


In [None]:
from plothist import get_dummy_data
from plothist import make_hist
from plothist import get_color_palette
from plothist import plot_data_model_comparison
from plothist import add_luminosity

Install the Latin Modern fonts. Please restart the kernel afterwards.

In [None]:
# IPython shell escape: runs the `install_latin_modern_fonts` command in the system shell.
# Restart the kernel once it has finished, as noted in the text above this cell.
!install_latin_modern_fonts

Load dummy data.

In [None]:
# Load the example dataset; the following cells read its "variable_1" and
# "category" columns (presumably a pandas DataFrame -- confirm in the plothist docs).
df = get_dummy_data()

# Bare expression as the last statement of the cell so the notebook displays it.
df

Define and fill data and background histograms.

In [None]:
# Column to histogram and the column holding the category of each entry
key = "variable_1"
category = "category"

# x-axis range shared by all histograms.
# Deliberately not called `range`: that would shadow the builtin range() for
# every cell executed afterwards in the same kernel session.
x_range = [-9, 12]

# Define data mask (1 category)
data_mask = df[category] == 8

# Define background masks (3 categories)
background_categories = [0, 1, 2]
background_masks = [df[category] == p for p in background_categories]

# Define background labels and colors (one color per background category)
background_categories_labels = [f"c{i}" for i in background_categories]
background_categories_colors = get_color_palette("cubehelix", len(background_categories))

# Create the histograms using the masks defined above.
# Data and backgrounds use the same binning and range so they can be compared bin by bin.
data_hist = make_hist(df[key][data_mask], bins=50, range=x_range, weights=1)
background_hists = [make_hist(df[key][mask], bins=50, range=x_range, weights=1) for mask in background_masks]

# Scale background to data: the factor is the ratio of the total data yield
# to the total yield of the summed background histograms.
# boost_histogram.Histogram objects are easy to manipulate and can be multiplied by a factor
# The variance in each bin is correctly scaled as well
background_scaling_factor = data_hist.sum().value / sum(background_hists).sum().value
background_hists = [background_scaling_factor * h for h in background_hists]

Plot the comparison between the data and the stacked model.

In [None]:
# Everything describing the stacked background components, grouped in one place
stacked_options = {
    "stacked_components": background_hists,
    "stacked_labels": background_categories_labels,
    "stacked_colors": background_categories_colors,
}

# Compare the data histogram with the stacked model; the result is unpacked
# into the figure and its two axes (main panel and comparison panel).
fig, ax_main, ax_comparison = plot_data_model_comparison(
    data_hist=data_hist,
    xlabel=key,
    ylabel="Entries",
    **stacked_options,
)

# Add integrated luminosity on the main panel
add_luminosity(
    collaboration="plothist",
    ax=ax_main,
    lumi=3,
    lumi_unit="zb",
    preliminary=True,
)