In [None]:
%config InteractiveShell.ast_node_interactivity="last_expr_or_assign"

import hist
from hist import Hist
import numpy as np
import mplhep
from PIL import Image
import matplotlib.pyplot as plt

# Hist: The analyst's friend

The boost-histogram library should be viewed like NumPy: a powerful, fundamental library for supporting histograms. However, it has no dependencies and is designed to be exact. It is meant to be usable as a backend for other histogramming libraries.

Just as Pandas provides a nice interface to NumPy that adds columns, plotting adapters, and more, Hist is the analyst-friendly frontend to boost-histogram.

| boost-histogram      | Hist                        |
|----------------------|-----------------------------|
| No dependencies      | Dependencies allowed        |
| Verbose but clear    | Can reduce typing           |
| First principles     | Wrappers for common needs   |
| Interfaces with NumPy | Interfaces with other tools |

If a feature doesn't add dependencies and is useful/popular, it *may* be upstreamed to boost-histogram. Let us know what you like!

Hist currently provides 4 things that boost-histogram doesn't have.

Names, UHI+, Plotting, Quick Constructors

---

# 1: Meaning to metadata: titles and names

An axis in Hist can have a `name` and a `title`. The name is special: it needs to be unique within a histogram (but not across histograms), and is completely optional - you can even mix named and unnamed axes.

A `name` can be used to identify an axis anywhere a number can be used!

In [None]:
# Two named regular axes: "x" with 10 bins from -1 to 1 and "y" with 20 bins
# from -2 to 2. The names are what the following cells use to fill, project
# and index this histogram.
h = Hist(
    hist.axis.Regular(10, -1, 1, name="x"), hist.axis.Regular(20, -2, 2, name="y"),
)

In [None]:
# Fill by axis name: the keyword arguments match the axis names "x" and "y",
# each receiving one million normally distributed samples.
h.fill(x=np.random.normal(size=1_000_000), y=np.random.normal(size=1_000_000))

In [None]:
# Project the 2D histogram onto the axis named "y" and draw the result with mplhep.
mplhep.histplot(h.project("y"));

In [None]:
# Dict-style indexing keyed by axis name; `sum` in a slot sums over that axis.
h[{"x": sum, "y": sum}]

A `title` will override the name when plotting; but if unset, the name will be used in plotting.

In [None]:
# Look each axis up by name and give it a title, then draw the 2D histogram.
h.axes["x"].title = "x [μm]"
h.axes["y"].title = "y [cm]"
mplhep.hist2dplot(h);

> Note to self: name probably should not be changeable once an axis is created...

If you love names, there's even an experimental `NamedHist` that enforces *only* named access - you can't use normal positional access on a `NamedHist`.

---

# 2: UHI+

Hist supports an experimental UHI addition that allows for ultra-terse UHI. You can perform all manipulations directly inline without extra imports.

Rules:

Single bin or slice endpoints:
* `3` -> bin coordinate 3 (like NumPy/boost-histogram)
* `1.5j` -> data coordinate 1.5 (`bh.loc(1.5)`)
* `"string"` -> String category (`bh.loc("string")`)

Action slot:
* `2j` -> rebin by 2 over axis or range (`bh.rebin(2)`)
* `sum` -> Sum over axis or range (like boost-histogram)

For example, let's make something mildly interesting:

In [None]:
# Open the image that the next cell samples from; the path is relative to the
# working directory.
sp = Image.open("SciPy Icon.png")

In [None]:
# Keep only channel index 3 of the image array as a 2D map.
# NOTE(review): presumably the alpha channel of an RGBA image - confirm against the file.
arr = np.asarray(sp)[:, :, 3]
# Three rows of 200,000 uniform samples: x and y positions plus a per-point
# acceptance threshold p. The generator is unseeded, so each run differs.
x, y, p = np.random.default_rng().random((3, 200_000))

# Scale the unit-interval positions to integer indices into `arr`.
# NOTE(review): 140 and 120 are hard-coded; presumably the image is 140 px wide
# and 120 px tall - confirm, since a smaller image would raise IndexError below.
bx = (x * 140).astype(np.int32)
by = (y * 120).astype(np.int32)

# Probability of hit
prob = arr[by, bx] / 255

# True if hit
hits = prob > p

# Select just hits that are in the blue parts
X = x[hits]
# Scale y by 12/14, shift it by 1/14, and flip it (1 - ...).
# NOTE(review): presumably this keeps the 140:120 aspect ratio inside the unit
# square and converts from image row order to plot orientation - confirm.
Y = 1 - (y[hits] / 14 * 12 + (1 / 14));

Make a histogram:

In [None]:
# 300 x 300 regular bins from 0 to 1 on both axes, with names and titles set
# at construction time.
sci = Hist(
    hist.axis.Regular(300, 0, 1, name="x", title="x [cm]"),
    hist.axis.Regular(300, 0, 1, name="y", title="y [cm]"),
)

# Positional fill with the X and Y samples selected in the previous cell.
sci.fill(X, Y)

In [None]:
# Plot the full, unsliced histogram as the reference for the slices below.
sci.plot();

In [None]:
# Imaginary numbers are data coordinates: keep x from 0.7 upward and y between
# 0.5 and 0.8, then plot the selection.
sci[0.7j:, 0.5j:0.8j].plot();

In [None]:
# An imaginary number in the step (action) slot rebins: here by 4 on both axes.
sci[::4j, ::4j].plot();

In [None]:
# Rebind `h` to a histogram with one string-category axis ("a", "b", "c") and
# fill it with three "a", two "b" and one "c".
h = Hist(hist.axis.StrCategory(["a", "b", "c"]))
h.fill(["a", "a", "a", "b", "b", "c"])

In [None]:
# A plain string index selects that category (the terse form of bh.loc("a")).
h["a"]

In [None]:
# Strings can also be used as slice endpoints.
h["a":"c"]

---

# 3: Quick plotting

You can make several useful plots directly in Hist using matplotlib, inline, through a pandas-like `.plot()` interface.

In [None]:
# Default `.plot()` on the 2D `sci` histogram.
sci.plot();

In [None]:
# `plot2d_full` variant of the 2D plot for the same histogram.
sci.plot2d_full();

In [None]:
# Restrict x to data coordinates 0.1 to 0.9, sum over y, and plot what remains.
sci[0.1j:0.9j, sum].plot();

I'm saving one more plot type for later!

---

# 4: Quick constructors

We've avoided using the `hist` namespace, and just did everything with the `Hist` object itself, except construction. But Hist supports a **very experimental** quick construction system! It's even written in the same naming scheme as Hist:


In [None]:
# Seeded generator, so this cell's samples are the same on every run.
# Note that x and y are rebound here and no longer hold the earlier samples.
rng = np.random.default_rng(1)
x = rng.normal(size=10_000)
y = rng.normal(size=10_000)

# Chain quick constructors to add a Log axis and then a Sqrt axis, then fill
# and plot in a single expression.
Hist.Log(30, 0.1, 3).Sqrt(30, 0, 3).fill(x, y).plot();

In [None]:
# Build a four-axis histogram with chained quick constructors: two regular
# axes ("p_T", "eta") and two boolean axes ("sim", "cand"). It is not filled
# in this cell.
nicely_organised = (
    Hist.Reg(10, 0, 10, name="p_T", title="$p_T$")
        .Reg(5, 0, 5, name="eta", title=r"$\eta$")
        .Bool(name="sim", title="is simulation")
        .Bool(name="cand", title="is candidate")
)

# Sum over the "cand" axis, combining candidates and non-candidates.
candidates_and_non_candidates = nicely_organised[{"cand": sum}]
# Select the True entry on both boolean axes.
# NOTE(review): "canidates" is a typo for "candidates"; the name is reused in
# later cells, so any rename has to be applied everywhere at once.
simulation_canidates = nicely_organised[{"cand": True, "sim": True}]

In [None]:
# Plot the selection made with {"cand": True, "sim": True}.
simulation_canidates.plot();

---

And remember, this is all backed by boost-histogram! We can go back and forth any time:

In [None]:
import boost_histogram as bh
# Hist -> boost-histogram: pass the Hist object to the bh.Histogram constructor.
b_sim = bh.Histogram(simulation_canidates)

In [None]:
# boost-histogram -> Hist: the reverse conversion, again through the constructor.
Hist(b_sim)

---

# Demo: all together now!

In [None]:
import uproot4
from skhep_testdata import data_path

In [None]:
# Open the example ROOT file; data_path() supplies the path to the named
# scikit-hep test file. The handle is kept open for the cells below.
rfile = uproot4.open(data_path("uproot-hepdata-example.root"))

In [None]:
# Read the "hpx" object from the file, convert it to a Hist, and draw it with mplhep.
h1 = rfile["hpx"].to_hist()
mplhep.histplot(h1) # try .to_boost, .to_hist

In [None]:
# Same histogram drawn through Hist's own `.plot()` instead of mplhep.
h1.plot();

In [None]:
from uncertainties import unumpy as unp


def pdf(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
    """Evaluate ``a * exp(-(x - x0)**2 / (2 * sigma**2)) + offset``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which to evaluate the curve.
    a : scalar or array, optional
        Amplitude. Defaults to ``1 / sqrt(2 * pi)``. May be a plain Python
        number, a NumPy scalar/array, or an object-dtype value.
    x0 : scalar, optional
        Centre of the curve.
    sigma : scalar, optional
        Width of the curve.
    offset : scalar, optional
        Constant added to the result.

    Returns
    -------
    The curve evaluated at ``x``, with whatever type the arithmetic produces.
    """
    # Pick the exponential by the dtype of `a`: object dtype goes through
    # `unp.exp`, everything else through `np.exp`. Plain Python numbers have
    # no `.dtype`, so look it up defensively instead of raising AttributeError.
    a_dtype = getattr(a, "dtype", None)
    is_object = a_dtype is not None and a_dtype == np.dtype("O")
    exp = unp.exp if is_object else np.exp
    return a * exp(-((x - x0) ** 2) / (2 * sigma ** 2)) + offset

In [None]:
# Pull plot of h1 using the `pdf` function defined above as the model.
h1.plot_pull(pdf);

In [None]:
# Rebind `h` to the "hpxpy" object from the file, converted to a Hist, and
# draw it as a 2D plot with mplhep.
h = rfile["hpxpy"].to_hist()
mplhep.hist2dplot(h);

In [None]:
# Same histogram through Hist's default `.plot()`.
h.plot();

In [None]:
# `plot2d_full` variant of the 2D plot for the histogram read from the file.
h.plot2d_full();

In [None]:
# Project onto the axis named "xaxis" and plot the projection.
h.project("xaxis").plot();