# Lesson 4 project 2: JIT-compiled Higgs combinatorics

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import numba as nb

import awkward as ak
import vector
from hist import Hist
vector.register_awkward()

In [None]:
# Load the collision events from the Parquet file into an Awkward Array.
events = ak.from_parquet("../data/SMHiggsToZZTo4L.parquet")

<br><br><br>

This exercise repeats [lesson-3-awkward/project-2-higgs.ipynb](../lesson-3-awkward/project-2-higgs.ipynb), but instead of allocating arrays of combinations, we only iterate over the combinations with Numba. In situations with many combinations, you might not have enough memory to allocate them as arrays.

As a reminder, the task was to reconstruct $H \to ZZ \to e^+e^-e^+e^-$ without allowing any one electron to be the decay product of both Z bosons.

We'll start with the collections separated by charge, as before.

In [None]:
# Split each event's electron collection by the sign of the charge:
# charge > 0 goes to eplus, charge < 0 goes to eminus.
eplus = events.electron[events.electron.charge > 0]
eminus = events.electron[events.electron.charge < 0]

<br><br><br>

When writing a function for Numba, we're likely to encounter more verbose errors than we would get from a normal Python function. This is because Numba has to analyze the function and determine all the possible ways that it could run, whereas Python charges ahead and discovers type errors at runtime.

Because Numba's errors can be confusing, it's usually easier to develop the function in pure Python with a small dataset or a single event, and then compile and apply it to the full dataset.

Since we want to test electron combinatorics, let's pick a collision event with more electrons in each charge collection than the two we'd need to form a Higgs candidate.

In [None]:
# Indexes of the events that have exactly three entries in eplus and exactly
# three entries in eminus.
np.nonzero((ak.num(eplus) == 3) & (ak.num(eminus) == 3))

There are a few of these, but let's take the first one, at index 448.

In [None]:
# Pick the single event at index 448 to develop the combinatorics on.
eplus_event = eplus[448]
eminus_event = eminus[448]

In [None]:
# Display the eplus entries of the selected event.
eplus_event.show()

In [None]:
# Display the eminus entries of the selected event.
eminus_event.show()

<br><br><br>

## Exercise 1

Fill in the `...` in each range over the indexes of `eplus_event` and `eminus_event` such that

* `Z2_i` never overlaps with `Z1_i`: the $e^+$ is given to only one Z boson
* `Z2_j` never overlaps with `Z1_j`: the $e^-$ is given to only one Z boson

You should reproduce the table below.

In [None]:
# Exercise template: every `...` below is a placeholder to be replaced by the
# reader. Until then, `range(...)` raises a TypeError because Ellipsis is not
# an integer.
print( "e+ index  | e- index ")
print(f"Z1_i Z2_i | Z1_j Z2_j")
print( "----------+----------")
# Z1_i / Z2_i index into eplus_event; Z1_j / Z2_j index into eminus_event.
for Z1_i in range(...):
    for Z2_i in range(...):
        for Z1_j in range(...):
            for Z2_j in range(...):
                print(f"{Z1_i:4d} {Z2_i:4d} | {Z1_j:4d} {Z2_j:4d}")
        # One separator line after each (Z1_i, Z2_i) pair.
        print( "----------+----------")

```
e+ index  | e- index 
Z1_i Z2_i | Z1_j Z2_j
----------+----------
   0    1 |    0    1
   0    1 |    0    2
   0    1 |    1    2
----------+----------
   0    2 |    0    1
   0    2 |    0    2
   0    2 |    1    2
----------+----------
   1    2 |    0    1
   1    2 |    0    2
   1    2 |    1    2
----------+----------
```

<br><br><br>

## Exercise 2

Using the same index ranges, set `Zsmall` and `Zbig` to the Z boson (`Z1` or `Z2`) with the lower and higher mass, respectively.

In [None]:
# Exercise template: every `...` below is a placeholder to be replaced by the
# reader; the cell cannot run until the ranges and the Zsmall/Zbig assignment
# are filled in.
print( " Zsmall  |  Zbig   ")
print( "---------+---------")
for Z1_i in range(...):
    for Z2_i in range(...):
        for Z1_j in range(...):
            for Z2_j in range(...):
                # Each Z candidate is the sum of one e+ and one e- entry.
                Z1 = eplus_event[Z1_i] + eminus_event[Z1_j]
                Z2 = eplus_event[Z2_i] + eminus_event[Z2_j]
                # To be filled in: order Z1 and Z2 by mass.
                Zsmall, Zbig = ...
                print(f"{Zsmall.mass:.5f} | {Zbig.mass:.5f}")
        # One separator line after each (Z1_i, Z2_i) pair.
        print( "---------+---------")

```
 Zsmall  |  Zbig   
---------+---------
51.54425 | 88.57392
51.54425 | 52.81760
42.52653 | 52.81760
---------+---------
51.54425 | 56.26014
51.54425 | 52.19812
42.52653 | 52.19812
---------+---------
56.26014 | 66.23705
52.19812 | 66.23705
52.19812 | 88.57392
---------+---------
```

<br><br><br>

## Exercise 3

Using the same index ranges and `Zsmall`/`Zbig`, select the best Higgs interpretation for this event, the one for which

* `Zbig` is as close as possible to the on-shell Z mass.

Note that `best_Zbig` can be `None`. This is exactly the sort of issue (a type error) that is easy to diagnose interactively in Python, but that would produce a confusing compilation error message in Numba.

In [None]:
from particle import Particle
from hepunits import GeV

# Z boson mass looked up by name in the `particle` package, divided by the
# hepunits GeV constant (presumably yielding a plain number in GeV).
onshell_mass = Particle.from_name("Z0").mass / GeV

In [None]:
def best_interpretation(eplus_event, eminus_event):
    """Select one (Zsmall, Zbig) pair from a single event's electrons.

    Exercise template: every `...` is a placeholder to be replaced by the
    reader. According to the exercise text, the intended selection keeps the
    combination whose `Zbig` mass is closest to the on-shell Z mass.

    Parameters
    ----------
    eplus_event, eminus_event
        The e+ and e- entries of one event; both are indexed by integer
        position.

    Returns
    -------
    tuple
        `(best_Zsmall, best_Zbig)`. Both are `None` if no combination was
        ever selected (for example, when the loops never execute).
    """
    # Start with "nothing selected yet".
    best_Zsmall, best_Zbig = None, None
    for Z1_i in range(...):
        for Z2_i in range(...):
            for Z1_j in range(...):
                for Z2_j in range(...):
                    # Each Z candidate is the sum of one e+ and one e- entry.
                    Z1 = eplus_event[Z1_i] + eminus_event[Z1_j]
                    Z2 = eplus_event[Z2_i] + eminus_event[Z2_j]
                    # To be filled in: order Z1 and Z2 by mass.
                    Zsmall, Zbig = ...
                    # To be filled in: is this combination better than the
                    # best one seen so far? (best_Zbig may still be None.)
                    if ...:
                        best_Zsmall, best_Zbig = Zsmall, Zbig
    return best_Zsmall, best_Zbig

# Try the function on the single selected event.
best_interpretation(eplus_event, eminus_event)

Now put a `@nb.njit` decorator above `def best_interpretation` to see if it compiles and runs, producing the same result.

<br><br><br>

## Exercise 4

Put it all together by preparing output arrays and writing a `for` loop over all events.

First try this in pure Python with a small subset of events: `eplus[:100]` and `eminus[:100]`. Also evaluate the cells that make the plots.

Then put a `@nb.njit` decorator above `def best_interpretation_per_event` to see if it compiles and runs. You can also let it run for all events in `eplus` and `eminus` to see full statistics in the plots.

(Note that you can call a compiled function from a pure Python function, but you can't call a pure Python function from a compiled function.)

In [None]:
def best_interpretation_per_event(eplus, eminus):
    """Compute the selected Z and Higgs candidate masses for every event.

    Calls `best_interpretation` on each (e+, e-) pair of per-event
    collections and records the masses in flat NumPy arrays.

    Parameters
    ----------
    eplus, eminus
        Per-event collections of e+ and e- entries, iterated in lockstep.

    Returns
    -------
    tuple of numpy.ndarray
        `(zmass_small, zmass_big, higgsmass)`, each float64 with length
        `len(eplus)`. Events without a selected interpretation get NaN in
        all three arrays. `higgsmass` is also NaN when the Z masses fall
        outside the windows 12 < Zsmall < 120 and 40 < Zbig < 120.
    """
    n_events = len(eplus)
    zmass_small = np.empty(n_events, dtype=np.float64)
    zmass_big = np.empty(n_events, dtype=np.float64)
    higgsmass = np.empty(n_events, dtype=np.float64)

    for i, (plus, minus) in enumerate(zip(eplus, eminus)):
        best_Zsmall, best_Zbig = best_interpretation(plus, minus)

        if best_Zsmall is None or best_Zbig is None:
            # No interpretation was selected for this event.
            zmass_small[i] = np.nan
            zmass_big[i] = np.nan
            higgsmass[i] = np.nan
        else:
            mass_small = best_Zsmall.mass
            mass_big = best_Zbig.mass
            zmass_small[i] = mass_small
            zmass_big[i] = mass_big

            # Only keep a Higgs mass when both Z candidates are in range.
            if 12 < mass_small < 120 and 40 < mass_big < 120:
                higgsmass[i] = (best_Zsmall + best_Zbig).mass
            else:
                higgsmass[i] = np.nan

    return zmass_small, zmass_big, higgsmass

# Trial run on only the first 100 events.
zmass_small, zmass_big, higgsmass = best_interpretation_per_event(eplus[:100], eminus[:100])

In [None]:
# 2D histogram of zmass_small versus zmass_big, with 60 regular bins from
# 0 to 120 on each axis.
Hist.new.Regular(60, 0, 120, name="zmass_small").Regular(60, 0, 120, name="zmass_big").Double().fill(
    zmass_small=zmass_small,
    zmass_big=zmass_big,
).plot2d_full();

In [None]:
# 1D histogram of higgsmass, with 100 regular bins from 0 to 200.
Hist.new.Regular(100, 0, 200, name="Higgs mass").Double().fill(
    higgsmass
).plot();