# Part 5: Statistical Analysis
A quick tour through [`cabinetry`](https://github.com/scikit-hep/cabinetry)

In [None]:
import copy
import glob
import json
import pathlib

import pyhf
import cabinetry

cabinetry.set_logging()

import boost_histogram as bh
import hist
import numpy as np
from pyhf.contrib.utils import download

# Creating a statistical model

### Defining the model

In [None]:
# Top-level settings: measurement name, parameter of interest, where inputs
# are read from, and the folder used for histograms.
config = dict(
    General=dict(
        Measurement="minimal_example",
        POI="Signal_norm",  # parameter of interest, which we want to measure
        InputPath="input/{SamplePath}",  # where to find input data
        HistogramFolder="histograms/",
    )
)

In [None]:
# One region, selected by a cut on the lepton charge and binned in jet_pt.
config["Regions"] = [
    dict(
        Name="Signal_region",
        Filter="lep_charge > 0",  # event selection
        Variable="jet_pt",  # which variable we bin histograms in
        Binning=[200, 300, 400, 500, 600],
    )
]

In [None]:
# Three samples: the (pseudo)data, and the signal and background predictions
# that both come from prediction.root.
config["Samples"] = [
    dict(
        Name="Data",
        Tree="pseudodata",
        SamplePath="data.root",
        Data=True,  # observed data is handled differently, need to distinguish
    ),
    dict(
        Name="Signal",
        Tree="signal",
        SamplePath="prediction.root",
        Weight="weight",  # weights: Monte Carlo integration, simulation correction etc.
    ),
    dict(
        Name="Background",
        Tree="background",
        SamplePath="prediction.root",
        Weight="weight",
    ),
]

In [None]:
# Start without any systematic uncertainties.
config["Systematics"] = []

In [None]:
# A single normalization factor; its name matches the POI set under "General".
config["NormFactors"] = [
    dict(
        Name="Signal_norm",
        Samples="Signal",  # we want this parameter to scale the signal
        Nominal=1,
        Bounds=[-5, 10],
    )
]

In [None]:
# Check the assembled configuration before it is used any further.
cabinetry.configuration.validate(config)

In [None]:
# Show a summary of what the configuration contains.
cabinetry.configuration.print_overview(config)

### Creating histograms

In [None]:
# Build the template histograms described by config, using the "uproot" method.
cabinetry.templates.build(config, method="uproot")

In [None]:
# List what exists under histograms/ (the HistogramFolder set in config).
glob.glob("histograms/*")

#### Visualization

In [None]:
# Data/MC comparison drawn from the histograms. The return value is bound to
# `_` so the notebook does not echo it as cell output.
_ = cabinetry.visualize.data_mc_from_histograms(config)

### A more complex model: adding systematic uncertainties

In [None]:
# Replace the empty systematics list with three sources of uncertainty:
#   - Luminosity: +/-5% normalization effect (no "Samples" restriction given),
#   - Modeling: alternative tree for the up variation, symmetrized for down,
#   - WeightBasedModeling: up/down variations expressed through event weights.
config["Systematics"] = [
    dict(
        Name="Luminosity",
        Up=dict(Normalization=0.05),
        Down=dict(Normalization=-0.05),
        Type="Normalization",
    ),
    dict(
        Name="Modeling",
        Up=dict(Tree="background_varied"),
        Down=dict(Symmetrize=True),
        Samples="Background",
        Type="NormPlusShape",
    ),
    dict(
        Name="WeightBasedModeling",
        Up=dict(Weight="weight_up"),
        Down=dict(Weight="0.7*weight"),
        Samples="Background",
        Type="NormPlusShape",
    ),
]

In [None]:
# Run the template building again now that config["Systematics"] is populated.
cabinetry.templates.build(config, method="uproot")

In [None]:
# Visualize the templates; the return value is discarded to keep the cell
# output clean.
_ = cabinetry.visualize.templates(config)

### Building a workspace

In [None]:
# Build the workspace specification from config and save it to workspace_path.
workspace_path = "example_workspace.json"
spec = cabinetry.workspace.build(config)
cabinetry.workspace.save(spec, workspace_path)

In [None]:
# Pretty-print the workspace specification as JSON (sorted keys, 4-space indent).
print(json.dumps(spec, sort_keys=True, indent=4))

### Model structure

In [None]:
# Construct a pyhf model from the specification and show its modifier grid.
cabinetry.visualize.modifier_grid(pyhf.Workspace(spec).model())

# Performing statistical inference with our model

In [None]:
# Get the model and the matching data from the workspace specification.
model, data = cabinetry.model_utils.model_and_data(spec)

In [None]:
# Bare expression: the notebook displays the contents of `data`.
data

### Maximum likelihood estimate (MLE)

In [None]:
# Maximum likelihood fit of the model to the data.
fit_results = cabinetry.fit.fit(model, data)

In [None]:
# Print each parameter's label with its best-fit value and uncertainty,
# both formatted to three decimal places.
for label, result, unc in zip(fit_results.labels, fit_results.bestfit, fit_results.uncertainty):
    print(f"{label}: {result:.3f} +/- {unc:.3f}")

In [None]:
# Pull plot for the fit results, leaving out "Signal_norm" (the POI).
cabinetry.visualize.pulls(fit_results, exclude="Signal_norm")

In [None]:
# Correlation matrix for the fit results.
# NOTE(review): pruning_threshold=0.1 presumably hides parameters whose
# correlations all stay below 0.1 -- confirm against the cabinetry docs.
cabinetry.visualize.correlation_matrix(fit_results, pruning_threshold=0.1)

In [None]:
# Model prediction without any fit results applied, compared against the data.
model_pred = cabinetry.model_utils.prediction(model)
figures = cabinetry.visualize.data_mc(model_pred, data, config=config)

In [None]:
# Same comparison with custom per-sample colors. close_figure=True is passed
# here, and the figure is displayed explicitly by the last line of the cell.
figures = cabinetry.visualize.data_mc(model_pred, data, config=config,
                                      colors={"Signal": "tomato", "Background": "navajowhite"}, close_figure=True)
# The second axes object of the first figure is treated as the ratio panel.
ratio_panel = figures[0]["figure"].get_axes()[1]
ratio_panel.set_xlabel("jet $p_T$")
figures[0]["figure"]  # show figure

In [None]:
# Yield tables for the same model prediction; the return value is discarded.
_ = cabinetry.tabulate.yields(model_pred, data)

In [None]:
# IPython shell escape: print the per-bin pre-fit yield table from tables/.
!cat tables/yields_per_bin_pre-fit.txt

In [None]:
# Repeat the prediction with the fit results applied (post-fit) and compare
# it against the data; the returned figures are discarded.
model_pred_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)
_ = cabinetry.visualize.data_mc(model_pred_postfit, data, config=config)

### Expected sensitivity with the Asimov dataset

In [None]:
# Build the Asimov dataset for the model and fit it; the fit result itself is
# discarded.
asimov_data = cabinetry.model_utils.asimov_data(model)
_ = cabinetry.fit.fit(model, asimov_data)

### Beyond MLEs: discovery significance and parameter limits

In [None]:
# Discovery significance for the model given the data.
significance_results = cabinetry.fit.significance(model, data)

In [None]:
# Hand-written yields (one value per bin of the signal region) used for the
# limit calculation, followed by the model's auxiliary data.
data_limit = [112, 129, 92, 63] + model.config.auxdata  # need auxiliary data as well
limit_results = cabinetry.fit.limit(model, data_limit)
# Last expression of the cell: visualize the limit results.
cabinetry.visualize.limit(limit_results)

# Exploring the statistical model of an ATLAS analysis

In [None]:
# Fetch the published likelihoods from HEPData into the local folder
# "electroweakinos".
download("https://www.hepdata.net/record/resource/1267798?view=true", "electroweakinos")

# Read both JSON files through context managers so the file handles are closed
# as soon as parsing is done instead of being left open until garbage
# collection.
with open("electroweakinos/1Lbb-likelihoods-hepdata/BkgOnly.json") as bkg_only_file:
    ATLAS_ws = pyhf.Workspace(json.load(bkg_only_file))
with open("electroweakinos/1Lbb-likelihoods-hepdata/patchset.json") as patchset_file:
    patchset = pyhf.PatchSet(json.load(patchset_file))

# Apply the patch named "C1N2_Wh_hbb_700_400" to the background-only workspace
# and store the resulting workspace on disk.
ATLAS_ws = patchset.apply(ATLAS_ws, "C1N2_Wh_hbb_700_400")
cabinetry.workspace.save(ATLAS_ws, "electroweakinos.json")

In [None]:
# IPython shell escape: show the first six lines of `pyhf inspect` output for
# the saved workspace.
!pyhf inspect electroweakinos.json | head -n 6

In [None]:
# Get the model and the matching data from the patched ATLAS workspace.
ATLAS_model, ATLAS_data = cabinetry.model_utils.model_and_data(ATLAS_ws)

In [None]:
# Model prediction without fit results, compared against the data. No config
# is passed here, and the returned figures are discarded.
ATLAS_model_pred = cabinetry.model_utils.prediction(ATLAS_model)
_ = cabinetry.visualize.data_mc(ATLAS_model_pred, ATLAS_data)

In [None]:
# Maximum likelihood fit of the ATLAS model to its data.
ATLAS_fit_results = cabinetry.fit.fit(ATLAS_model, ATLAS_data)

In [None]:
# Pull plot for the ATLAS fit results (no parameters excluded).
cabinetry.visualize.pulls(ATLAS_fit_results)