In [1]:
import pathlib
from os import uname

import numpy as np
import ROOT as rt
from math import ceil, floor

from src import CMS_lumi, tdrstyle

# from sklearn.metrics import roc_auc_score  # , roc_curve
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.ensemble import GradientBoostingClassifier  # , RandomForestClassifier

# --- Output locations ---
# Report sub-directory; later in this cell it is prefixed with the
# site-specific base directory (T2_OUT_DIR or LOCAL_OUT_DIR).
OUT_DIR = "reports/weekly/2023-09-14"
T2_OUT_DIR = "/storage/af/user/psimmerl/LLP/mdc_analysis"  # os.getcwd()
LOCAL_OUT_DIR = "/home/psimmerl/LLP/mdc_analysis"  # os.getcwd()

# Ntuple version tag; interpolated into the input file paths as "v{DATA_VERSION}".
DATA_VERSION = "6"
# NOTE(review): units are not stated here -- presumably 23.02 fb^-1 converted to pb^-1; confirm.
LUMI = 23.02 * 1000

# --- Input locations ---
T2_DATA_DIR = "/storage/cms/store/user/christiw/displacedJetMuonAnalyzer/Run3/V1p19"
LOCAL_DATA_DIR = "/home/psimmerl/LLP/mdc_analysis/data/raw"  # os.getcwd() + "/data/raw"
# Site selector, not a path: "TIER2" when the host name (uname()[1]) contains
# "caltech", otherwise "LOCAL".
DATA_DIR = "TIER2" if "caltech" in uname()[1] else "LOCAL"

# Base file names (no directory, no ".root" suffix); they are expanded into
# full paths once the site is known.
FN_MC = "ggH_HToSSTobbbb_MH-125_MS-15_CTau1000_13p6TeV_1pb_weighted"
FN_R3 = "DisplacedJet-EXOCSCCluster_Run2022EFG-PromptReco-v1_goodLumi"

# Assigned to rt.gErrorIgnoreLevel later in this cell.
ROOT_ERROR_LEVEL = 1001  # rt.kInfo + 1
# NOTE(review): not used in the visible cells -- presumably fractional pad margins; confirm.
BOT_MARGIN, TOP_MARGIN = 0.025, 0.1
# NOTE(review): presumably a keep-alive list for ROOT objects; the name would
# shadow the stdlib `gc` module if that were ever imported -- confirm intent.
gc = []

# **************************** #
# Expand the output directory and the input file names into full paths for the
# site selected by DATA_DIR ("TIER2" or "LOCAL").
if "TIER2" in DATA_DIR:
    OUT_DIR = f"{T2_OUT_DIR}/{OUT_DIR}"
    # Fix: the signal MC path used to interpolate FN_R3, so it pointed at the
    # data file name inside the MC directory. It must use the MC base name.
    FN_MC = f"{T2_DATA_DIR}/MC_Summer22EE/v1/sixie/v{DATA_VERSION}/normalized/{FN_MC}.root"
    FN_R3 = f"{T2_DATA_DIR}/Data2022/v{DATA_VERSION}/normalized/{FN_R3}.root"
else:
    OUT_DIR = f"{LOCAL_OUT_DIR}/{OUT_DIR}"
    FN_MC = f"{LOCAL_DATA_DIR}/{FN_MC}_v{DATA_VERSION}.root"
    FN_R3 = f"{LOCAL_DATA_DIR}/{FN_R3}_v{DATA_VERSION}.root"
# Create the report directory (and any missing parents); no error if it exists.
pathlib.Path(OUT_DIR).mkdir(parents=True, exist_ok=True)
# **************************** #
# ROOT messages below this level are suppressed.
rt.gErrorIgnoreLevel = ROOT_ERROR_LEVEL
# rt.gROOT.SetBatch(True)
tdrstyle.setTDRStyle()
CMS_lumi.writeExtraText = 0


# **************************** #
def histo1d(rdfs, vals, filters=None, styles=None):
    """Placeholder for a 1D histogram helper; not implemented yet.

    The body is a bare ``pass``, so every call currently returns ``None`` and
    none of the arguments are used.

    NOTE(review): the intended meaning of ``rdfs``, ``vals``, ``filters`` and
    ``styles`` is not visible here -- presumably RDataFrames, column names,
    filter expressions and draw styles; confirm when implementing.
    """
    pass

  module "memory_resource" [optional] {
                            ^
  module "bits/chrono.h" [optional] {
                          ^
  explicit module "bits_ranges_base_h" [optional] {
                                        ^
  module "bits/ranges_util.h" [optional] {
                               ^
  module "bits/uses_allocator_args.h" [optional] {
                                       ^


Welcome to JupyROOT 6.28/04


In [5]:
# ms_mc = MuonSystemAwkward(FN_MC, name="Signal", nev=N_EVENTS, is_mc=True, lumi=LUMI)
# Sample selector ("mc" -> FN_MC, anything else -> FN_R3) and whether to write
# the filtered events to disk.
rdfn = "mc"
save = True

if rdfn == "mc":
    rdf = rt.RDataFrame("MuonSystem", FN_MC)
else:
    rdf = rt.RDataFrame("MuonSystem", FN_R3)

# Column names of the unfiltered tree, minus the HLT decision column.
columns = list(rdf.GetColumnNames())
columns.remove("HLTDecision")

# Keep events with HLT bit 569 set and at least one CSC or DT rechit cluster.
rdf = rdf.Filter(
    "HLTDecision[569] && (nCscRechitClusters > 0 || nDtRechitClusters > 0)"
)
if save:
    rdf.Snapshot("MuonSystem_HLT569", f"data/processed/{rdfn}_hlt569.rdf")

In [None]:
# ms_mc = MuonSystemAwkward(FN_MC, name="Signal", nev=N_EVENTS, is_mc=True, lumi=LUMI)
# Sample selector ("mc" -> FN_MC, anything else -> FN_R3) and whether to write
# the filtered events to disk.
rdfn = "r3"
save = True

if rdfn == "mc":
    rdf = rt.RDataFrame("MuonSystem", FN_MC)
else:
    rdf = rt.RDataFrame("MuonSystem", FN_R3)

# Column names of the unfiltered tree, minus the HLT decision column.
columns = list(rdf.GetColumnNames())
columns.remove("HLTDecision")

# Keep events with HLT bit 569 set and at least one CSC or DT rechit cluster.
rdf = rdf.Filter(
    "HLTDecision[569] && (nCscRechitClusters > 0 || nDtRechitClusters > 0)"
)
if save:
    rdf.Snapshot("MuonSystem_HLT569", f"data/processed/{rdfn}_hlt569.rdf")

: 

In [None]:
%%time
print(f"total CSC RechitSize = {rdf.Sum('cscRechitClusterSize').GetValue():,.0f}")

# Sum the CSC cluster size separately for cluster slot 0, 1, 2, ... and stop
# at the first slot whose total is not positive.
#
# Fix: the slot is now read only when the per-event collection is long enough.
# The previous bare `cscRechitClusterSize[icl]` indexed past the end for
# events with fewer than icl + 1 clusters; that access is not bounds-checked,
# so the totals (and the loop's stopping point) were undefined. Missing slots
# now contribute 0.
icl, sum_size = 0, 1
while sum_size > 0:
    sum_size = (
        rdf.Redefine(
            "cscRechitClusterSize",
            f"(cscRechitClusterSize.size() > {icl}) ? cscRechitClusterSize[{icl}] : 0",
        )
        .Sum("cscRechitClusterSize")
        .GetValue()
    )
    print(f"{icl} | {sum_size:,.0f}")
    icl += 1

In [None]:
%%time
def csc_vN(v, n):
    """Build a C++ expression selecting slot ``n`` of ``cscRechitCluster{v}``.

    Args:
        v: Suffix of the CSC cluster column name (e.g. ``"Size"``).
        n: Zero-based cluster slot to read.

    Returns:
        A C++ statement string that yields element ``n`` when
        ``nCscRechitClusters > n`` and ``0`` otherwise.
    """
    return f"return (nCscRechitClusters > {n}) ? cscRechitCluster{v}[{n}] : 0;"


def dt_vN(v, n):
    """Build a C++ expression selecting slot ``n`` of ``dtRechitCluster{v}``.

    The guard uses ``nDtRechitClusters``; it previously used the CSC counter,
    which does not bound the DT collection being indexed.

    Args:
        v: Suffix of the DT cluster column name (e.g. ``"Size"``).
        n: Zero-based cluster slot to read.

    Returns:
        A C++ statement string that yields element ``n`` when
        ``nDtRechitClusters > n`` and ``0`` otherwise.
    """
    return f"return (nDtRechitClusters > {n}) ? dtRechitCluster{v}[{n}] : 0;"
print(f"total CSC RechitSize = {rdf.Sum('cscRechitClusterSize').GetValue():,.0f}")

# Sum the CSC cluster size one cluster slot at a time (slot 0, 1, 2, ...),
# printing each total, and stop after the first slot whose total is not
# positive. csc_vN supplies the bounds-guarded element expression.
icl = 0
while True:
    sum_size = (
        rdf.Redefine("cscRechitClusterSize", csc_vN("Size", icl))
        .Sum("cscRechitClusterSize")
        .GetValue()
    )
    print(f"{icl} | {sum_size:,.0f}")
    icl += 1
    if not sum_size > 0:
        break

In [None]:
%%time
def csc_vN(v, n):
    """Build a C++ expression selecting slot ``n`` of ``cscRechitCluster{v}``.

    Args:
        v: Suffix of the CSC cluster column name (e.g. ``"Size"``).
        n: Zero-based cluster slot to read.

    Returns:
        A C++ statement string that yields element ``n`` when
        ``nCscRechitClusters > n`` and ``0`` otherwise.
    """
    return f"return (nCscRechitClusters > {n}) ? cscRechitCluster{v}[{n}] : 0;"


def dt_vN(v, n):
    """Build a C++ expression selecting slot ``n`` of ``dtRechitCluster{v}``.

    The guard uses ``nDtRechitClusters``; it previously used the CSC counter,
    which does not bound the DT collection being indexed.

    Args:
        v: Suffix of the DT cluster column name (e.g. ``"Size"``).
        n: Zero-based cluster slot to read.

    Returns:
        A C++ statement string that yields element ``n`` when
        ``nDtRechitClusters > n`` and ``0`` otherwise.
    """
    return f"return (nDtRechitClusters > {n}) ? dtRechitCluster{v}[{n}] : 0;"
print(f"total CSC RechitSize = {rdf.Sum('cscRechitClusterSize').GetValue():,.0f}")

# Same per-slot scan as before, but processed ten slots at a time. All ten
# Sum results of a batch are booked before any GetValue() call.
# NOTE(review): presumably this lets RDataFrame fill a whole batch in a single
# event loop instead of one loop per slot -- confirm against the timing.
icl, sum_sizes = 0, []
while not sum_sizes or sum_sizes[-1] > 0:
    # Book the lazy results for slots icl .. icl + 9.
    sum_sizes.extend(
        rdf.Redefine("cscRechitClusterSize", csc_vN("Size", _icl)).Sum("cscRechitClusterSize")
        for _icl in range(icl, icl + 10)
    )
    # Replace each booked result with its value; report only positive totals.
    for _icl in range(icl, icl + 10):
        sum_sizes[_icl] = sum_sizes[_icl].GetValue()
        if not sum_sizes[_icl] > 0:
            continue
        print(f"{_icl} | {sum_sizes[_icl]}")
    icl += 10



In [None]:
# DT cluster size spectrum, 50 bins from 50 to 200, with an exponential fit
# drawn on a log-y canvas.
canvas = rt.TCanvas("c", "c", 800, 800)

h1 = rdf.Histo1D(
    ("", "", 50, 50, 200),  # (name, title, nbins, xlow, xup)
    "dtRechitClusterSize",
)
h1.Fit("expo")
h1.Draw()

canvas.SetGrid()
canvas.SetLogy()
canvas.Draw()

In [None]:
# Open question: how does RDataFrame fill a 2D histogram from two RVec columns?
#   Hypothesis -- element-wise pairing of A and B:
#   a00&b00, a01&b01, ..., a0N&b0N, a10&b10, ..., aMN&bMN, etc.
# Plotting a column against itself is the check: element-wise pairing would
# put every entry on the diagonal.
canvas = rt.TCanvas("c", "c", 800, 800)

h2 = rdf.Histo2D(
    ("", "", 50, 100, 300, 50, 100, 300),  # (name, title, nx, xlow, xup, ny, ylow, yup)
    "cscRechitClusterSize",
    "cscRechitClusterSize",
)
h2.Draw("colz")

# canvas.SetLogz()
canvas.SetGrid()
canvas.Draw()

In [None]:
# DT cluster size (50 bins, 50 to 200) against DT cluster phi (16 bins over
# -pi to pi), drawn with a log-z colour scale.
canvas = rt.TCanvas("c", "c", 800, 800)

h2 = rdf.Histo2D(
    ("", "", 50, 50, 200, 16, -np.pi, np.pi),  # (name, title, nx, xlow, xup, ny, ylow, yup)
    "dtRechitClusterSize",
    "dtRechitClusterPhi",
)
h2.Draw("colz")

canvas.SetGrid()
canvas.SetLogz()
canvas.Draw()

In [None]:
# "met" (50 bins, 50 to 200) against CSC cluster size (50 bins, 100 to 300),
# drawn with a log-z colour scale.
canvas = rt.TCanvas("c", "c", 800, 800)

h2 = rdf.Histo2D(
    ("", "", 50, 50, 200, 50, 100, 300),  # (name, title, nx, xlow, xup, ny, ylow, yup)
    "met",
    "cscRechitClusterSize",
)
h2.Draw("colz")

canvas.SetGrid()
canvas.SetLogz()
canvas.Draw()

In [None]:
# DT cluster size (50 bins, 50 to 200) against CSC cluster size (50 bins,
# 100 to 300), drawn with a log-z colour scale.
# NOTE(review): both columns look like per-cluster collections -- confirm they
# have matching lengths in every event before trusting this plot.
canvas = rt.TCanvas("c", "c", 800, 800)

h2 = rdf.Histo2D(
    ("", "", 50, 50, 200, 50, 100, 300),  # (name, title, nx, xlow, xup, ny, ylow, yup)
    "dtRechitClusterSize",
    "cscRechitClusterSize",
)
h2.Draw("colz")

canvas.SetGrid()
canvas.SetLogz()
canvas.Draw()