# Samples

## Signal

M = 3000 GeV

https://opendata.cern.ch/record/75156

## Backgrounds

### W+jets
https://opendata.cern.ch/record/69747

or

https://opendata.cern.ch/record/69745

### TT semilep
https://opendata.cern.ch/record/67993

### TT hadronic
https://opendata.cern.ch/record/67841

### TT leptonic
https://opendata.cern.ch/record/19958



# Trigger

The trigger used for the muon channel is the “OR” combination of the HLT paths `HLT_Mu50_v*` and `HLT_TkMu50_v*`. Similarly, the scale factors for this trigger combination were provided by the Muon POG [33].

# Kinematics 

Referencing this note

[1] M. Adams et al., “Search for ttbar resonances in boosted semileptonic final states at
$\sqrt{s} = 13$ TeV”, CMS Analysis Note AN-2015/107 (2015).

for discussions of invariant mass and using the mass of the W to constrain the kinematics of the missing energy in the transverse plane. See Eqn. 3.

In [None]:
# Run this if these are not installed and upgraded
# The pip commands below are wrapped in a string literal, so this cell does
# nothing as written; remove the surrounding triple quotes to run them.
# NOTE(review): pandas is imported by this notebook but is not listed here.
'''
!pip install --upgrade awkward
!pip install --upgrade uproot

!pip install --upgrade matplotlib

!pip install vector
'''

In [None]:
# The classics
import numpy as np
import matplotlib.pylab as plt
import matplotlib # To get the version

import pandas as pd

# The newcomers
import awkward as ak
import uproot

import vector
vector.register_awkward()


In [None]:
# Report the version of every library imported above, one per paragraph.
print("Versions --------\n")
for label, module in (
    ("ak", ak),
    ("uproot", uproot),
    ("np", np),
    ("matplotlib", matplotlib),
    ("vector", vector),
):
    print(f"{label}.__version__ = {module.__version__!r}\n")

In [None]:
# Pick the sample to process by leaving exactly one dataset/filename pair
# uncommented. `dataset` is a short label that ends up in the output CSV name;
# `filename` is the remote ROOT file that is opened below.

####### Backgrounds
# W+jets
#dataset = "Wjets"
#filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/270000/00702195-E707-3743-8BBA-57EB9DEE1DBA.root'

# ttbar leptonic
# NOTE(review): this path is a MINIAODSIM file, while every other sample here
# is NANOAODSIM -- confirm it provides the branches read later in the notebook.
#dataset = "tt_lep"
#filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIIFall15MiniAODv2/TTTo2L2Nu_13TeV-powheg/MINIAODSIM/PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/00000/02A468DA-E8B9-E511-942C-0022195E688C.root'

# ttbar hadronic
#dataset = "tt_had"
#filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/TTToHadronic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/130000/009086DB-1E42-7545-9A35-1433EC89D04B.root'

# ttbar semileptonic
#dataset = "tt_semilep"
#filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/120000/08FCB2ED-176B-064B-85AB-37B898773B98.root'


########### Signal
# NOTE(review): the file name says M2000_W20, whereas the "Signal" notes at the
# top of this document quote M = 3000 GeV -- confirm which mass point is meant.
dataset = 'signal'
filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/ZprimeToTT_M2000_W20_TuneCP2_PSweights_13TeV-madgraph-pythiaMLM-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/270000/22BAB5D2-9E3F-E440-AB30-AE6DBFDF6C83.root'


# Open the file 
# The handle is kept open on purpose: branches are read from `events` in the
# cells that follow.
f = uproot.open(filename)

# 'Events' is the tree every later cell reads its branches from.
events = f['Events']

nevents = events.num_entries

print(f"{nevents = }")

In [None]:
def pretty_print(fields, fmt='40s', require=None, ignore=None):
    """Print the selected names from *fields* in aligned columns.

    Args:
        fields: Iterable of strings, e.g. ``events.keys()``.
        fmt: Format spec applied to every printed name (``'40s'`` pads each
            name to 40 characters).
        require: A substring, or a list/tuple/set of substrings, that must
            ALL occur in a name for it to be printed. ``None`` disables the
            filter.
        ignore: A substring, or a list/tuple/set of substrings; a name that
            contains ANY of them is skipped. ``None`` disables the filter.

    Names are appended to the current row until the row plus the next raw
    name would exceed 80 characters; the row is then printed and a new one
    started. The last (possibly empty) row is always printed.
    """

    def as_substrings(value):
        # Accept None, a single string, or any iterable of strings.
        if value is None:
            return ()
        if isinstance(value, str):
            return (value,)
        return tuple(value)

    required = as_substrings(require)
    ignored = as_substrings(ignore)

    row = ""
    for name in fields:
        if not all(piece in name for piece in required):
            continue
        if any(piece in name for piece in ignored):
            continue

        # Flush only a non-empty row, so an over-long first name does not
        # produce a stray blank line.
        if row and len(row) + len(name) > 80:
            print(row)
            row = ""
        row += f"{name:{fmt}} "

    print(row)

In [None]:
# Browse the branch names of the tree. Uncomment one call at a time:
# `require` keeps names containing every listed substring, `ignore` drops
# names containing the given substring.

# Pretty print all the fields
#pretty_print(events.keys())

# Pretty print some subsets
#pretty_print(events.keys(), fmt='30s', require='FatJet')
#pretty_print(events.keys(), fmt='40s', require=['Muon', 'Iso'], ignore='HLT')
#pretty_print(events.keys(), fmt='40s', require=['HLT', 'TkMu50'])
#pretty_print(events.keys(), fmt='40s', require='HLT')
#pretty_print(events.keys(), fmt='40s', require='Jet_', ignore='Fat')
# Active call: PuppiMET branches, leaving out those whose name contains 'Raw'.
pretty_print(events.keys(), fmt='40s', require='PuppiMET', ignore='Raw')

In [None]:
# Read every FatJet_* branch used by the selection and the four-vectors,
# in one pass over the list of branch names.
(
    fatjet_mSD,
    fatjet_tag,
    fatjet_tau2,
    fatjet_tau3,
    fatjet_pt,
    fatjet_eta,
    fatjet_phi,
    fatjet_mass,
) = (
    events[branch].array()
    for branch in (
        'FatJet_msoftdrop',
        'FatJet_particleNet_TvsQCD',
        'FatJet_tau2',
        'FatJet_tau3',
        'FatJet_pt',
        'FatJet_eta',
        'FatJet_phi',
        'FatJet_mass',
    )
)


In [None]:
# Read the Muon_* kinematics plus the isolation and tight-ID branches used
# by the muon selection.
(
    muon_pt,
    muon_eta,
    muon_phi,
    muon_mass,
    muon_iso,
    muon_tightId,
) = (
    events[branch].array()
    for branch in (
        'Muon_pt',
        'Muon_eta',
        'Muon_phi',
        'Muon_mass',
        'Muon_miniIsoId',
        'Muon_tightId',
    )
)


In [None]:
# Read the Jet_* b-tag score, jet ID and kinematics used by the jet selection.
(
    jet_btag,
    jet_jetid,
    jet_pt,
    jet_eta,
    jet_phi,
    jet_mass,
) = (
    events[branch].array()
    for branch in (
        'Jet_btagDeepB',
        'Jet_jetId',
        'Jet_pt',
        'Jet_eta',
        'Jet_phi',
        'Jet_mass',
    )
)


In [None]:
# Missing transverse momentum (PUPPI), one value per event.
met_pt = events['PuppiMET_pt'].array()
met_phi = events['PuppiMET_phi'].array()

# Only the transverse components are measured, so the vector is placed at
# eta = 0 (pz = 0).
met_eta = 0 * met_pt

# Energy of the MET four-vector: treated as a massless particle with pz = 0,
# so E = pt. PuppiMET_sumEt is NOT used here: it is the scalar sum of Et over
# the event, not the energy of the missing-momentum vector, and would distort
# every invariant mass built from it.
met_energy = met_pt

# Leptonic HT: each muon's pt plus the event's MET pt.
ht_lep = muon_pt + met_pt

In [None]:
# ---- Object-level cuts: boolean masks with one entry per object ----
tau32 = fatjet_tau3/fatjet_tau2  # only needed by the alternative fat-jet cut below

#cut_fatjet = (tau32>0.67) & (fatjet_eta>-2.4) & (fatjet_eta<2.4) & (fatjet_mSD>105) & (fatjet_mSD<220)
cut_fatjet = (fatjet_pt > 500) & (fatjet_tag > 0.5)

# muon_tightId is already boolean, so it is used directly as a mask.
cut_muon = (muon_pt>55) & (muon_eta>-2.4) & (muon_eta<2.4) & \
           muon_tightId & (muon_iso>1) & (ht_lep>150)

cut_jet = (jet_btag > 0.5) & (jet_jetid>=4)


# ---- Event-level cuts: boolean masks with one entry per event ----
cut_met = (met_pt > 50)

# Exactly one muon passing the object-level muon cut.
cut_nmuons = ak.num(cut_muon[cut_muon]) == 1

# The muon-channel trigger is the OR of HLT_Mu50 and HLT_TkMu50 (see the
# "Trigger" notes at the top of this document); requiring HLT_TkMu50 alone
# would drop events that fired only HLT_Mu50.
cut_trigger = events['HLT_Mu50'].array() | events['HLT_TkMu50'].array()

# Exactly one fat jet passing the object-level fat-jet cut.
cut_ntop = ak.num(cut_fatjet[cut_fatjet]) == 1

cut_full_event = cut_trigger & cut_nmuons & cut_met & cut_ntop

In [None]:
# Object-level masks restricted to the events that pass the full event cut.
fatjet_mask = cut_fatjet[cut_full_event]
muon_mask = cut_muon[cut_full_event]
jet_mask = cut_jet[cut_full_event]

# For each collection: keep the selected events first, then the selected
# objects inside them, and pack the result as Momentum4D records.
fatjets = ak.zip(
    {
        key: values[cut_full_event][fatjet_mask]
        for key, values in (
            ("pt", fatjet_pt),
            ("eta", fatjet_eta),
            ("phi", fatjet_phi),
            ("mass", fatjet_mass),
        )
    },
    with_name="Momentum4D",
)

muons = ak.zip(
    {
        key: values[cut_full_event][muon_mask]
        for key, values in (
            ("pt", muon_pt),
            ("eta", muon_eta),
            ("phi", muon_phi),
            ("mass", muon_mass),
        )
    },
    with_name="Momentum4D",
)

jets = ak.zip(
    {
        key: values[cut_full_event][jet_mask]
        for key, values in (
            ("pt", jet_pt),
            ("eta", jet_eta),
            ("phi", jet_phi),
            ("mass", jet_mass),
        )
    },
    with_name="Momentum4D",
)

# MET only needs the event-level selection; it is built from energy ("e")
# rather than mass.
met = ak.zip(
    {
        key: values[cut_full_event]
        for key, values in (
            ("pt", met_pt),
            ("eta", met_eta),
            ("phi", met_phi),
            ("e", met_energy),
        )
    },
    with_name="Momentum4D",
)

In [None]:
# Form the Cartesian product of the four collections and unzip it into four
# parallel arrays, so entry i of each array belongs to the same combination.
p4mu,p4fj,p4j,p4met = ak.unzip(ak.cartesian([muons, fatjets, jets, met]))


## Trying to get the W mass constraint working

Maybe we ignore this in the end. 

In [None]:
#newmet = vector.Array([{"x":met.x, "y":met.y, "z":met.z, "e":met.e}])

# Neutrino pz from the W-mass constraint. Requiring (p_mu + p_nu)^2 = M_W^2
# with a massless neutrino (and neglecting the muon mass in M_W^2) gives a
# quadratic in pz_nu:
#   (E_mu^2 - pz_mu^2) pz_nu^2 - 2 k pz_mu pz_nu + E_mu^2 pt_nu^2 - k^2 = 0
# with k = M_W^2 / 2 + pT_mu . pT_nu.
W_MASS = 80.379  # GeV

w_k = 0.5 * W_MASS**2 + p4mu.x * p4met.x + p4mu.y * p4met.y
w_a = p4mu.e**2 - p4mu.z**2
w_disc = (w_k * p4mu.z)**2 - w_a * (p4mu.e**2 * p4met.pt**2 - w_k**2)

# A negative discriminant means no real solution; clamp it to zero, which
# keeps only the real part of the complex solution.
w_root = np.sqrt(np.maximum(w_disc, 0)) / w_a
w_centre = w_k * p4mu.z / w_a

# Of the two solutions w_centre +/- w_root, keep the one with smaller |pz|.
tempz = w_centre - np.copysign(w_root, w_centre)

newmet = ak.zip(
    {"x": p4met.x,
     "y": p4met.y,
     "z": tempz,
     # Massless neutrino: the energy must follow the new pz.
     "e": np.sqrt(p4met.pt**2 + tempz**2)
    }, with_name="Momentum4D",
)


In [None]:
# Print met and newmet (whole vectors, then x, then z) for comparison, with
# a blank line between consecutive items.
print(
    met, "", newmet, "",
    met.x, "", newmet.x, "",
    met.z, "", newmet.z,
    sep="\n",
)


In [None]:
# Rebuild the combinations. The commented line uses the W-mass-constrained
# `newmet`; the active line uses the original `met`.
#p4mu,p4fj,p4j,p4met = ak.unzip(ak.cartesian([muons, fatjets, jets, newmet]))
p4mu,p4fj,p4j,p4met = ak.unzip(ak.cartesian([muons, fatjets, jets, met]))

In [None]:
# Four-vector sum of muon + fat jet + jet + MET for every combination.
p4tot = p4mu + p4fj + p4j + p4met

In [None]:
# Histogram the invariant mass of all combinations; the trailing semicolon
# suppresses the notebook's echo of the return value.
plt.hist(ak.flatten(p4tot.mass),bins=50, range=(0,7000));

In [None]:
# One flat column per quantity, one row per combination.
mydict = {
    'mtt': ak.flatten(p4tot.mass),
    'mu_pt': ak.flatten(p4mu.pt),
}

df = pd.DataFrame.from_dict(mydict)

df

# Name the CSV after the dataset label and the ROOT file's base name (the
# text after the last '/' and before the first '.').
root_basename = filename.rpartition('/')[2].partition('.')[0]
outfilename = f"output_{dataset}_{root_basename}.csv"
print(outfilename)

df.to_csv(outfilename, index=False)

In [None]:
!cat output_signal_22BAB5D2-9E3F-E440-AB30-AE6DBFDF6C83.csv

# Sandbox

In [None]:
# Run this if these are not installed
# The pip commands below are wrapped in a string literal, so this cell does
# nothing as written; remove the surrounding triple quotes to run them.
# NOTE(review): pandas is imported by this notebook but is not listed here.
'''
!pip install --upgrade awkward
!pip install --upgrade uproot

!pip install coffea

!pip install --upgrade matplotlib

!pip install vector
'''

In [None]:
import awkward as ak
import uproot

import coffea

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

import numpy as np

import matplotlib.pylab as plt

import matplotlib

import vector
vector.register_awkward()

import pandas as pd


In [None]:
# Report the version of every library imported above, one per line.
for label, module in (
    ("ak", ak),
    ("uproot", uproot),
    ("coffea", coffea),
    ("np", np),
    ("matplotlib", matplotlib),
    ("vector", vector),
):
    print(f"{label}.__version__ = {module.__version__!r}")

In [None]:
# Leave exactly one `filename` uncommented to choose the sample.

# Signal
# NOTE(review): the file name says M2000_W20, whereas the "Signal" notes at the
# top of this document quote M = 3000 GeV -- confirm which mass point is meant.
filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/ZprimeToTT_M2000_W20_TuneCP2_PSweights_13TeV-madgraph-pythiaMLM-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/270000/22BAB5D2-9E3F-E440-AB30-AE6DBFDF6C83.root'


# TT to semilep
#filename = 'root://eospublic.cern.ch//eos/opendata/cms/mc/RunIISummer20UL16NanoAODv9/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_mcRun2_asymptotic_v17-v1/120000/08FCB2ED-176B-064B-85AB-37B898773B98.root'

# The handle is kept open on purpose: later cells read branches from it.
f = uproot.open(filename)

In [None]:
#fname = "https://raw.githubusercontent.com/CoffeaTeam/coffea/master/tests/samples/nano_dy.root"
#events = NanoEventsFactory.from_root(
#    {fname: "Events"},
#    schemaclass=NanoAODSchema,
#).events()

In [None]:
# 'Events' is the tree every later cell reads its branches from.
events = f['Events']

# Alternative: read several branch families in one call.
#arrays = events.arrays(filter_name=['Muon_*', 'Jet_*'])


#arrays
#events.keys()

In [None]:
def pretty_print(fields, fmt='40s', require=None, ignore=None):
    """Print the selected names from *fields* in aligned columns.

    Args:
        fields: Iterable of strings, e.g. ``events.keys()``.
        fmt: Format spec applied to every printed name (``'40s'`` pads each
            name to 40 characters).
        require: A substring, or a list/tuple/set of substrings, that must
            ALL occur in a name for it to be printed. ``None`` disables the
            filter.
        ignore: A substring, or a list/tuple/set of substrings; a name that
            contains ANY of them is skipped. ``None`` disables the filter.

    Names are appended to the current row until the row plus the next raw
    name would exceed 80 characters; the row is then printed and a new one
    started. The last (possibly empty) row is always printed.
    """

    def as_substrings(value):
        # Accept None, a single string, or any iterable of strings.
        if value is None:
            return ()
        if isinstance(value, str):
            return (value,)
        return tuple(value)

    required = as_substrings(require)
    ignored = as_substrings(ignore)

    row = ""
    for name in fields:
        if not all(piece in name for piece in required):
            continue
        if any(piece in name for piece in ignored):
            continue

        # Flush only a non-empty row, so an over-long first name does not
        # produce a stray blank line.
        if row and len(row) + len(name) > 80:
            print(row)
            row = ""
        row += f"{name:{fmt}} "

    print(row)

In [None]:
# Browse the branch names of the tree. Uncomment one call at a time:
# `require` keeps names containing every listed substring, `ignore` drops
# names containing the given substring.
#pretty_print(events.keys())

# Active call: every branch whose name contains 'FatJet'.
pretty_print(events.keys(), fmt='30s', require='FatJet')
#pretty_print(events.keys(), fmt='40s', require=['Muon', 'Iso'], ignore='HLT')
#pretty_print(events.keys(), fmt='40s', require=['HLT', 'TkMu50'])
#pretty_print(events.keys(), fmt='40s', require='HLT')
#pretty_print(events.keys(), fmt='40s', require='Jet_', ignore='Fat')
#pretty_print(events.keys(), fmt='40s', require='PuppiMET')

In [None]:
# Trigger decision branch, used below as an event-level mask.
# NOTE(review): the "Trigger" notes at the top of this document describe an OR
# of HLT_Mu50 and HLT_TkMu50; only HLT_TkMu50 is read here.
cut_trigger = events['HLT_TkMu50'].array()

In [None]:
# For this first experiment the event selection is the trigger alone.
cut_full_event = cut_trigger

In [None]:
# Only the value of the last expression is displayed by the notebook; the
# first two lines are evaluated and discarded.
len(cut_full_event)
cut_full_event
# Length of the FatJet_eta array along axis 0, before any selection.
ak.num(events['FatJet_eta'].array(), axis=0)

In [None]:
# Same count after applying the event-level mask.
ak.num(events['FatJet_eta'].array()[cut_full_event], axis=0)

In [None]:
#branches = events.arrays()

In [None]:
# Demonstrate combining an event-level mask with an object-level mask.
fatjet_pt = events['FatJet_pt'].array()
# Object-level mask built from the unfiltered array.
cut_temp = fatjet_pt>400

print(len(fatjet_pt))
print(fatjet_pt[0:10])

# Apply the event mask to the data AND to the object mask, so both refer to
# the same events, before applying the object mask.
fatjet_pt = events['FatJet_pt'].array()[cut_full_event][cut_temp[cut_full_event]]
#fatjet_pt = events['FatJet_pt'].array()[cut_temp[cut_full_event]]

print(len(fatjet_pt))
print(fatjet_pt[0:10])


In [None]:
# Read the FatJet_* branches unfiltered; the event selection is applied later,
# when the four-vectors are built. Each branch is read exactly once
# (FatJet_eta used to be fetched twice from the remote file).
fatjet_mSD = events['FatJet_msoftdrop'].array()

fatjet_tag = events['FatJet_particleNet_TvsQCD'].array()

fatjet_tau2 = events['FatJet_tau2'].array()
fatjet_tau3 = events['FatJet_tau3'].array()

fatjet_pt = events['FatJet_pt'].array()
fatjet_eta = events['FatJet_eta'].array()
fatjet_phi = events['FatJet_phi'].array()
fatjet_mass = events['FatJet_mass'].array()



In [None]:
# Read the Muon_* kinematics plus the isolation and tight-ID branches,
# unfiltered; the event selection is applied later.
(
    muon_pt,
    muon_eta,
    muon_phi,
    muon_mass,
    muon_iso,
    muon_tightId,
) = (
    events[branch].array()
    for branch in (
        'Muon_pt',
        'Muon_eta',
        'Muon_phi',
        'Muon_mass',
        'Muon_miniIsoId',
        'Muon_tightId',
    )
)



In [None]:
# Read the Jet_* b-tag score, jet ID and kinematics, unfiltered; the event
# selection is applied later.
(
    jet_btag,
    jet_jetid,
    jet_pt,
    jet_eta,
    jet_phi,
    jet_mass,
) = (
    events[branch].array()
    for branch in (
        'Jet_btagDeepB',
        'Jet_jetId',
        'Jet_pt',
        'Jet_eta',
        'Jet_phi',
        'Jet_mass',
    )
)



In [None]:
# Display the jet mass array as a quick look at its contents.
jet_mass

In [None]:
# Missing transverse momentum (PUPPI), one value per event.
met_pt = events['PuppiMET_pt'].array()
met_phi = events['PuppiMET_phi'].array()

# Only the transverse components are measured, so the vector is placed at
# eta = 0 (pz = 0).
met_eta = 0 * met_pt

# Energy of the MET four-vector: treated as a massless particle with pz = 0,
# so E = pt. PuppiMET_sumEt is NOT used here: it is the scalar sum of Et over
# the event, not the energy of the missing-momentum vector, and would distort
# every invariant mass built from it.
met_energy = met_pt

# Leptonic HT: each muon's pt plus the event's MET pt.
ht_lep = muon_pt + met_pt

In [None]:
#met_energy

In [None]:
#jet_jetid

In [None]:
# ---- Object-level cuts: boolean masks with one entry per object ----
tau32 = fatjet_tau3/fatjet_tau2  # only needed by the alternative fat-jet cut below

#cut_fatjet = (tau32>0.67) & (fatjet_eta>-2.4) & (fatjet_eta<2.4) & (fatjet_mSD>105) & (fatjet_mSD<220)
cut_fatjet = (fatjet_pt > 500) & (fatjet_tag > 0.5)

# muon_tightId is already boolean, so it is used directly as a mask.
cut_muon = (muon_pt>55) & (muon_eta>-2.4) & (muon_eta<2.4) & \
           muon_tightId & (muon_iso>1) & (ht_lep>150)

cut_jet = (jet_btag > 0.5) & (jet_jetid>=4)


# ---- Event-level cuts: boolean masks with one entry per event ----
cut_met = (met_pt > 50)

# Exactly one muon passing the object-level muon cut.
cut_nmuons = ak.num(cut_muon[cut_muon]) == 1

# The muon-channel trigger is the OR of HLT_Mu50 and HLT_TkMu50 (see the
# "Trigger" notes at the top of this document); requiring HLT_TkMu50 alone
# would drop events that fired only HLT_Mu50.
cut_trigger = events['HLT_Mu50'].array() | events['HLT_TkMu50'].array()

# Exactly one fat jet passing the object-level fat-jet cut.
cut_ntop = ak.num(cut_fatjet[cut_fatjet]) == 1

cut_full_event = cut_trigger & cut_nmuons & cut_met & cut_ntop# & cut_ht

In [None]:
#events['Muon_pt'].array()[cut_met]

In [None]:
#cutn_muons

In [None]:
# Disabled draft: everything in this cell is either commented out or wrapped
# in a string literal, so running it has no effect.
#cut_trigger = events['HLT_TkMu50'].array()

#met_pt = events['PuppiMET_pt'].array()
#cut_met = met_pt > 50

#cut_full_event = cut_trigger & cut_met

'''
fatjet_pt = events['FatJet_pt'].array()
fatjet_eta = events['FatJet_eta'].array()
fatjet_phi = events['FatJet_phi'].array()
fatjet_mass = events['FatJet_mass'].array()

muon_pt = events['Muon_pt'].array()
muon_eta = events['Muon_eta'].array()
muon_phi = events['Muon_phi'].array()
muon_mass = events['Muon_mass'].array()

jet_pt = events['Jet_pt'].array()
jet_eta = events['Jet_eta'].array()
jet_phi = events['Jet_phi'].array()
jet_mass = events['Jet_mass'].array()

met_pt = events['PuppiMET_pt'].array()
met_eta = 0*events['PuppiMET_pt'].array()  # Fix this to be 0
met_phi = events['PuppiMET_phi'].array() 
met_energy = events['PuppiMET_sumEt'].array() 
'''

In [None]:
#cut_trigger
#cut_met

In [None]:
# Object-level masks restricted to the events that pass the full event cut.
fatjet_mask = cut_fatjet[cut_full_event]
muon_mask = cut_muon[cut_full_event]
jet_mask = cut_jet[cut_full_event]

# For each collection: keep the selected events first, then the selected
# objects inside them, and pack the result as Momentum4D records.
fatjets = ak.zip(
    {
        key: values[cut_full_event][fatjet_mask]
        for key, values in (
            ("pt", fatjet_pt),
            ("eta", fatjet_eta),
            ("phi", fatjet_phi),
            ("mass", fatjet_mass),
        )
    },
    with_name="Momentum4D",
)

muons = ak.zip(
    {
        key: values[cut_full_event][muon_mask]
        for key, values in (
            ("pt", muon_pt),
            ("eta", muon_eta),
            ("phi", muon_phi),
            ("mass", muon_mass),
        )
    },
    with_name="Momentum4D",
)

jets = ak.zip(
    {
        key: values[cut_full_event][jet_mask]
        for key, values in (
            ("pt", jet_pt),
            ("eta", jet_eta),
            ("phi", jet_phi),
            ("mass", jet_mass),
        )
    },
    with_name="Momentum4D",
)

# MET only needs the event-level selection; it is built from energy ("e")
# rather than mass.
met = ak.zip(
    {
        key: values[cut_full_event]
        for key, values in (
            ("pt", met_pt),
            ("eta", met_eta),
            ("phi", met_phi),
            ("e", met_energy),
        )
    },
    with_name="Momentum4D",
)

In [None]:
#cut_fatjet

In [None]:
#len(cut_met[cut_met])

In [None]:
#cut_met

In [None]:
#cut_met & cut_fatjet

In [None]:
#fatjets

In [None]:
#muons

In [None]:
#jets

In [None]:
#met

In [None]:
# Earlier attempts using ak.combinations, kept for reference.
#p4mu = ak.unzip(ak.combinations(muons,1))
#p4fj = ak.unzip(ak.combinations(fatjets,1))

#p4mu1,p4mu2 = ak.unzip(ak.combinations(muons,2))
#p4fj1,p4fj2 = ak.unzip(ak.combinations(fatjets,2))

# Form the Cartesian product of the four collections and unzip it into four
# parallel arrays, so entry i of each array belongs to the same combination.
p4mu,p4fj,p4j,p4met = ak.unzip(ak.cartesian([muons, fatjets, jets, met]))

# Because only one of each of these is needed at a time, they were handled differently
#p4mu = ak.unzip(ak.zip((muons,)))
#p4fj = ak.unzip(ak.zip((fatjets,)))

In [None]:
#p4j
#p4met

In [None]:
# Four-vector sum of muon + fat jet + jet + MET for every combination.
p4tot = p4mu + p4fj + p4j + p4met
# Earlier experiments, kept for reference.
#p4tot = p4mu1 + p4mu2

#p4tot = ak.cartesian([p4mu,p4fj])
#p4tot = ak.cartesian([muons, fatjets])

#m = ak.unzip(p4tot).mass
#x = ak.unzip(p4tot)

#plt.hist(ak.unflatten(m),bins=50);

In [None]:
# Dump the components of every four-vector for one selected entry.
n = 5

for label, p4 in (('mu', p4mu), ('fatjet', p4fj), ('jet', p4j), ('met', p4met)):
    entry = p4[n]
    print(label)
    print(entry.pt, entry.x, entry.y, entry.z, entry.e)

# The summed vector additionally shows its invariant mass.
total = p4tot[n]
print('tot')
print(total.pt, total.x, total.y, total.z, total.e, total.m)

In [None]:
# Print the same slice of entries from each array of four-vectors.
n0 = 0
n1 = 10

for p4 in (p4mu, p4fj, p4j, p4met, p4tot):
    print(p4[n0:n1])

In [None]:
#p4fj[0]

In [None]:
#muons#[0][0]

In [None]:
# Display the summed four-vectors as a quick look at their contents.
p4tot

In [None]:
#p4tot.mass

In [None]:
# Histogram the invariant mass of all combinations; the trailing semicolon
# suppresses the notebook's echo of the return value.
plt.hist(ak.flatten(p4tot.mass),bins=50, range=(0,7000));

In [None]:
# One flat column per quantity, one row per combination.
mydict = {
    'mtt': ak.flatten(p4tot.mass),
    'mu_pt': ak.flatten(p4mu.pt),
}

df = pd.DataFrame.from_dict(mydict)

# Last expression of the cell: the notebook renders the table.
df

In [None]:
# Minimal example of combining an event-level cut with an object-level cut.

# Event-level mask: one boolean per event.
cut_event_level = events['PuppiMET_pt'].array() > 50

muon_pt = events['Muon_pt'].array()

# Object-level mask: one boolean per muon.
cut_muon = muon_pt > 35

# Apply the event mask to both the data and the object mask so they refer to
# the same events, then apply the object mask.
selected_muons = muon_pt[cut_event_level][cut_muon[cut_event_level]]
