In [1]:
import matplotlib.pyplot as plt
import uproot
import numpy as np
import pandas as pd

In [4]:
# Base directory for this study: the merged ROOT files are read from here and
# the CSV copies of the DataFrames are written back to the same place.
nfs_path = "/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/"

In [5]:
# Open the object named "variables" inside merged_DXtagDstl.root
# (uproot's "<file path>:<object path>" syntax).
fileY4S = uproot.open(nfs_path + "merged_DXtagDstl.root:variables")

In [6]:
# Read all branches of the opened object into a pandas DataFrame.
df_Y4S = fileY4S.arrays(library="pd")

In [7]:
# List the branch names available in the opened object.
fileY4S.keys()

['__experiment__',
 '__run__',
 '__event__',
 '__candidate__',
 '__ncandidates__',
 '__weight__',
 'm2RecoilSignalSide',
 'foxWolframR2_maskedNaN',
 'foxWolframR2',
 'extraInfo__boFEIProbabilityRank__bc',
 'nTracks',
 'Up4S_isSig',
 'BeamE',
 'BeamPx',
 'BeamPy',
 'BeamPz',
 'BeamcmsE',
 'BeamcmsPx',
 'BeamcmsPy',
 'BeamcmsPz',
 'genUp4S_PDG_0',
 'genUp4S_charge_0',
 'genUp4S_mdstIndex_0',
 'genUp4S_genParticleID_0',
 'genUp4S_E_0',
 'genUp4S_Px_0',
 'genUp4S_Py_0',
 'genUp4S_Pz_0',
 'genUp4S_P_0',
 'genUp4S_cmE_0',
 'genUp4S_cmPx_0',
 'genUp4S_cmPy_0',
 'genUp4S_cmPz_0',
 'genUp4S_cmP_0',
 'genUp4S_uniqParID_0',
 'genUp4S_PDG_0_0',
 'genUp4S_mdstIndex_0_0',
 'genUp4S_genParticleID_0_0',
 'genUp4S_uniqParID_0_0',
 'genUp4S_PDG_0_0_0',
 'genUp4S_mdstIndex_0_0_0',
 'genUp4S_genParticleID_0_0_0',
 'genUp4S_uniqParID_0_0_0',
 'genUp4S_PDG_0_0_1',
 'genUp4S_mdstIndex_0_0_1',
 'genUp4S_genParticleID_0_0_1',
 'genUp4S_uniqParID_0_0_1',
 'genUp4S_PDG_0_1',
 'genUp4S_mdstIndex_0_1',
 'genUp4S_g

In [8]:
# Load the "variables" object of each per-particle-type file into its own
# DataFrame; the frames are collected in `dfs` in the order given by `names`.
names = ["gammas","electrons","pions","kaons","muons"]
dfs = []
for name in names:
    filename = nfs_path + "merged_{}.root:variables".format(name)
    print(filename)
    # Use the opened object as a context manager so the underlying ROOT file
    # is closed as soon as its arrays have been read, instead of leaving one
    # open file handle behind per particle type.
    with uproot.open(filename) as tmpFileFSPs:
        df_tmp = tmpFileFSPs.arrays(library="pd")
    dfs.append(df_tmp)

/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/merged_gammas.root:variables
/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/merged_electrons.root:variables
/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/merged_pions.root:variables
/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/merged_kaons.root:variables
/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/Dstlnu_skim/axheim_DstlnuSKIM_run1/merged_muons.root:variables


In [9]:
# Stack the per-particle-type frames row-wise. pd.concat keeps each frame's own
# row labels by default (ignore_index=False), so index labels can repeat in the
# combined frame.
df_FSPs = pd.concat(dfs)

In [10]:
# Row count of the concatenated frame, before duplicates are dropped.
print(df_FSPs.shape[0])

12556867


### Delete particles that occur more than once, based on uniqueParticleIdentifier

In [11]:
# Sort so that, among rows sharing the same (__event__, uniqueParticleIdentifier)
# pair, the one with the largest basf2_used comes first; then keep only that
# first row of each pair.
# NOTE(review): the key uses __event__ only, not __run__/__experiment__ -
# confirm that event numbers are unique across the merged input.
df_FSPs_noDuplis = (
    df_FSPs
    .sort_values(by="basf2_used", ascending=False)
    .drop_duplicates(
        subset=("__event__", "uniqueParticleIdentifier"),
        keep="first",
    )
)

In [12]:
# Row count after dropping the duplicated particles.
print(df_FSPs_noDuplis.shape[0])

8594615


## Save and load the DataFrames after removing duplicate FSPs

In [13]:
# Write both frames as CSV files into nfs_path. to_csv also writes the row
# index by default (index=True), as an unnamed first column.
df_FSPs_noDuplis.to_csv(nfs_path + "skimFSPs_df.csv")
df_Y4S.to_csv(nfs_path + "skimY4S_df.csv")

In [14]:
# Uncomment to reload the saved CSV copies instead of re-reading the ROOT files.
# NOTE(review): the CSVs were written with their index, so read_csv will return
# it as an extra "Unnamed: 0" column unless index_col=0 is passed.
#df_FSPs_noDuplis = pd.read_csv(nfs_path + "skimFSPs_df.csv")
#df_Y4S = pd.read_csv(nfs_path + "skimY4S_df.csv")

## Check the number of events

In [21]:
# Number of distinct __event__ values in the Y4S frame.
df_Y4S["__event__"].nunique()

11771

In [22]:
# Number of distinct __event__ values in the de-duplicated FSP frame.
df_FSPs_noDuplis["__event__"].nunique()

465660

In [23]:
# Number of rows per __event__ value, sorted from most to fewest rows.
df_FSPs_noDuplis["__event__"].value_counts()

1485463    42
3507296    41
7748014    41
6026888    41
7171164    41
           ..
3087076     4
340336      4
5840747     4
7372468     4
7169083     4
Name: __event__, Length: 465660, dtype: int64

In [24]:
# Optionally continue with a random subset of the de-duplicated frame (for
# faster iteration). Either way, df_FSPs is rebound here and no longer refers
# to the raw concatenated frame.
take_subset = False
subset_size = 10000  # rows to draw when take_subset is True
subset_seed = 42     # fixed seed so the same subset is drawn on every run
if take_subset:
    # DataFrame.sample raises when n exceeds the number of rows (sampling
    # without replacement), so cap the request at the frame length.
    df_sample = df_FSPs_noDuplis.sample(
        n=min(subset_size, len(df_FSPs_noDuplis)),
        random_state=subset_seed,
    )
    df_FSPs = df_sample
else:
    df_FSPs = df_FSPs_noDuplis

NameError: name 'take_subset' is not defined

In [26]:
def isBtoDstlnu(s):
    """Flag one candidate as a truth-matched B -> D* l nu decay.

    Parameters
    ----------
    s : mapping-like (e.g. one DataFrame row)
        Must provide "lep_mcPDG". "lep_motherPDG" and "dau1_dau0_mcPDG" are
        only looked up once the preceding check has passed.

    Returns
    -------
    int
        1 if the lepton is an electron or muon (|PDG| 11 or 13), the lepton's
        mother is a B (|PDG| 511) and the B daughter is a D*+ (|PDG| 413);
        0 otherwise.
    """
    lepton_code = np.abs(s["lep_mcPDG"])
    if not (lepton_code == 13.0 or lepton_code == 11.0):
        return 0  # lepton is neither electron nor muon

    if not (np.abs(s["lep_motherPDG"]) == 511.0):
        return 0  # lepton's mother is not a B

    # daughter of B has to be a D*+
    return 1 if np.abs(s["dau1_dau0_mcPDG"]) == 413.0 else 0
# Evaluate isBtoDstlnu once per row (axis=1 passes each row as a Series) and
# store the 0/1 result as a new column on df_Y4S.
df_Y4S['isBtoDstlnu'] = df_Y4S.apply(isBtoDstlnu, axis=1)

In [28]:
# How many rows were flagged (1) versus not flagged (0).
df_Y4S['isBtoDstlnu'].value_counts()

0    6110
1    5661
Name: isBtoDstlnu, dtype: int64

 300553.0    3553
 413.0       2894
-413.0       2855
 511.0        178
-511.0        175
-431.0          1
-20413.0        1
 443.0          1
 423.0          1
 20433.0        1
Name: dau1_dau0_mcPDG, dtype: int64

In [42]:
# Show mcPDG plus the mcMother*/genMothPDG_*/genMotherID_* columns (suffixes
# 0-9, grouped per suffix) for every row of one example event.
mother_chain_cols = [
    template.format(i)
    for i in range(10)
    for template in ("mcMother{}_uniqParID", "genMothPDG_{}", "genMotherID_{}")
]
in_example_event = df_FSPs_noDuplis["__event__"] == 3507296
df_FSPs_noDuplis[in_example_event][["mcPDG"] + mother_chain_cols]

Unnamed: 0,mcPDG,mcMother0_uniqParID,genMothPDG_0,genMotherID_0,mcMother1_uniqParID,genMothPDG_1,genMotherID_1,mcMother2_uniqParID,genMothPDG_2,genMotherID_2,...,genMotherID_6,mcMother7_uniqParID,genMothPDG_7,genMotherID_7,mcMother8_uniqParID,genMothPDG_8,genMotherID_8,mcMother9_uniqParID,genMothPDG_9,genMotherID_9
1062050,321.0,83886095.0,411.0,15.0,83886083.0,413.0,3.0,83886081.0,-511.0,1.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062051,-11.0,83886140.0,22.0,60.0,83886118.0,111.0,38.0,83886104.0,310.0,24.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062052,211.0,83886095.0,411.0,15.0,83886083.0,413.0,3.0,83886081.0,-511.0,1.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062053,211.0,83886101.0,310.0,21.0,83886086.0,311.0,6.0,83886081.0,-511.0,1.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062054,321.0,83886106.0,-421.0,26.0,83886090.0,-413.0,10.0,83886082.0,511.0,2.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062055,-13.0,83886082.0,511.0,2.0,83886080.0,300553.0,0.0,,0.0,0.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062056,-321.0,83886081.0,-511.0,1.0,83886080.0,300553.0,0.0,,0.0,0.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062057,-321.0,83886095.0,411.0,15.0,83886083.0,413.0,3.0,83886081.0,-511.0,1.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062058,-211.0,83886101.0,310.0,21.0,83886086.0,311.0,6.0,83886081.0,-511.0,1.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0
1062059,-211.0,83886106.0,-421.0,26.0,83886090.0,-413.0,10.0,83886082.0,511.0,2.0,...,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0


In [33]:
# Column names of the de-duplicated FSP frame.
df_FSPs_noDuplis.keys()

Index(['__experiment__', '__run__', '__event__', '__candidate__',
       '__ncandidates__', '__weight__', 'basf2_X', 'basf2_used', 'basf2_Bsig',
       'isSignal', 'uniqueParticleIdentifier', 'mcErrors', 'mcPDG',
       'genMotherID', 'genMotherP', 'genMotherPDG', 'px', 'py', 'pz', 'pt',
       'p', 'E', 'kaonID', 'pionID', 'genMothPDG_0', 'genMothPDG_1',
       'genMothPDG_2', 'genMothPDG_3', 'genMothPDG_4', 'genMothPDG_5',
       'genMothPDG_6', 'genMothPDG_7', 'genMothPDG_8', 'genMothPDG_9',
       'genMotherID_0', 'genMotherID_1', 'genMotherID_2', 'genMotherID_3',
       'genMotherID_4', 'genMotherID_5', 'genMotherID_6', 'genMotherID_7',
       'genMotherID_8', 'genMotherID_9', 'mcMother0_uniqParID',
       'mcMother1_uniqParID', 'mcMother2_uniqParID', 'mcMother3_uniqParID',
       'mcMother4_uniqParID', 'mcMother5_uniqParID', 'mcMother6_uniqParID',
       'mcMother7_uniqParID', 'mcMother8_uniqParID', 'mcMother9_uniqParID',
       'PDG'],
      dtype='object')

In [38]:
# Scratch helper: print the quoted, comma-terminated column-name triplet for
# each suffix 0-9 (presumably for pasting into a column selection - confirm).
for suffix in range(10):
    print("'mcMother{0}_uniqParID', 'genMothPDG_{0}', 'genMotherID_{0}',".format(suffix))

    

'mcMother0_uniqParID', 'genMothPDG_0', 'genMotherID_0',
'mcMother1_uniqParID', 'genMothPDG_1', 'genMotherID_1',
'mcMother2_uniqParID', 'genMothPDG_2', 'genMotherID_2',
'mcMother3_uniqParID', 'genMothPDG_3', 'genMotherID_3',
'mcMother4_uniqParID', 'genMothPDG_4', 'genMotherID_4',
'mcMother5_uniqParID', 'genMothPDG_5', 'genMotherID_5',
'mcMother6_uniqParID', 'genMothPDG_6', 'genMotherID_6',
'mcMother7_uniqParID', 'genMothPDG_7', 'genMotherID_7',
'mcMother8_uniqParID', 'genMothPDG_8', 'genMotherID_8',
'mcMother9_uniqParID', 'genMothPDG_9', 'genMotherID_9',
