In [1]:
import matplotlib.pyplot as plt
import uproot
import numpy as np
import pandas as pd

from pathlib import Path
from sklearn.model_selection import train_test_split

from datetime import datetime
import sys
sys.path.insert(1, '/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/util_funcs/')
from pandas_colFuncs import whichBisSig_NAHS, B_ID

In [2]:
# Run-configuration switches for this notebook.
# Only tmp_data is read in the cells shown here (it selects a file-name prefix);
# the other notes are inferred from the variable names — TODO confirm against
# the cells that consume them.
save_data = True                    # presumably: write the produced data set to disk
save_preprocessedDataframe = True   # presumably: also persist the preprocessed DataFrame
tmp_data = False                    # when True, "tmp_" is appended to the `merged` file prefix
take_subset = False                 # presumably: restrict processing to a subset of events
subset_size = 100000                # presumably: size of that subset when take_subset is True

delete_top_numFSP_evts=True         # presumably: drop the events with the most FSPs — TODO confirm

In [3]:
# Input/output locations.
# nfs_path / data_subdir / root_subdir describe the layout on the NFS area.
nfs_path = "/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/"

data_subdir = "FULLCMrun_Dstlnu_SHR_BsX_Ablations/"
root_subdir = "SHR_CMvars_run1/"   # "SHR_LeasVars_run2/"

# NOTE(review): the NFS-based root_path built on the next line is immediately
# overwritten by the AFS path below, so every file opened via root_path in this
# notebook comes from the onlineDataProd directory. Comment out the second
# assignment to switch back to the NFS location.
root_path = nfs_path + "SHR_Hc_correctReco_BsX/" + root_subdir # + sub + "/"
root_path = "/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/"

In [4]:
# File-name prefix placed between root_path and the per-sample ROOT file name.
# "merged_" (plus "tmp_" when tmp_data is set) is built first ...
merged = "merged_"
if tmp_data:
    merged += "tmp_"
# ... but NOTE(review): the prefix is then reset to the empty string, so the
# value computed above (and the tmp_data switch) has no effect on the file names
# used below. Remove this last assignment to read the "merged_" files instead.
merged = ""

In [5]:
# Open the `variables` object of the Hc ROOT file (path = root_path + prefix + file name).
fileHc = uproot.open(f"{root_path}{merged}allHc.root:variables")

In [26]:
# Read the contents of the opened `variables` object into a pandas DataFrame
# (library="pd" selects pandas as the output container).
df_Hc = fileHc.arrays(library="pd")

In [7]:
# Load one DataFrame per particle species from "<root_path><merged><species>.root",
# reading the `variables` object of each file. The frames are collected in `dfs`
# in the order given by `names`.
names = ["gammas","electrons","pions","kaons","muons"]
dfs = []
for name in names:
    filename = root_path + merged + "{}.root:variables".format(name)
    print(filename)
    # Use the opened object as a context manager so the underlying ROOT file is
    # closed as soon as its contents are in memory; previously each of the five
    # file handles stayed open for the lifetime of the kernel.
    with uproot.open(filename) as tmpFileFSPs:
        df_tmp = tmpFileFSPs.arrays(library="pd")
    dfs.append(df_tmp)

/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/gammas.root:variables
/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/electrons.root:variables
/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/pions.root:variables
/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/kaons.root:variables
/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/NAHS/onlineDataProd/muons.root:variables


In [8]:
# Stack the per-species frames into a single table. ignore_index is not set, so
# every input frame keeps its own row labels and labels can repeat in the
# combined index — avoid label-based row lookups on df_FSPs.
df_FSPs = pd.concat(dfs)

In [9]:
# delete FSPs for which no Y4S file entry was found
# (concretely: keep only FSP rows whose __event__ also occurs in df_Hc).
# The explicit .copy() detaches the filtered frame from its parent, so that
# columns added to df_FSPs afterwards (e.g. 'B_ID') are written to an
# independent frame and do not raise pandas' SettingWithCopyWarning.
df_FSPs = df_FSPs[df_FSPs['__event__'].isin(df_Hc["__event__"])].copy()

## Check that each particle occurs only once

In [10]:
# Number of rows per (event, particle identifier) pair, as a flat table with a
# 'count' column; then show how often each multiplicity occurs.
groupsFSPs_uniqParID = (
    df_FSPs
    .groupby(["__event__", "uniqueParticleIdentifier"])
    .size()
    .reset_index(name="count")
)
groupsFSPs_uniqParID.value_counts(subset="count")

count
1    2991
dtype: int64

## add cols

In [27]:
# Row-wise apply: whichBisSig_NAHS (imported from pandas_colFuncs) is called once
# per df_Hc row and its result stored in the new 'Bsig_uniqParID' column.
# The helper's logic is not visible in this notebook; presumably it returns the
# uniqueParticleIdentifier of the signal-side B — TODO confirm.
df_Hc['Bsig_uniqParID'] = df_Hc.apply(whichBisSig_NAHS, axis=1)

In [12]:
# Tally how many df_Hc rows carry each 'Bsig_uniqParID' value.
df_Hc['Bsig_uniqParID'].value_counts()

83886082.0    95
83886081.0    83
Name: Bsig_uniqParID, dtype: int64

In [13]:
# Add a 'B_ID' column holding, per the helper's stated purpose, the
# uniqueParticleIdentifier of each particle's mother B.
# B_ID (imported from pandas_colFuncs) is applied once per row; its logic is not
# visible here.
df_FSPs['B_ID'] = df_FSPs.apply(B_ID, axis=1)

In [14]:
# Tally how many FSP rows carry each 'B_ID' value.
# NOTE(review): a value of 0 presumably marks particles without a matched mother B — confirm in B_ID.
df_FSPs['B_ID'].value_counts()

83886081    1349
83886082    1255
0            387
Name: B_ID, dtype: int64

## check D*lnu

In [30]:
# For rows whose signal B has identifier 83886082, count how often each
# combination of the three genUp4S_PDG_1_* values occurs.
groupsHc = (
    df_Hc.loc[df_Hc["Bsig_uniqParID"] == 83886082.0]
    .groupby(["genUp4S_PDG_1_0", "genUp4S_PDG_1_1", "genUp4S_PDG_1_2"])
    .size()
    .reset_index(name="count")
)

groupsHc

Unnamed: 0,genUp4S_PDG_1_0,genUp4S_PDG_1_1,genUp4S_PDG_1_2,count
0,-413.0,-13.0,14.0,22
1,-413.0,-11.0,12.0,27
2,413.0,11.0,-12.0,22
3,413.0,13.0,-14.0,24


In [33]:
# For rows whose signal B has identifier 83886081, count how often each
# combination of the three genUp4S_PDG_0_* values occurs.
groupsHc = (
    df_Hc.loc[df_Hc["Bsig_uniqParID"] == 83886081.0]
    .groupby(["genUp4S_PDG_0_0", "genUp4S_PDG_0_1", "genUp4S_PDG_0_2"])
    .size()
    .reset_index(name="count")
)

groupsHc

Unnamed: 0,genUp4S_PDG_0_0,genUp4S_PDG_0_1,genUp4S_PDG_0_2,count
0,-413.0,-13.0,14.0,21
1,-413.0,-11.0,12.0,18
2,413.0,11.0,-12.0,24
3,413.0,13.0,-14.0,20


## Delete events where the Hc mother and Bsig overlap

In [23]:
# Number of rows currently in df_Hc.
print(len(df_Hc))

178


In [34]:
# Preview (first 10 rows, selected columns) of the rows whose Hc mother
# identifier equals the signal-B identifier.
df_Hc.loc[
    df_Hc['Bsig_uniqParID'] == df_Hc['mcMother0_uniqParID'],
    ["__event__", "uniqueParticleIdentifier", "PDG",
     "genMothPDG_0", 'Bsig_uniqParID', "mcMother0_uniqParID"],
].head(10)

Unnamed: 0,__event__,uniqueParticleIdentifier,PDG,genMothPDG_0,Bsig_uniqParID,mcMother0_uniqParID
51,2331013,100663296.0,-413.0,511.0,83886081.0,83886081.0
52,2331096,100663296.0,-413.0,511.0,83886082.0,83886082.0
53,2331142,100663296.0,413.0,-511.0,83886081.0,83886081.0
54,2331249,100663296.0,-413.0,511.0,83886082.0,83886082.0
55,2331349,100663296.0,413.0,-511.0,83886082.0,83886082.0
56,2331398,100663296.0,413.0,-511.0,83886081.0,83886081.0
57,2331398,100663296.0,413.0,-511.0,83886081.0,83886081.0
61,2331499,100663296.0,-413.0,511.0,83886082.0,83886082.0
62,2331581,100663296.0,413.0,-511.0,83886082.0,83886082.0
63,2331619,100663296.0,413.0,-511.0,83886081.0,83886081.0


In [35]:
# Keep only rows whose Hc mother identifier differs from the signal-B identifier.
df_Hc = df_Hc.loc[df_Hc['Bsig_uniqParID'].ne(df_Hc['mcMother0_uniqParID'])]

In [36]:
# Number of rows currently in df_Hc.
print(len(df_Hc))

97


In [37]:
#[["__event__","uniqueParticleIdentifier","PDG","genMothPDG_0",'Bsig_uniqParID',"mcMother0_uniqParID"]]

## Handle duplicates in df_Hc: delete them, because after the preceding cut, duplicates here mean there must have been an error

In [63]:
# Number of df_Hc rows per event, as a flat table with a 'count' column;
# then show how often each multiplicity occurs.
groupsHc_uniqParID = (
    df_Hc
    .groupby(["__event__"])
    .size()
    .reset_index(name="count")
)
groupsHc_uniqParID.value_counts(subset="count")

count
1    95
2     1
dtype: int64

In [46]:
# Preview (first 10 rows, selected columns) of the df_Hc rows belonging to
# events that occur more than once.
df_Hc.loc[
    df_Hc['__event__'].isin(
        groupsHc_uniqParID.loc[groupsHc_uniqParID["count"] > 1, "__event__"]
    ),
    ["__event__", "uniqueParticleIdentifier", "PDG",
     "genMothPDG_0", 'Bsig_uniqParID', "mcMother0_uniqParID",
     "px", "E", "x", "genUp4S_PDG_1_0"],
].head(10)

Unnamed: 0,__event__,uniqueParticleIdentifier,PDG,genMothPDG_0,Bsig_uniqParID,mcMother0_uniqParID,px,E,x,genUp4S_PDG_1_0
20,2332070,100663296.0,-411.0,-511.0,83886081.0,83886082.0,-0.257812,2.284948,-0.051214,411.0
21,2332070,100663296.0,411.0,-511.0,83886081.0,83886082.0,0.036926,2.162483,-0.044568,411.0


In [73]:
# Keep only the events that occur exactly once in df_Hc (per groupsHc_uniqParID).
df_Hc = df_Hc.loc[
    df_Hc['__event__'].isin(
        groupsHc_uniqParID.loc[groupsHc_uniqParID["count"] == 1, "__event__"]
    )
]

In [74]:
# Re-synchronise the FSP table: keep only rows whose event is still present in df_Hc.
df_FSPs = df_FSPs.loc[df_FSPs['__event__'].isin(df_Hc["__event__"])]

In [75]:
# Recompute the per-event row multiplicity of df_Hc and show its distribution.
groupsHc_uniqParID = (
    df_Hc
    .groupby(["__event__"])
    .size()
    .reset_index(name="count")
)
groupsHc_uniqParID.value_counts(subset="count")

count
1    95
dtype: int64

##  

##  

##  

##  