In [54]:
import matplotlib.pyplot as plt
import uproot
import numpy as np
import pandas as pd

from pathlib import Path
from sklearn.model_selection import train_test_split


# --- Run configuration -------------------------------------------------------
# Base directory of the study and the sub-directory holding the ROOT nTuples.
path = "/nfs/dust/belle2/user/axelheim/MC_studies/my6modes/"
subdir = "wSim_wReco"

# Suffix that selects which production (number of generated events) is read.
events_num_identifier = "_15000_events"

# When False, the dataset-building cell still runs but writes no .npy files.
save_data = True

# root_path stays a plain string ending in '/', because later cells build file
# names with `root_path + "<species>" + nm`.
root_path = "".join([path, "rootfiles/", subdir, '/'])

# Small pion nTuple opened only to inspect the available branches.
file_pi = uproot.open("".join([root_path, "pi_nTuples_mode1_10_events.root"]))

#file_raw = uproot.open(root_path + "mode0_17_events.root")

In [55]:
# List the branch names stored in the "variables" tree of the inspection file.
file_pi["variables"].keys()


['__experiment__',
 '__run__',
 '__event__',
 '__candidate__',
 '__ncandidates__',
 '__weight__',
 'M',
 'x',
 'y',
 'z',
 'px',
 'py',
 'pz',
 'pt',
 'p',
 'E',
 'kaonID',
 'pionID',
 'isSignal',
 'mcErrors',
 'mcPDG',
 'mcPhotos',
 'mcPrimary',
 'mcInitial',
 'charge',
 'uniqueParticleIdentifier',
 'genMotherID',
 'genMotherPDG',
 'genMotherPDG__bo0__bc',
 'genMotherPDG__bo1__bc',
 'genMotherPDG__bo2__bc',
 'genMotherPDG__bo3__bc']

In [56]:
def conditions(s, bs_pdg=None, hc_pdg=None):
    """Assign a class label to one particle row from its generator ancestors.

    The row's four ancestor PDG codes (``genPDG0`` .. ``genPDG3``) are compared
    against two reference PDG codes.

    Args:
        s: Row-like object (pandas Series or mapping) providing ``genPDG0``,
            ``genPDG1``, ``genPDG2`` and ``genPDG3``; each value must be
            convertible with ``int()``.
        bs_pdg: PDG code that marks the "Bs" class. When omitted, the
            module-level ``Bs_pdg`` is used, so ``df.apply(conditions, axis=1)``
            keeps working as before.
        hc_pdg: PDG code that marks the "Hc" class. When omitted, the
            module-level ``Hc_pdg`` is used.

    Returns:
        int: 0 if ``genPDG0`` is 0 (background, not related to MC particles),
        2 if any ancestor equals ``bs_pdg``, 3 if any ancestor equals
        ``hc_pdg`` (checked only after the Bs test fails), otherwise 1 ("X").

    Raises:
        NameError: if a reference code is omitted and the corresponding
            module-level variable has not been defined yet.
    """
    # Fall back to the notebook globals only when the caller did not pass the
    # reference codes explicitly.
    if bs_pdg is None:
        bs_pdg = Bs_pdg
    if hc_pdg is None:
        hc_pdg = Hc_pdg

    ancestor_keys = ("genPDG0", "genPDG1", "genPDG2", "genPDG3")

    if int(s["genPDG0"]) == 0:
        return 0  # background, not related to MC particles

    # any() evaluates lazily and in order, matching the original `or` chain.
    if any(int(s[key]) == bs_pdg for key in ancestor_keys):
        return 2  # Bs
    if any(int(s[key]) == hc_pdg for key in ancestor_keys):
        return 3  # Hc
    return 1  # X

In [None]:
# Build and save the training / validation / test arrays.
#
# For every decay mode (0-5), every label-cut variant, every final-state
# particle (FSP) multiplicity and every group of extra input features, this
# cell fills three numpy arrays (features, per-particle labels, bookkeeping
# tags) and, when `save_data` is True, writes them as .npy files.

# Each entry is one group of extra feature columns appended after the
# four-momentum (px, py, pz, E); one dataset directory is written per group.
extraInput_list = [["charge"],["pionID","kaonID"],["dx","dy","dz"],["z0"],["tanlambda"],
                   ["nCDCHits"],["trackNECLClusters"]]

# Per-mode reference PDG codes, indexed by mode number. They are copied into
# the module-level Bs_pdg / Hc_pdg that conditions() reads.
Bs_pdg_list = [-511, 511,-521,-521,521,521]
Hc_pdg_list = [-411, 411, -411, -421, 411, 423]
dfs =[]  # collects the labelled, imputed DataFrame of every mode
for mode in range(6):
    print("mode:",mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode)
    
    # conditions() picks these up as globals when df.apply() runs below.
    Bs_pdg = Bs_pdg_list[mode]
    Hc_pdg = Hc_pdg_list[mode]
    
    # The gamma, pi and K nTuples of one mode are concatenated into one table.
    nm = f"_nTuples_mode{mode}" + events_num_identifier + ".root"
    print((root_path + "gamma" + nm))
    raw_nTuples = uproot.concatenate([(root_path + "gamma" + nm), (root_path + "pi" + nm), (root_path + "K" + nm)])
    df = pd.DataFrame({"event" : np.array(raw_nTuples["__event__"]),
       "mcPDG" : np.array(raw_nTuples["mcPDG"]),
       "mcPrimary":np.array(raw_nTuples["mcPrimary"]),
       "isSignal":np.array(raw_nTuples["isSignal"]),
       #"mcInitial":np.array(raw_nTuples["mcInitial"]),
       "px" :  np.array(raw_nTuples["px"]),
       "py" : np.array(raw_nTuples["py"]),
       "pz" : np.array(raw_nTuples["pz"]),
       "E" : np.array(raw_nTuples["E"]),
                   
       "dx" : np.array(raw_nTuples["dx"]),
       "dy" : np.array(raw_nTuples["dy"]),
       "dz" : np.array(raw_nTuples["dz"]),
       "prodVertexX" : np.array(raw_nTuples["prodVertexX"]),
       "prodVertexY" : np.array(raw_nTuples["prodVertexY"]),
       "prodVertexZ" : np.array(raw_nTuples["prodVertexZ"]),
       "z0" : np.array(raw_nTuples["z0"]),
       "d0" : np.array(raw_nTuples["d0"]),
       "tanlambda" : np.array(raw_nTuples["tanlambda"]),
       "nCDCHits" : np.array(raw_nTuples["nCDCHits"]),
       "trackNECLClusters" : np.array(raw_nTuples["trackNECLClusters"]),

       "pionID":np.array(raw_nTuples["pionID"]),   
       "kaonID": np.array(raw_nTuples["kaonID"]),
       "M" : np.array(raw_nTuples["M"]),
       "charge" : np.array(raw_nTuples["charge"]),
       #"fromY4S" : hasAncestor_300553,
       "genPDG0" : np.array(raw_nTuples["genMotherPDG__bo0__bc"]),
       "genPDG1" : np.array(raw_nTuples["genMotherPDG__bo1__bc"]),
       "genPDG2" : np.array(raw_nTuples["genMotherPDG__bo2__bc"]),
       "genPDG3" :  np.array(raw_nTuples["genMotherPDG__bo3__bc"]) })
    # Row-wise labelling: bg=0, X=1, Bs=2, Hc=3 (see conditions()).
    df['label'] = df.apply(conditions, axis=1)
    

    ## data preprocessing, dealing with NaN
    # Each pair is [column, sentinel]; NaN entries of the column are replaced
    # by the sentinel. Only these six columns are imputed.
    imputelist = [["pionID",-1.],["kaonID",-1.],["z0",10.],["tanlambda",10.],["nCDCHits",-1.],
                  ["trackNECLClusters",-1.]]
    for impute in imputelist:
        column_name = impute[0]
        impute_val = impute[1]
        mask = df[column_name].isna() == 1
        df.loc[mask, column_name] = impute_val

    
    #print(df)
    
    dfs.append(df)
    print('\n')
    
    # labels: bg=0 , X=1 , Bs=2 , Hc=3
    # Each pair is [dataset name, label to drop]. The name lists the classes
    # that remain; "BsXHc" keeps every class (its -10 is never used as a cut).
    label_cut_name_list = [["BsHc",1],["XHc",2],["BsX",3],["BsXHc",-10]]
    for label_cut_names in label_cut_name_list:
        label_cut = label_cut_names[1]
        label_cut_name = label_cut_names[0]
        print(label_cut,label_cut_name,label_cut,label_cut_name,label_cut,label_cut_name)
        #print(df.describe())

        df_cut = df.copy()
        if label_cut_name != "BsXHc":
            # Drop every particle of the excluded class.
            df_cut = df_cut[df_cut.label != label_cut]
            
            #important to fix labels (no gap in label classes allowed for NN)
            # only first two cases need relabeling, for third BsX deletion is sufficient
            # Label 3 is moved into the slot freed by the removed class, so the
            # remaining labels are 0, 1, 2.
            if label_cut<3: 
                mask = df_cut.label == 3
                column_name = 'label'
                df_cut.loc[mask, column_name] = label_cut
                #df_cut[df_cut.label == 2] = label_cut

        # print(df_cut.describe())
        
        # read out data to save it
        #event_tmp = df_cut.copy()        
        #event_tmp = event_tmp[event_tmp.event == 1]
        #print(event_tmp.describe())

        # Number of rows (FSPs) per event after the cut, and its range.
        numFSPs_df = pd.DataFrame({'count' : df_cut.groupby( [ "event"] ).size()}).reset_index()
        minFSPs = numFSPs_df["count"].min()
        maxFSPs = numFSPs_df["count"].max()
        print("minFSPs:",minFSPs)
        print("maxFSPs:",maxFSPs,'\n')
        
        # Attach the per-event multiplicity to every row of that event.
        df_cut['numFSPs'] = df_cut.groupby('event')['event'].transform('count')
        
        
        #TODO!!
        # One dataset per multiplicity, so every array has a fixed second axis.
        for num_FSPs_toData in range(minFSPs, maxFSPs+1):
            #print("num_FSPs_toData:",num_FSPs_toData)

            df_num_subset = df_cut.copy()
            df_num_subset = df_num_subset[df_num_subset['numFSPs'] == num_FSPs_toData]
        

            numEvents = df_num_subset.event.nunique()
            print("numEvents:",numEvents)
            print("num_FSPs_toData:",num_FSPs_toData)  
            if numEvents == 0:
                print("skipped because empty \n")
                continue
            
            # Multiplicities with fewer than 10 events are not written out.
            if numEvents < 10:
                print("skipped because <10 events \n")
                continue

            for extraInput in extraInput_list:
                #extraInput_names = extraInput[0]
                # px, py, pz, E plus the columns of the current extra group.
                num_features = 4 + len(extraInput) 
                
                # str() of the list, e.g. "['charge']"; it becomes part of the
                # output directory name, brackets and quotes included.
                special_dataLabel = str(extraInput)
                
                # leaves:     (event, particle, feature) input values
                # SA_target:  (event, particle) class label of each particle
                # global_tag: (event, particle + 1) byte-string bookkeeping; the
                #             extra last column holds the per-event tag.
                leaves = np.zeros((numEvents, num_FSPs_toData,  num_features))  
                SA_target =  np.zeros((numEvents, num_FSPs_toData))
                global_tag = np.chararray((numEvents, num_FSPs_toData + 1), itemsize=30)

                # NOTE(review): df_num_subset was already filtered on numFSPs
                # above, so this second filter looks redundant.
                event_list = df_num_subset[df_num_subset["numFSPs"] == num_FSPs_toData]["event"].unique()
                #print("len(event_list):",len(event_list))
                for i in range(numEvents):

                    event_iter = event_list[i]

                    # Per-event tag "<mode>_evt<event number>".
                    global_tag_masterInfo = str(mode) + "_evt" + str(event_iter)
                    global_tag[i,-1] = global_tag_masterInfo
                    #print("global_tag[i,-1]:",global_tag[i,-1])
                    #print("i:",i,"event_iter:",event_iter)
                    #event_df = df_cut.copy()
                    #event_df = event_df[event_df.event == event_iter]
                    # NOTE(review): a boolean mask over the whole subset is
                    # evaluated once per event; a groupby would avoid the
                    # repeated scans.
                    event_df = df_num_subset[df_num_subset.event == event_iter]

                    for j in range(num_FSPs_toData):
                        #print("numParticle:",j)
                        particle = event_df.iloc[j]

                        #print(particle["mcPDG"],particle["px"],particle["py"],particle["pz"],particle["E"])
                        leaves[i,j,0] = particle["px"]
                        leaves[i,j,1] = particle["py"]
                        leaves[i,j,2] = particle["pz"]
                        leaves[i,j,3] = particle["E"]
                        # Extra features fill the slots after the four-momentum.
                        for l in range(4, 4 + len(extraInput)):
                            leaves[i,j,l] = particle[extraInput[l-4]]
                            

                        # NOTE(review): the `+=` lines below add a numeric row
                        # value to an element read back from a chararray.
                        # Confirm that this produces the intended tag (mcPDG,
                        # mcPrimary, isSignal) and does not raise with the numpy
                        # version in use.
                        global_tag[i,j] = particle["mcPDG"]
                        #global_tag[i,j] += "_"
                        global_tag[i,j] += (particle["mcPrimary"])
                        #global_tag[i,j] += "_"
                        global_tag[i,j] += (particle["isSignal"])
                        SA_target[i,j] = int(particle["label"])

                    del event_df

                # Shuffle the particle order inside every event, applying the
                # same permutation to features, labels and particle tags (the
                # per-event tag in the last column stays in place).
                # NOTE(review): no random seed is set in this cell, so the
                # order presumably differs between runs.
                for idx in np.arange(leaves.shape[0]):   # arange is like range but gives ndarray instead of list
                    perms = np.random.permutation(leaves.shape[1])

                    leaves[idx,:] = leaves[idx,perms]
                    SA_target[idx,:] = SA_target[idx,perms]
                    global_tag[idx,0:-1] = global_tag[idx,perms]

                # One directory per (label cut, extra feature group).
                data_subdir = subdir + "_" + label_cut_name + events_num_identifier + special_dataLabel + "/"        
                data_dir = Path("/nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/" + data_subdir)    
                data_dir.mkdir(parents=True, exist_ok=True)


                train_ratio = 0.75
                validation_ratio = 0.15
                test_ratio = 0.10

                #print("leaves.shape:",leaves.shape)
                #print("SA_target.shape:",leaves.shape)
                #print("global_tag.shape:",leaves.shape)

                x=leaves
                y=SA_target
                z=global_tag

                # Two-stage split without shuffling: first hold out
                # 1 - train_ratio of the events, then divide that held-out part
                # into validation and test in the ratio 0.15 : 0.10.
                x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(x, y, z, test_size=1 - train_ratio, shuffle=False)
                x_val, x_test, y_val, y_test, z_val, z_test = train_test_split(x_test, y_test, z_test, test_size=test_ratio/(test_ratio + validation_ratio), shuffle=False) 

                # The "is_left_arr_*" files hold the SA_target label arrays.
                if save_data==True:
                    np.save(data_dir / "leaves_train_{}_FSP{}.npy".format(mode,num_FSPs_toData), x_train)
                    np.save(data_dir / "is_left_arr_train_{}_FSP{}.npy".format(mode,num_FSPs_toData), y_train)
                    np.save(data_dir / "global_tag_train_{}_FSP{}.npy".format(mode,num_FSPs_toData), z_train)

                    np.save(data_dir / "leaves_val_{}_FSP{}.npy".format(mode,num_FSPs_toData), x_val)
                    np.save(data_dir / "is_left_arr_val_{}_FSP{}.npy".format(mode,num_FSPs_toData), y_val)
                    np.save(data_dir / "global_tag_val_{}_FSP{}.npy".format(mode,num_FSPs_toData), z_val)

                    np.save(data_dir / "leaves_test_{}_FSP{}.npy".format(mode,num_FSPs_toData), x_test)
                    np.save(data_dir / "is_left_arr_test_{}_FSP{}.npy".format(mode,num_FSPs_toData), y_test)
                    np.save(data_dir / "global_tag_test_{}_FSP{}.npy".format(mode,num_FSPs_toData), z_test)

                # Printed whether or not anything was written; the message
                # reports the value of save_data.
                print("Data saved to:", data_dir,'is', save_data ,'\n')
                print("")
                #del df_num_subset

        
        del df_cut
        
        

mode: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
/nfs/dust/belle2/user/axelheim/MC_studies/my6modes/rootfiles/wSim_wReco/gamma_nTuples_mode0_15000_events.root


1 BsHc 1 BsHc 1 BsHc
minFSPs: 1
maxFSPs: 19 

numEvents: 1
num_FSPs_toData: 1
skipped because <10 events 

numEvents: 29
num_FSPs_toData: 2
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wR

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['trackNECLClusters'] is True 


numEvents: 1060
num_FSPs_toData: 11
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsHc_

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['trackNECLClusters'] is True 


numEvents: 1153
num_FSPs_toData: 4
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_eve

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['trackNECLClusters'] is True 


numEvents: 46
num_FSPs_toData: 13
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_XHc_15000_even

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['trackNECLClusters'] is True 


numEvents: 2542
num_FSPs_toData: 8
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsX_15000_eve

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['trackNECLClusters'] is True 


numEvents: 184
num_FSPs_toData: 5
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_

Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['tanlambda'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['nCDCHits'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['trackNECLClusters'] is True 


numEvents: 764
num_FSPs_toData: 14
Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['charge'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['pionID', 'kaonID'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['dx', 'dy', 'dz'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco_BsXHc_15000_events['z0'] is True 


Data saved to: /nfs/dust/belle2/user/axelheim/MC_studies/my6modes/data/wSim_wReco

In [43]:
# Rebuild the labelled and imputed table for mode 1 only, for interactive
# inspection in the cells that follow.
Bs_pdg, Hc_pdg = 511, 411
nm = "_nTuples_mode1" + events_num_identifier + ".root"
raw_nTuples = uproot.concatenate(
    [root_path + species + nm for species in ("gamma", "pi", "K")]
)

# (DataFrame column, nTuple branch) pairs, in the column order of the table.
df = pd.DataFrame({
    column: np.array(raw_nTuples[branch])
    for column, branch in (
        ("event", "__event__"),
        ("mcPDG", "mcPDG"),
        ("mcPrimary", "mcPrimary"),
        ("isSignal", "isSignal"),
        ("px", "px"),
        ("py", "py"),
        ("pz", "pz"),
        ("E", "E"),
        ("dx", "dx"),
        ("dy", "dy"),
        ("dz", "dz"),
        ("prodVertexX", "prodVertexX"),
        ("prodVertexY", "prodVertexY"),
        ("prodVertexZ", "prodVertexZ"),
        ("z0", "z0"),
        ("d0", "d0"),
        ("tanlambda", "tanlambda"),
        ("nCDCHits", "nCDCHits"),
        ("trackNECLClusters", "trackNECLClusters"),
        ("pionID", "pionID"),
        ("kaonID", "kaonID"),
        ("M", "M"),
        ("charge", "charge"),
        ("genPDG0", "genMotherPDG__bo0__bc"),
        ("genPDG1", "genMotherPDG__bo1__bc"),
        ("genPDG2", "genMotherPDG__bo2__bc"),
        ("genPDG3", "genMotherPDG__bo3__bc"),
    )
})
# conditions() reads the Bs_pdg / Hc_pdg values set at the top of this cell.
df["label"] = df.apply(conditions, axis="columns")

# Replace NaN in the listed columns by the paired sentinel value.
imputelist = [["pionID", -1.], ["kaonID", -1.], ["z0", 10.], ["tanlambda", 10.],
              ["nCDCHits", -1.], ["trackNECLClusters", -1.]]
for column_name, impute_val in imputelist:
    mask = df[column_name].isna()
    df.loc[mask, column_name] = impute_val


In [42]:
# Summary statistics of the rows whose tanlambda is missing.
df.loc[df["tanlambda"].isna()].describe()

Unnamed: 0,event,mcPDG,mcPrimary,isSignal,px,py,pz,E,dx,dy,dz,prodVertexX,prodVertexY,prodVertexZ,z0,d0,tanlambda,nCDCHits,trackNECLClusters,pionID,kaonID,M,charge,genPDG0,genPDG1,genPDG2,genPDG3,label
count,51683.0,29272.0,29272.0,29272.0,51683.0,51683.0,51683.0,51683.0,51683.0,51683.0,51683.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51683.0,51683.0,51683.0,51683.0,51683.0,51683.0,51683.0
mean,7496.158253,18.994227,0.383541,0.690455,0.008633,-9.3e-05,0.062502,0.213961,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,36.121781,3567.411257,35213.186483,84561.402898,1.175125
std,4320.530731,98.310735,0.486256,0.462314,0.14286,0.142075,0.147049,0.142895,0.0,0.0,0.0,,,,,,,,,,,0.0,0.0,203.632816,32913.348387,96642.699222,135146.870374,1.140713
min,1.0,-321.0,0.0,0.0,-1.41602,-1.509677,-1.156026,0.1,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,-511.0,-511.0,-511.0,-511.0,0.0
25%,3773.0,22.0,0.0,0.0,-0.077548,-0.082845,-0.02176,0.127256,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,0.0,-421.0,0.0,0.0,0.0
50%,7515.0,22.0,0.0,1.0,0.011084,0.000912,0.065567,0.168191,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,11222.0,22.0,1.0,1.0,0.090542,0.083187,0.126275,0.246847,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,111.0,0.0,511.0,300553.0,2.0
max,15000.0,3122.0,1.0,1.0,1.850542,1.423843,2.042743,2.236134,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,3122.0,300553.0,300553.0,300553.0,3.0


In [None]:
# Scratch cell: assigns a value to the rows of df whose label is 2.
# NOTE(review): `label_cu` is not defined in any visible cell (possibly a
# truncated `label_cut`), so this cell presumably raises NameError as written.
# It also targets a "name" column, which is not among the columns the visible
# cells build for df. Confirm the intent or delete the cell.
mask = df.label == 2
column_name = "name"
df.loc[mask, column_name] = label_cu

In [25]:
# Print, for every mode, the value range of the track parameters z0, d0 and
# tanlambda (used to sanity-check the sentinel values chosen for imputation).
Bs_pdg_list = [-511, 511, -521, -521, 521, 521]
Hc_pdg_list = [-411, 411, -411, -421, 411, 423]
dfs = []
for mode in range(6):
    print("mode:",mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode)

    Bs_pdg, Hc_pdg = Bs_pdg_list[mode], Hc_pdg_list[mode]

    nm = f"_nTuples_mode{mode}" + events_num_identifier + ".root"
    raw_nTuples = uproot.concatenate(
        [root_path + species + nm for species in ("gamma", "pi", "K")]
    )
    df = pd.DataFrame({
        column: np.array(raw_nTuples[branch])
        for column, branch in (
            ("event", "__event__"),
            ("z0", "z0"),
            ("d0", "d0"),
            ("tanlambda", "tanlambda"),
        )
    })

    # One header line followed by the min / max of each track parameter.
    for column, header in (("z0", "z0:"), ("d0", "d0:"), ("tanlambda", "tanlambda:")):
        values = df[column]
        print(header)
        print("min:", values.min())
        print("max:", values.max())
    print("")

mode: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
z0:
min: -3.98970365524292
max: 3.921771764755249
d0:
min: -1.9737915992736816
max: 1.933559536933899
tanlambda:
min: -1.72421133518219
max: 3.2667858600616455

mode: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
z0:
min: -3.962635040283203
max: 3.975609540939331
d0:
min: -1.9999605417251587
max: 1.9967619180679321
tanlambda:
min: -1.7295774221420288
max: 3.267096757888794

mode: 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
z0:
min: -3.9460055828094482
max: 3.9480488300323486
d0:
min: -2.009253740310669
max: 2.021289825439453
tanlambda:
min: -1.716620922088623
max: 3.2667810916900635

mode: 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
z0:
min: -3.9199156761169434
max: 3.9245262145996094
d0:
min: -1.9598942995071411
max: 1.9915940761566162
tanlambda:
min: -1.7295814752578735
max: 3.266483783721924

mode: 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
z0:
min: -3.898904800415039
max: 3.8998429775238037
d0:
min: -2.0395004749298096
max: 1.9579038619995117


In [17]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [46]:
# Narrow view on the imputed columns, shown for event 1.
df2 = df.loc[:, ["event", "tanlambda", "z0", "pionID", "kaonID", "nCDCHits", "trackNECLClusters"]]
df2.loc[df2["event"] == 1]

Unnamed: 0,event,tanlambda,z0,pionID,kaonID,nCDCHits,trackNECLClusters
0,1,10.0,10.0,-1.0,-1.0,-1.0,-1.0
1,1,10.0,10.0,-1.0,-1.0,-1.0,-1.0
2,1,10.0,10.0,-1.0,-1.0,-1.0,-1.0
3,1,10.0,10.0,-1.0,-1.0,-1.0,-1.0
51683,1,1.802309,-0.014199,0.9938843,1.658564e-12,44.0,1.0
94814,1,0.252021,0.017823,1.044172e-08,1.0,54.0,1.0
94815,1,2.492803,-0.036559,4.53788e-05,0.8632434,8.0,1.0


In [37]:
# Scratch check of the feature-slot arithmetic: for each group of extra inputs,
# print the group and its size, then one line per extra feature showing the
# slot index (starting at 4) and the 1-based position within the group.
extraInput_list = [
    [],
    ["charge"],
    ["pionID", "kaonID"],
    ["dx", "dy", "dz"],
    ["z0"],
    ["tanlambda"],
    ["nCDCHits"],
    ["trackNECLClusters"],
]
for extraInput in extraInput_list:
    n_extra = len(extraInput)
    print(extraInput, n_extra)
    for position in range(n_extra):
        print(position + 4, position + 1)


[] 0
['charge'] 1
4 1
['pionID', 'kaonID'] 2
4 1
5 2
['dx', 'dy', 'dz'] 3
4 1
5 2
6 3
['z0'] 1
4 1
['tanlambda'] 1
4 1
['nCDCHits'] 1
4 1
['trackNECLClusters'] 1
4 1


In [23]:
# All particle rows of event 5.
df.loc[df["event"] == 5]

Unnamed: 0,event,mcPDG,mcPrimary,isSignal,px,py,pz,E,dx,dy,dz,prodVertexX,prodVertexY,prodVertexZ,z0,d0,tanlambda,nCDCHits,trackNECLClusters,pionID,kaonID,M,charge,genPDG0,genPDG1,genPDG2,genPDG3,label
18,5,22.0,1.0,1.0,-0.075411,-0.00999,0.076805,0.1081,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,111.0,-423.0,511.0,300553.0,2
19,5,,,,0.127772,0.02168,0.066967,0.145878,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0
20,5,,,,0.11422,-0.149937,0.030443,0.190929,-7.450581e-10,9.23872e-10,2.235174e-10,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0
51693,5,,,,0.291903,-0.102633,0.665003,0.746625,-0.007808039,-0.02226003,-0.1070773,,,,-0.00362,0.024117,2.149191,37.0,1.0,0.761046,1.1e-05,0.13957,1.0,0.0,0.0,0.0,0.0,0
51694,5,-211.0,1.0,1.0,2.236038,0.266323,1.247372,2.578025,-4.326724e-06,3.635552e-05,-0.06919962,,,,-0.052811,-0.022829,0.553934,56.0,1.0,0.99797,0.002029,0.13957,-1.0,511.0,300553.0,0.0,0.0,2
94820,5,321.0,1.0,1.0,-1.584469,-1.075134,0.204367,1.987948,-0.002859164,0.004214325,-0.07127227,,,,-0.084669,0.047233,0.10673,56.0,1.0,6.877523e-06,0.999993,0.493677,1.0,-421.0,-423.0,511.0,300553.0,2
94821,5,-321.0,1.0,1.0,0.210248,-0.835317,0.190761,1.010973,0.03802297,0.00956447,0.004964881,,,,0.001317,0.005133,0.221462,60.0,1.0,9.581388e-08,1.0,0.493677,-1.0,411.0,-511.0,300553.0,0.0,3
94822,5,-321.0,1.0,1.0,-1.391557,1.36596,-0.649844,2.113834,0.002359285,0.002404006,-0.05493843,,,,-0.049078,-0.019523,-0.333263,54.0,1.0,4.553697e-12,1.0,0.493677,-1.0,-511.0,300553.0,0.0,0.0,1


In [6]:
# Event 1 of the fourth collected frame (index 3), ordered by class label.
dfs[3].loc[lambda frame: frame["event"] == 1].sort_values(by="label")

Unnamed: 0,event,mcPDG,mcPrimary,isSignal,px,py,pz,E,pionID,kaonID,M,charge,genPDG0,genPDG1,genPDG2,genPDG3,label
6,1,,,,-0.074836,0.018443,-0.124483,0.146413,,,0.0,0.0,0.0,0.0,0.0,0.0,0
2,1,22.0,1.0,1.0,-0.36116,-0.350682,-0.061892,0.507193,,,0.0,0.0,111.0,213.0,521.0,300553.0,1
4,1,22.0,1.0,1.0,-0.201496,-0.094562,-0.074745,0.234797,,,0.0,0.0,111.0,213.0,521.0,300553.0,1
0,1,22.0,0.0,1.0,0.093109,0.145431,0.106453,0.202859,,,0.0,0.0,-211.0,-521.0,300553.0,0.0,2
3,1,22.0,0.0,1.0,0.004253,0.112888,-0.028027,0.116392,,,0.0,0.0,-211.0,-521.0,300553.0,0.0,2
5,1,211.0,1.0,0.0,0.031815,0.129437,-0.08171,0.156341,,,0.0,0.0,-521.0,300553.0,0.0,0.0,2
68791,1,-211.0,1.0,1.0,2.088494,-0.430028,1.018039,2.366984,0.9798776,0.009366,0.13957,-1.0,-521.0,300553.0,0.0,0.0,2
120392,1,-321.0,1.0,1.0,-1.044177,0.340423,-0.388148,1.265136,1.398664e-16,1.0,0.493677,-1.0,421.0,423.0,-521.0,300553.0,2
1,1,22.0,0.0,1.0,0.36803,0.151758,0.145102,0.423711,,,0.0,0.0,111.0,321.0,-421.0,521.0,3


In [7]:
# Overlaid histograms of the "count" column for primary gammas, pions and
# kaons.
# NOTE(review): `new` is not defined in any visible cell, and the recorded
# output of this cell is a NameError. It presumably is a table with the
# columns mcPDG, mcPrimary and count; define it before running this cell.
bin_n = 20
# Integer bin edges 0, 1, ..., bin_n give bin_n bins of width 1.
# np.linspace(0, bin_n, bin_n) produced only bin_n - 1 bins with non-integer
# edges, which distorts a histogram of integer counts.
bins = np.arange(bin_n + 1)

new[(new.mcPDG==22.0) & (new.mcPrimary==1)]["count"].hist(bins=bins, label='gammas')
new[(new.mcPDG==211.0) & (new.mcPrimary==1)]["count"].hist(bins=bins, label='pions')
# The kaon histogram gets an explicit label like the other two. pandas rejects
# `label` combined with `legend=True`, so the legend is drawn on the returned
# axes instead.
new[(new.mcPDG==321.0) & (new.mcPrimary==1)]["count"].hist(bins=bins, label='kaons').legend();


NameError: name 'new' is not defined

In [None]:
# Distribution of the number of particle rows (FSPs) per event in df.
numFSPs_df = df.groupby("event").size().reset_index(name="count")
minFSPs = numFSPs_df["count"].min()
maxFSPs = numFSPs_df["count"].max()


print("maxFSPs:",maxFSPs)
print("minFSPs:",minFSPs)

# Integer edges 0, 1, ..., maxFSPs + 1 give every multiplicity its own bin of
# width 1. np.linspace(0, maxFSPs, maxFSPs) produced maxFSPs - 1 slightly wider
# bins, which put the two highest multiplicities into the same bin.
bins = np.arange(0, maxFSPs + 2)
numFSPs_df["count"].hist(bins=bins, legend=True)


In [None]:
# For events 1-3: total number of rows and the number of rows whose |mcPDG| is
# 211 (pion), 321 (kaon) or 22 (gamma).
for evNum in range(1, 4):
    event_rows = df[df.event == evNum]
    abs_pdg = event_rows.mcPDG.abs()
    print("event", evNum)
    print("# FSPs:", len(event_rows))
    print("# pions:", int((abs_pdg == 211).sum()))
    print("# kaons:", int((abs_pdg == 321).sum()))
    print("# gammas:", int((abs_pdg == 22).sum()))
    print('\n')
    
    

In [None]:
# Row count of event 1, followed by the rows themselves.
print(int(df.event.eq(1).sum()))
df.loc[df.event.eq(1)]