In [2]:
import matplotlib.pyplot as plt
import uproot
import numpy as np
import pandas as pd

from pathlib import Path
from sklearn.model_selection import train_test_split


# Name of the dataset; used further down as the output sub-directory.
data_subdir = "noSim_noReco_BsXHc_100events/"

# Base directory of the study and the folder the input ROOT files are read from.
path = "/afs/desy.de/user/a/axelheim/private/MC_studies/my6modes/"
root_path = "{}rootfiles/".format(path)

In [3]:
def conditions(s, bs_pdg=None, hc_pdg=None):
    """Label one particle row by the PDG codes found in its genMotherPDG columns.

    Parameters
    ----------
    s : mapping
        A row (e.g. as handed over by ``DataFrame.apply(..., axis=1)``) that
        provides ``genMotherPDG0`` .. ``genMotherPDG3``. Every value is passed
        through ``int()`` before it is compared.
    bs_pdg, hc_pdg : int, optional
        PDG codes to look for. When left as ``None`` the notebook-level
        globals ``Bs_pdg`` / ``Hc_pdg`` are used, so the existing
        ``df.apply(conditions, axis=1)`` call keeps working unchanged.

    Returns
    -------
    int
        1 if any of the four columns equals ``bs_pdg``; otherwise 2 if any
        equals ``hc_pdg``; otherwise 0. The ``bs_pdg`` match takes priority.
    """
    # Fall back to the per-mode globals only when the caller gave no value.
    if bs_pdg is None:
        bs_pdg = Bs_pdg
    if hc_pdg is None:
        hc_pdg = Hc_pdg

    mother_columns = ("genMotherPDG0", "genMotherPDG1", "genMotherPDG2", "genMotherPDG3")

    def has_mother(pdg):
        # Lazy scan: columns are converted and compared in order and the scan
        # stops at the first match.
        return any(int(s[column]) == pdg for column in mother_columns)

    if has_mother(bs_pdg):
        return 1
    if has_mother(hc_pdg):
        return 2
    return 0

In [4]:
# Per-mode PDG codes; entry `mode` is published as the globals `Bs_pdg` /
# `Hc_pdg` that `conditions` compares the genMotherPDG columns against.
Bs_pdg_list = [-511, 511,-521,-521,521,521]
Hc_pdg_list = [-411, 411, -411, -421, 411, 423]
dfs =[]

# Loop-invariant setup: output directory, split fractions, feature count.
data_dir = Path(path) / "data" / data_subdir
data_dir.mkdir(parents=True, exist_ok=True)

train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

num_features = 4  # px, py, pz, E

for mode in range(6):
    print("mode:",mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode,mode)

    Bs_pdg = Bs_pdg_list[mode]
    Hc_pdg = Hc_pdg_list[mode]

    # Read the gamma, pi and K ntuples of this mode as one concatenated array.
    nm = "_nTuples_mode{}.root".format(mode)
    raw_nTuples = uproot.concatenate([(root_path + "gamma" + nm), (root_path + "pi" + nm), (root_path + "K" + nm)])
    event = np.array(raw_nTuples["__event__"])
    mcPDG = np.array(raw_nTuples["mcPDG"])
    px = np.array(raw_nTuples["px"])
    py = np.array(raw_nTuples["py"])
    pz = np.array(raw_nTuples["pz"])
    E = np.array(raw_nTuples["E"])
    M = np.array(raw_nTuples["M"])
    charge = np.array(raw_nTuples["charge"])
    genMotherPDG0 = np.array(raw_nTuples["genMotherPDG__bo0__bc"])
    genMotherPDG1 = np.array(raw_nTuples["genMotherPDG__bo1__bc"])
    genMotherPDG2 = np.array(raw_nTuples["genMotherPDG__bo2__bc"])
    genMotherPDG3 = np.array(raw_nTuples["genMotherPDG__bo3__bc"])

    # "M" is read above but deliberately not part of the frame.
    df = pd.DataFrame({"event" : event,
                       "mcPDG" : mcPDG,
                       "px" : px,
                       "py" : py,
                       "pz" : pz,
                       "E" : E,
                       "charge" : charge,
                       "genMotherPDG0" : genMotherPDG0,
                       "genMotherPDG1" : genMotherPDG1,
                       "genMotherPDG2" : genMotherPDG2,
                       "genMotherPDG3" : genMotherPDG3})
    # `conditions` picks up the Bs_pdg / Hc_pdg globals assigned above.
    df['label'] = df.apply(conditions, axis=1)

    dfs.append(df)
    print('\n')

    # Array sizes: event 1 fixes the number of particles stored per event,
    # the largest event number fixes the number of events (ids 1..max).
    event_df = df[df.event == 1]
    num_FSPs_toData = len(event_df)
    numEvents = df.event.max()

    leaves = np.zeros((numEvents, num_FSPs_toData,  num_features))
    SA_target =  np.zeros((numEvents, num_FSPs_toData))
    # One extra trailing column carries the mode number as a string.
    global_tag = np.chararray((numEvents, num_FSPs_toData + 1), itemsize=30)
    global_tag[:,-1] = str(mode)

    for i in range(numEvents):
        # Use a dedicated name: `event` is the per-row event array read from
        # the ntuple and must not be rebound to a scalar here.
        event_id = i + 1

        event_df = df[df.event == event_id]
        if len(event_df) < num_FSPs_toData:
            raise ValueError(
                "mode {}: event {} has {} rows, expected at least {} (row count of event 1)".format(
                    mode, event_id, len(event_df), num_FSPs_toData))

        # Only the first num_FSPs_toData rows of an event are stored.
        leaves[i] = event_df[["px", "py", "pz", "E"]].to_numpy()[:num_FSPs_toData]
        SA_target[i] = event_df["label"].to_numpy()[:num_FSPs_toData]
        for j, pdg in enumerate(event_df["mcPDG"].to_numpy()[:num_FSPs_toData]):
            global_tag[i,j] = int(pdg)

    # Shuffle the particle order within every event, applying the same
    # permutation to features, labels and tags (the mode column stays last).
    # NOTE(review): no random seed is set, so the order differs between runs.
    for idx in np.arange(leaves.shape[0]):
        perms = np.random.permutation(leaves.shape[1])

        leaves[idx,:] = leaves[idx,perms]
        SA_target[idx,:] = SA_target[idx,perms]
        global_tag[idx,0:-1] = global_tag[idx,perms]

    # Ordered (unshuffled) train / validation / test split over events.
    # NOTE(review): the split arrays are computed but not written to disk;
    # only the full arrays are saved below.
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        leaves, SA_target, global_tag, test_size=1 - train_ratio, shuffle=False)
    x_val, x_test, y_val, y_test, z_val, z_test = train_test_split(
        x_test, y_test, z_test, test_size=test_ratio/(test_ratio + validation_ratio), shuffle=False)

    np.save(data_dir / "leaves{}.npy".format(mode), leaves)
    np.save(data_dir / "is_left_arr{}.npy".format(mode), SA_target)
    np.save(data_dir / "global_tag{}.npy".format(mode), global_tag)

    print("Data saved to:", data_dir,'\n')

0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

 entries per event: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100] [14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14]


Data saved to: /afs/desy.de/user/a/axelheim/private/MC_studies/my6modes/data/BsXHc_100events 

1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

 entries per event: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15

In [None]:
# Print the stored features, labels and tags found at one index of the
# arrays left over from the last processed mode.
l = 14
for stored in (leaves, SA_target, global_tag):
    print(stored[l])

In [None]:
# Select the rows of event 15 and show the mcPDG value of its fourth row.
event_df = df.loc[df["event"] == 15]
event_df.iloc[3]["mcPDG"]

In [None]:
# Show the rows currently held in `event_df`.
event_df

In [None]:
# Switch to the frame stored for mode index 5 and show its rows for event 1.
df = dfs[5]
event_df = df.loc[df["event"] == 1]
event_df

In [None]:
# Row count per event for the frame currently bound to `df`.
# The columns are read back from `df` rather than from the module-level
# `event` / `mcPDG` names left behind by the preparation cell, because those
# names may have been rebound since the ntuple was read.
event_ids = df["event"].to_numpy()
mc_pdg_codes = df["mcPDG"].to_numpy()

u,c = np.unique(event_ids,return_counts=True)
print('\n entries per event:',u,c)

print(mc_pdg_codes)

print(event_ids)