In [2]:
import matplotlib.pyplot as plt
import uproot
import numpy as np
import pandas as pd

import sys
sys.path.insert(1, '/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/util_funcs/')
from pandas_colFuncs import B_ID, whichBisSig, D0_decay_type, whichBisSig_NAHS

## load the NN

In [2]:
nn_vars = ["px","py","pz","E","M","charge","dr","dz","clusterReg","clusterE9E21","pionID","kaonID","electronID","muonID","protonID"]

In [3]:
sys.path.append('/afs/desy.de/user/a/axelheim/private/baumbauen/notebooks/')
from BranchSeparatorModel import BranchSeparatorModel
# See below why I put this



model_dir="/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/saved_models/NAHSA_Gmodes_fixedD0modes/NAHS_allEvts_twoSubs_fixedD0run/NAHSA_no_xyz/256_0_64_0.1_4/"
checkpoint_name = "model_checkpoint_model_perfectSA=0.7674.pt"
specs_output_label = "256_0_64_0.1_4"
num_classes = 3    


specs = specs_output_label.split("_")

model = BranchSeparatorModel(infeatures=len(nn_vars),
            dim_feedforward=int(specs[0]),
            num_classes=num_classes,
            dropout=float(specs[3]),
            nblocks=int(specs[4]))



import torch

checkpoint = torch.load(model_dir +  checkpoint_name, map_location=torch.device('cpu'))
model.load_state_dict(checkpoint)

Using factor graph MLP encoder.


<All keys matched successfully>

## load the online data

In [4]:
nfs_path = "/nfs/dust/belle2/user/axelheim/MC_studies/Dstlnu_Bt_generic/appliedNNdata/8thRun/"
#nfs_path="/afs/desy.de/user/a/axelheim/private/MC_studies/Dstlnu_Bt_generic/load_NN_to_basf2/productive_method/testOut/"

In [5]:
FSPs_file = uproot.open(nfs_path + "FSPs.root:variables;1")
df_FSPs = FSPs_file.arrays(library="pd")

## add labels

In [6]:
df_FSPs['B_ID'] = df_FSPs.apply(B_ID, axis=1)

In [7]:
Hc_motherB_df = df_FSPs[df_FSPs["NN_prediction"].isna() == True].drop_duplicates(subset=("__event__"), keep='first')
Hc_motherB_df["B_tag_ID"] = Hc_motherB_df["B_ID"]
df_FSPs = pd.merge(df_FSPs,Hc_motherB_df[["__event__","__production__","B_tag_ID"]], on=["__event__","__production__"])

In [8]:
def labels(s):
    label = -1
    if int(s['B_ID']) == 0:
        label = 0 # background, cause not related to MC Particles
    else: 
        B_tagID = s['B_tag_ID']
        
        if int(s['B_ID']) == B_tagID:
            label = 1 # X
        else:
            label = 2 # Bsig
    return label
df_FSPs['label'] = df_FSPs.apply(labels, axis=1)

In [9]:
df_FSPs["correct_pred_onlineNN"] = (df_FSPs["label"] == df_FSPs["NN_prediction"]).astype(int)

In [10]:
df_FSPs.shape[0]

848135

## prepare input for NN

In [11]:
nonHc_FSPs = df_FSPs[df_FSPs["NN_prediction"].notna() == True]

In [12]:
nonHc_FSPs["NN_prediction"].describe()

count    702287.000000
mean          1.159428
std           0.760966
min           0.000000
25%           1.000000
50%           1.000000
75%           2.000000
max           2.000000
Name: NN_prediction, dtype: float64

In [13]:
nonHc_FSPs.shape[0]

702287

In [14]:
nonHc_FSPs["offline_NN_pred"] = -1
nonHc_FSPs["offline_NN_pred_shuffled"] = -1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nonHc_FSPs["offline_NN_pred"] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nonHc_FSPs["offline_NN_pred_shuffled"] = -1


In [15]:
evts = nonHc_FSPs["__event__"].unique()

In [16]:
print(len(evts))

44945


In [17]:
evts[2]

2336782

In [18]:
labels=[]
evtnum=[]
online_NN_pred=[]
offline_NN_pred=[]
offline_NN_pred_shuff=[]

for i in range(len(evts)):
    one_evt = nonHc_FSPs[(nonHc_FSPs["__event__"] == evts[i])]
    
    if (i % 100) == 0:
        print("processing evt",i,"of",len(evts))
    
    
    num_particles = one_evt.shape[0]
    
    tmp_par_vars = []
    
    for j in range(num_particles):
        particle = one_evt.iloc[j]
        #print(particle)
        readOut_features = [particle[var] for var in nn_vars]
        tmp_par_vars.append(readOut_features)

    NN_input_features = np.array([np.array(xi) for xi in tmp_par_vars])

    # impute the nan values with -1. (check if that's logical for all values if input vars get changed)
    NN_input_features= np.nan_to_num(NN_input_features, copy=False, nan=-1.0)

    shape = NN_input_features.shape
    NN_input_features = NN_input_features.reshape(shape[0], 1, shape[1])

    #print("NN_input_features.shape:",NN_input_features.shape)
    NN_input_features = torch.Tensor(NN_input_features)
    #print(NN_input_features.shape[0])

    SA_pred = model(NN_input_features)

    probs = torch.softmax(SA_pred, dim=1)  # (N, C, d1)
    winners = probs.argmax(dim=1)
    
    
    shape = NN_input_features.shape
    r=torch.randperm(shape[0])
    shuffled_input=NN_input_features[r, :, :]
    
    shuffled_SA_pred = model(shuffled_input)

    shuffled_probs = torch.softmax(shuffled_SA_pred, dim=1)  # (N, C, d1)
    shuffled_winners = shuffled_probs.argmax(dim=1)
    #print("r:",r)
    for j in range(num_particles):
        particle = one_evt.iloc[j]
        labels.append(particle["label"].item())
        evtnum.append(particle["__event__"].item())
        online_NN_pred.append(particle["NN_prediction"].item())
        #particle["offline_NN_pred"] = winners[0,j].item()
        offline_NN_pred.append(winners[0,j].item())
        
        
        index_Shuffreversed = (r == j).nonzero(as_tuple=True)[0].item()
        #print("j:",j,"index_Shuffreversed:",index_Shuffreversed)
        offline_NN_pred_shuff.append(shuffled_winners[0,index_Shuffreversed].item())
        
        #particle["offline_NN_pred_shuffled"] = shuffled_winners[0,index_Shuffreversed].item()
        #one_evt.iloc[j] = particle
        
    #nonHc_FSPs[(nonHc_FSPs["__event__"] == evts[i])] = one_evt 


processing evt 0 of 44945
processing evt 100 of 44945
processing evt 200 of 44945
processing evt 300 of 44945
processing evt 400 of 44945
processing evt 500 of 44945
processing evt 600 of 44945
processing evt 700 of 44945
processing evt 800 of 44945
processing evt 900 of 44945
processing evt 1000 of 44945
processing evt 1100 of 44945
processing evt 1200 of 44945
processing evt 1300 of 44945
processing evt 1400 of 44945
processing evt 1500 of 44945
processing evt 1600 of 44945
processing evt 1700 of 44945
processing evt 1800 of 44945
processing evt 1900 of 44945
processing evt 2000 of 44945
processing evt 2100 of 44945
processing evt 2200 of 44945
processing evt 2300 of 44945
processing evt 2400 of 44945
processing evt 2500 of 44945
processing evt 2600 of 44945
processing evt 2700 of 44945
processing evt 2800 of 44945
processing evt 2900 of 44945
processing evt 3000 of 44945
processing evt 3100 of 44945
processing evt 3200 of 44945
processing evt 3300 of 44945
processing evt 3400 of 449

processing evt 27700 of 44945
processing evt 27800 of 44945
processing evt 27900 of 44945
processing evt 28000 of 44945
processing evt 28100 of 44945
processing evt 28200 of 44945
processing evt 28300 of 44945
processing evt 28400 of 44945
processing evt 28500 of 44945
processing evt 28600 of 44945
processing evt 28700 of 44945
processing evt 28800 of 44945
processing evt 28900 of 44945
processing evt 29000 of 44945
processing evt 29100 of 44945
processing evt 29200 of 44945
processing evt 29300 of 44945
processing evt 29400 of 44945
processing evt 29500 of 44945
processing evt 29600 of 44945
processing evt 29700 of 44945
processing evt 29800 of 44945
processing evt 29900 of 44945
processing evt 30000 of 44945
processing evt 30100 of 44945
processing evt 30200 of 44945
processing evt 30300 of 44945
processing evt 30400 of 44945
processing evt 30500 of 44945
processing evt 30600 of 44945
processing evt 30700 of 44945
processing evt 30800 of 44945
processing evt 30900 of 44945
processing

In [19]:
NN_results = pd.DataFrame({'__event__': evtnum,
                          'label' : labels,
                          'online_NN_pred' : online_NN_pred,
                          'offline_NN_pred' : offline_NN_pred,
                          'offline_NN_pred_shuff' : offline_NN_pred_shuff})

In [None]:
NN_results.to_csv(nfs_path + "NN_results.csv")

In [5]:
NN_results = pd.read_csv(nfs_path + "NN_results.csv")

In [6]:
NN_results["off_eq_on"] = (NN_results["offline_NN_pred"] == NN_results["online_NN_pred"]).astype(int)
NN_results["off_eq_on_shuffled"] = (NN_results["offline_NN_pred_shuff"] == NN_results["online_NN_pred"]).astype(int)
NN_results["unshuff_eq_shuff"] = (NN_results["offline_NN_pred_shuff"] == NN_results["offline_NN_pred"]).astype(int)

In [8]:
NN_results["correct_pred_onlineNN"] = (NN_results["label"] == NN_results["online_NN_pred"]).astype(int)
NN_results["correct_pred_offlineNN"] = (NN_results["label"] == NN_results["offline_NN_pred"]).astype(int)
NN_results["correct_pred_offlineNN_shuff"] = (NN_results["label"] == NN_results["offline_NN_pred_shuff"]).astype(int)

In [9]:
for var in ["off_eq_on","off_eq_on_shuffled","unshuff_eq_shuff","correct_pred_onlineNN","correct_pred_offlineNN","correct_pred_offlineNN_shuff"]:
    print(var)
    print(NN_results[var].mean()*100, '% \n')

off_eq_on
81.98799066478519 % 

off_eq_on_shuffled
81.9911232872031 % 

unshuff_eq_shuff
81.89016741018985 % 

correct_pred_onlineNN
60.269234657625724 % 

correct_pred_offlineNN
60.0980795600659 % 

correct_pred_offlineNN_shuff
60.12200140398441 % 



In [10]:
off_on_pred = pd.DataFrame({'count' : NN_results.groupby( ["off_eq_on",
                    "correct_pred_onlineNN","correct_pred_offlineNN"] ).size() / NN_results.shape[0] }).reset_index()
off_on_pred

Unnamed: 0,off_eq_on,correct_pred_onlineNN,correct_pred_offlineNN,count
0,0,0,0,0.02661
1,0,0,1,0.075899
2,0,1,0,0.077611
3,1,0,0,0.294798
4,1,1,1,0.525082
