# LEE Analyzer notebook

## Imports & Constants

In [1]:
import math
import glob
import time
import os
import sys
import numpy as np
import pandas as pd
from collections import OrderedDict
from root_numpy import list_branches
from root_pandas import read_root

In [2]:
# Notebook-wide display option and physical constants.
pd.options.display.max_columns = 999
gr      = 1.618        # golden ratio (presumably for figure aspect ratios -- TODO confirm, unused in the code shown here)
nan     = -2147483648  # smallest signed 32-bit integer; shows up as a placeholder value in the matched_* columns

# Particle rest masses
mass_p= 0.93827  #GeV
mass_e= 0.000511 #GeV (0.511 MeV; the previous value 0.00511 was a factor 10 too large)

# LAr EM showers
R_moliere =  9.5 # cm
X_o       = 13.9 # cm
E_c       = 0.035# GeV

## Definitions

In [3]:
# Dictionary translating the integer `category` field in the root tree
# into a human-readable label (applied with Series.map in loadData).
categories={1: 'Cosmic',
            2: 'e CC',
            3: 'mu CC',
            4: 'NC',
            5: 'Dirt',
            6: 'Data',
            7: 'Mixed',
            0: 'Other'}

# Fiducial volume borders in x,y,z:
# each pair is [margin from the low edge, margin from the high edge] in cm,
# as consumed by CheckBorderFid.
fid_arr= [[10,10],[20,20],[10,50]]
# Fiducial volume for the end points of tracks
# (uniform margin in cm, passed as `tolerance` to CheckBorderTPC).
fid_min = 10
# Minimum fraction of shower charge that has to lie inside the fiducial volume.
sh_cont_percent = .75

# Minimum reconstructable energies:
# kinetic threshold plus rest mass, compared against nu_daughters_E
# (presumably total energy in GeV -- TODO confirm against the tree definition).
min_e = 0.02+mass_e # 20MeV
min_p = 0.04+mass_p # 40MeV

# list ROOT files
# NOTE(review): absolute, user-specific path; adapt before running elsewhere.
#filelist = glob.glob('/home/wouter/Public/*/*.root')
filelist = glob.glob('/home/wouter/Templates/nue/*/*.root')

# List of fields in the ROOT tree you want to include
columns = ['n_tracks','n_showers','vx','vy','vz','category','distance',
           'nu_pdg','interaction_type',
           'true_vx_sce','true_vy_sce','true_vz_sce','nu_E','true_shower_depE',
           'true_shower_x_sce','true_shower_y_sce','true_shower_z_sce','true_shower_pdg',
           'nu_daughters_pdg','nu_daughters_E',
           'nu_daughters_endx','nu_daughters_endy',"nu_daughters_endz",
           'nu_daughters_px','nu_daughters_py','nu_daughters_pz',
           'nu_track_ids','nu_shower_ids','nu_shower_daughters','nu_track_daughters',
           'flash_PE','flash_time',
           'shower_dir_x','shower_dir_y','shower_dir_z',
           "shower_start_x","shower_start_y","shower_start_z",
           'shower_open_angle','shower_length',
           "shower_energy","track_energy_dedx","track_energy_hits",
           "track_dir_x","track_dir_y","track_dir_z",
           "track_start_x","track_start_y","track_start_z",
           'track_end_x','track_end_y','track_end_z',
           'predict_p','predict_mu','predict_pi','predict_em','predict_cos',
           'shower_pca','track_pca','track_nhits','shower_nhits',
           'matched_showers','matched_tracks','matched_tracks_energy','matched_showers_energy',
           'shower_dEdx_hits','track_dEdx_hits','bnbweight'
          ]

# Fields that are needed while processing a file but are dropped from the
# per-file dataframe at the end of loadData to keep the result small.
columns_remove_extra = ['n_tracks','n_showers','vx','vy','vz','interaction_type',
           'true_shower_x_sce','true_shower_y_sce','true_shower_z_sce',
           'nu_daughters_endx','nu_daughters_endy',"nu_daughters_endz",
           'nu_daughters_px','nu_daughters_py','nu_daughters_pz',
           'nu_track_ids','nu_shower_ids','nu_shower_daughters','nu_track_daughters',
           'flash_PE','flash_time',
           'shower_open_angle',
           "track_energy_dedx","track_energy_hits",
           'predict_mu','predict_pi','predict_em','predict_cos',
           'shower_pca','track_pca',
           'shower_dEdx_hits','track_dEdx_hits'
          ]

#print(filelist)

## Row-wise helper functions

In [4]:
# Returns the ratio of collection charge of the first part and the second part of the summed shower.
def showerChargeProfile(row):
    """Ratio of the mean charge in the first half to that in the second half of the shower.

    The shower axis is taken as the unit vector from the reconstructed vertex
    (vx, vy, vz) to the charge-weighted centre of the shower spacepoints.
    Every spacepoint is projected on that axis, the projections are
    histogrammed (numpy default binning) with the spacepoint charge as weight,
    and the mean content of the first half of the bins is divided by the mean
    content of the last half.

    Parameters
    ----------
    row : mapping
        Needs 'vx', 'vy', 'vz' (scalars) and 'shower_sp_x', 'shower_sp_y',
        'shower_sp_z', 'shower_sp_int' (equally long sequences).

    Returns
    -------
    float
        The charge ratio, or NaN when there are no spacepoints, the total
        charge is zero, or the charge centre coincides with the vertex
        (no axis can be defined in those cases).
    """
    vertex = np.array([row['vx'], row['vy'], row['vz']], dtype=float)
    points = np.column_stack((row['shower_sp_x'], row['shower_sp_y'], row['shower_sp_z'])).astype(float)
    charge = np.asarray(row['shower_sp_int'], dtype=float)

    total_Q = charge.sum()
    if len(charge) == 0 or total_Q == 0:
        return np.nan

    center = (points * charge[:, np.newaxis]).sum(axis=0) / total_Q
    axis = center - vertex
    axis_length = np.linalg.norm(axis)
    if axis_length == 0:
        return np.nan
    axis = axis / axis_length

    # Signed distance of every spacepoint from the vertex along the shower axis.
    distance = (points - vertex).dot(axis)

    charge_per_bin, _ = np.histogram(distance, weights=charge)
    # Integer division: a float is not a valid slice bound in Python 3.
    half = len(charge_per_bin) // 2
    return np.mean(charge_per_bin[:half]) / np.mean(charge_per_bin[-half:])


# def CorrectMatchedDaughters(row):
#     matched_showers, matched_tracks            = list(row['matched_showers']),list(row['matched_tracks'])
#     nu_shower_ids, nu_track_ids                = row['nu_shower_ids'],row['nu_track_ids']
#     nu_shower_daughters, nu_track_daughters    = list(row['nu_shower_daughters']),list(row['nu_track_daughters'])
#     # check if there are unmatched shower daughters:
#     err = -2147483648
#     if err in matched_showers:
#         index = matched_showers.index(err)
#         pf    = nu_shower_ids[index]
#         pdg=0
#         print(pf)
#         print(nu_shower_daughters)
#         if [pf] in nu_shower_daughters:
#             pdg = matched_showers[nu_shower_daughters.index([pf])]
#         if [pf] in nu_track_daughters:
#             pdg = matched_tracks[nu_track_daughters.index([pf])]
#         matched_showers[index]=pdg*100
#     #now for matched tracks
#     if err in matched_tracks:
#         index = matched_tracks.index(err)
#         pf    = nu_track_ids[index]
#         pdg=0
#         if [pf] in nu_shower_daughters:
#             pdg = matched_showers[nu_shower_daughters.index([pf])]
#         if [pf] in nu_track_daughters:
#             pdg = matched_tracks[nu_track_daughters.index([pf])]
#         matched_tracks[index]=pdg*100  
#     return pd.Series({"matched_showers": matched_showers, "matched_tracks": matched_tracks})

def _classify_daughters(daughters_per_particle, shower_ids, track_ids):
    """Return one daughter code per particle (see DaughterInfo for the codes)."""
    codes = []
    for daughters in daughters_per_particle:
        if len(daughters) == 0:
            codes.append(0)
        elif len(daughters) > 1:
            codes.append(3)
        elif daughters[0] in shower_ids:
            codes.append(1)
        elif daughters[0] in track_ids:
            codes.append(2)
        else:
            # Previously nothing was appended here, which shifted all later
            # entries and broke the one-to-one match with the particle list.
            codes.append(-1)
    return codes


def DaughterInfo(row):
    """Summarise, per reconstructed shower and track, what kind of daughters it has.

    Codes (one entry per element of nu_shower_daughters / nu_track_daughters):
         0 : no daughters
         1 : exactly one daughter, listed in nu_shower_ids
         2 : exactly one daughter, listed in nu_track_ids
         3 : more than one daughter
        -1 : exactly one daughter that is in neither id list

    Parameters
    ----------
    row : mapping
        Needs 'nu_shower_ids', 'nu_track_ids', 'nu_shower_daughters'
        and 'nu_track_daughters'.

    Returns
    -------
    pandas.Series
        With entries 'shower_daughter' and 'track_daughter', each a list of codes.
    """
    nu_shower_ids, nu_track_ids = row['nu_shower_ids'], row['nu_track_ids']

    showerdaughter = _classify_daughters(list(row['nu_shower_daughters']), nu_shower_ids, nu_track_ids)
    trackdaughter = _classify_daughters(list(row['nu_track_daughters']), nu_shower_ids, nu_track_ids)
    return pd.Series({"shower_daughter": showerdaughter, "track_daughter": trackdaughter})
        

## Functions

In [5]:
# Return true if the point is in the TPC with a tolerance.
def CheckBorderTPC(x,y,z,tolerance=0):
    """Tell whether the point (x, y, z) lies strictly inside the TPC box.

    The box spans 0..256.35 in x, -116.5..116.5 in y and 0..1036.8 in z (cm);
    `tolerance` shrinks it by the same margin on every face.
    """
    x_max, y_half, z_max = 256.35, 116.5, 1036.8
    margin = tolerance

    # Reject as soon as one coordinate is outside its allowed interval.
    if not ((0 + margin) < x < (x_max - margin)):
        return False
    if not ((-y_half + margin) < y < (y_half - margin)):
        return False
    return bool((0 + margin) < z < (z_max - margin))

def CheckBorderFid(x,y,z,arr):
    """Tell whether (x, y, z) lies strictly inside the fiducial volume.

    `arr` holds one [low margin, high margin] pair per axis (x, y, z), in cm,
    measured inwards from the TPC faces (x: 0..256.35, y: -116.5..116.5,
    z: 0..1036.8).
    """
    x_max, y_half, z_max = 256.35, 116.5, 1036.8
    (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = arr[0][:2], arr[1][:2], arr[2][:2]

    # Reject as soon as one coordinate is outside its allowed interval.
    if not ((0 + x_lo) < x < (x_max - x_hi)):
        return False
    if not ((-y_half + y_lo) < y < (y_half - y_hi)):
        return False
    return bool((0 + z_lo) < z < (z_max - z_hi))


# Return the angle between two numpy vectors.
def anglevec(v1,v2):
    """Angle in radians between the numpy vectors v1 and v2."""
    cos_angle = np.dot(v1 / np.linalg.norm(v1), v2 / np.linalg.norm(v2))
    # Clip so that rounding errors cannot push the cosine outside [-1, 1].
    return np.arccos(np.clip(cos_angle, -1.0, 1.0))


# Verifies if the event passed the flash precuts.
def flash_precut(row):
    """True when at least one flash lies inside the time window with enough light.

    A flash qualifies when its time is strictly between 3.2 and 3.2+1.6 and
    its PE count is strictly above 50.
    """
    window_open = 3.2
    window_close = window_open+1.6
    pe_threshold = 50

    flashes = zip(row['flash_time'],row['flash_PE'])
    return any(t_flash>window_open and t_flash<window_close and pe>pe_threshold
               for t_flash,pe in flashes)

# Sum reconstructed energy
def CalcRecoE(row):
    """Sum the reconstructed energy and the number of hits over all tracks and showers.

    Parameters
    ----------
    row : mapping
        Needs 'track_energy_hits', 'track_energy_dedx', 'track_nhits',
        'shower_energy' and 'shower_nhits'. Each per-track / per-shower entry
        of the *_hits, *_nhits and shower_energy fields is assumed to hold one
        value per wire plane (three values) -- the `shower[2]` access below
        relies on that; TODO confirm against the tree definition.

    Returns
    -------
    pandas.Series
        'reconstructed_energy': list of 4 sums. Entries 0-2 are the per-plane
            hit-based energies of tracks plus showers; entry 3 uses dE/dx for
            tracks and the plane-2 energy for showers.
        'total_nhits': list of 3 per-plane hit counts.
    """
    # Accumulate in numpy arrays: `+=` on a Python list (or list slice) would
    # extend the list with the new values instead of adding element-wise.
    reco_energy = np.zeros(4)
    total_nhits = np.zeros(3, dtype=int)

    for tr_hits,tr_dedx,nhits in zip(row['track_energy_hits'],row['track_energy_dedx'],row["track_nhits"]):
        reco_energy[:3] += np.asarray(tr_hits, dtype=float)
        reco_energy[-1] += tr_dedx
        total_nhits = total_nhits + np.asarray(nhits)

    for shower,nhits in zip(row['shower_energy'],row["shower_nhits"]):
        reco_energy[:3] += np.asarray(shower, dtype=float)
        reco_energy[-1] += shower[2] #4th entry uses plane 2 hits for showers and dedx for tracks
        total_nhits = total_nhits + np.asarray(nhits)

    return pd.Series({"reconstructed_energy": reco_energy.tolist(), "total_nhits": total_nhits.tolist()})

# Signal Definition 1e0p 
def true_thresholds_1e0p(row):
    """True when the event has a true electron (pdg 11) daughter with energy above min_e."""
    daughters = zip(row["nu_daughters_pdg"],row["nu_daughters_E"])
    return any(pdg==11 and E>min_e for pdg,E in daughters)

# Signal Definition 1eNp
def true_thresholds_1eNp(row):
    """True when the event has both an electron above min_e and a proton above min_p.

    Electrons are daughters with pdg 11, protons daughters with pdg 2212;
    the two requirements may be met by different daughters.
    """
    daughters = list(zip(row["nu_daughters_pdg"],row["nu_daughters_E"]))
    electron_found = any(pdg==11 and E>min_e for pdg,E in daughters)
    proton_found = any(pdg==2212 and E>min_p for pdg,E in daughters)
    return electron_found and proton_found

# Formatting
def sciNot(x):
    """Format x (anything float() accepts) as a string with one decimal."""
    return format(float(x), ".1f")

# Calculates the true end point for electron showers, for 95% of energy
def showerTrueEnd(row):
    """Estimate where the true electron shower ends and where it reaches its maximum.

    The first true electron daughter (pdg 11) is used. With E its energy,
    t_max = ln(E/E_c) - 1 radiation lengths is the depth of the shower
    maximum and (t_max + 0.08*18 + 9.6) * X_o the length used for the end
    point. Both points are placed along the daughter's momentum direction,
    starting from its production vertex.

    Parameters
    ----------
    row : mapping
        Needs 'nu_daughters_pdg', 'nu_daughters_E', 'nu_daughters_px/py/pz'
        and 'nu_daughters_vx/vy/vz'.

    Returns
    -------
    pandas.Series
        'true_shower_endx/endy/endz' and 'true_shower_tmax_x/_y/_z'.
        All six are NaN when the row has no true electron daughter.
    """
    # Indices of all true electron daughters (flat array, so len() counts electrons).
    electron_indices = np.flatnonzero(np.asarray(row['nu_daughters_pdg'])==11)

    if len(electron_indices)==0:
        # Without an electron there is no shower to extrapolate.
        return pd.Series({"true_shower_endx": np.nan, "true_shower_endy": np.nan, "true_shower_endz": np.nan,
                          "true_shower_tmax_x": np.nan, "true_shower_tmax_y": np.nan, "true_shower_tmax_z": np.nan})
    if len(electron_indices)>1:
        print('More than 1 true electron daughter')
    i_daughter = electron_indices[0]

    E_ratio = (row['nu_daughters_E'][i_daughter])/E_c
    t_max = np.log(E_ratio)-1.0
    length = (t_max+0.08*18+9.6)*X_o
    direction = np.array([row['nu_daughters_px'][i_daughter],row['nu_daughters_py'][i_daughter],row['nu_daughters_pz'][i_daughter]])
    unit_direction = direction/np.linalg.norm(direction)
    true_shower_start = np.array([row['nu_daughters_vx'][i_daughter],row['nu_daughters_vy'][i_daughter],row['nu_daughters_vz'][i_daughter]])
    true_shower_end = true_shower_start+length*unit_direction
    true_shower_tmax = true_shower_start+(t_max*X_o)*unit_direction

    return pd.Series({"true_shower_endx": true_shower_end[0], "true_shower_endy": true_shower_end[1], "true_shower_endz": true_shower_end[2],
                      "true_shower_tmax_x": true_shower_tmax[0], "true_shower_tmax_y": true_shower_tmax[1], "true_shower_tmax_z": true_shower_tmax[2]})

def showerRecoEnd(row):
    """True when every reconstructed shower ends inside the TPC shrunk by fid_min.

    The end point of a shower is its start point moved by `shower_length`
    along the normalised shower direction.
    """
    start_points = zip(row['shower_start_x'],row['shower_start_y'],row['shower_start_z'])
    directions = zip(row['shower_dir_x'],row['shower_dir_y'],row['shower_dir_z'])

    for start_xyz,dir_xyz,length in zip(start_points,directions,row['shower_length']):
        origin = np.array(start_xyz)
        heading = np.array(dir_xyz)
        tip = origin+length*heading/np.linalg.norm(heading)
        inside = CheckBorderTPC(tip[0],tip[1],tip[2],tolerance=fid_min)
        if not inside:
            return False
    return True

def TrackRecoEnd(row):
    """True when every reconstructed track ends inside the TPC shrunk by fid_min."""
    end_points = zip(row['track_end_x'],row['track_end_y'],row['track_end_z'])
    return all(CheckBorderTPC(end_x,end_y,end_z,tolerance=fid_min)
               for end_x,end_y,end_z in end_points)

# Calculates the percentage of sps inside the fiducial volume and the percentage of deposited collection plane charge
def containedPercent(row):
    """Fraction of the shower spacepoint charge that lies inside the fiducial volume.

    The denominator starts at 0.01 instead of 0, so a shower without
    spacepoints yields 0 rather than a division by zero.
    """
    charge_total = 0.01
    charge_inside = 0.0

    spacepoints = zip(row['shower_sp_x'],row['shower_sp_y'],row['shower_sp_z'],row['shower_sp_int'])
    for sp_x,sp_y,sp_z,sp_q in spacepoints:
        charge_total += sp_q
        if not CheckBorderFid(sp_x,sp_y,sp_z,fid_arr):
            continue
        charge_inside += sp_q

    return charge_inside/charge_total

def shower_nhits_plane(shower_nhits):
    """Total number of collection-plane hits (index 2 of each entry) over all showers."""
    return sum(per_plane[2] for per_plane in shower_nhits)

def CC_daughter_E(row):
    """Energy of the charged lepton (e+-, mu+-) among the true neutrino daughters.

    Parameters
    ----------
    row : mapping
        Needs 'nu_daughters_pdg' and 'nu_daughters_E' (equally long sequences).

    Returns
    -------
    The energy of the single daughter with pdg in {11, -11, 13, -13}, or -1
    when there is no such daughter or when there is more than one (a message
    is printed in the latter case).

    NOTE(review): the previous version could not run (undefined names) and
    compared the count against 2; "more than one" is assumed to be the intent.
    """
    lepton_pdgs = (11, -11, 13, -13)
    lepton_energies = [E for pdg, E in zip(row['nu_daughters_pdg'], row['nu_daughters_E'])
                       if pdg in lepton_pdgs]

    if len(lepton_energies) == 1:
        return lepton_energies[0]
    if len(lepton_energies) > 1:
        print('Multiple electron/muon daughters found!')
    return -1

In [9]:
def add_columns(df_shower):
    """Add the derived per-event columns to df_shower (modified in place) and return it.

    Columns added: true shower end / shower-maximum points and whether they
    lie in the TPC, the contained charge fraction, the reco length
    containment flag, the shower charge profile ratio and the daughter codes.
    """
    true_end_cols  = ['true_shower_endx','true_shower_endy','true_shower_endz']
    true_tmax_cols = ['true_shower_tmax_x','true_shower_tmax_y','true_shower_tmax_z']
    spacepoint_cols = ['shower_sp_x','shower_sp_y','shower_sp_z','shower_sp_int']
    vertex_cols = ['vx','vy','vz']

    def inside_tpc(point_cols):
        # Row function testing the point stored in the three given columns.
        return lambda event: CheckBorderTPC(event[point_cols[0]],event[point_cols[1]],event[point_cols[2]])

    # True end point and shower-maximum point of the electron shower
    df_shower[true_end_cols+true_tmax_cols]=df_shower.apply(showerTrueEnd,axis=1)
    # True containment of both points (needs the columns created just above)
    df_shower['e_contained']=df_shower.apply(inside_tpc(true_end_cols), axis=1)
    df_shower['e_contained_tmax']=df_shower.apply(inside_tpc(true_tmax_cols), axis=1)
    # Reco containment: charge fraction of the spacepoints inside the fiducial volume
    df_shower['shower_containment_q']=df_shower[spacepoint_cols].apply(containedPercent,axis=1)
    # Reco containment: extrapolated shower end points
    df_shower['reco_length_containment']=df_shower.apply(showerRecoEnd,axis=1)
    # Ratio of the charge in the first and the second half of the shower
    df_shower['shower_sp_profile']=df_shower[vertex_cols+spacepoint_cols].apply(showerChargeProfile,axis=1)
    # Daughter classification of showers and tracks (work in progress)
    df_shower[['shower_daughter','track_daughter']]=df_shower.apply(DaughterInfo,axis=1)
    return df_shower

In [10]:
# Load root files into a dataframe or dictionary of dataframes.

def loadData(filelist,                  # List of input files
             columns,                   # List of fields (columns) to include in the dataframe  
             maxf=1,                    # Maximum number of files to loop over
             contained=True,            # Is the true neutrino interaction in a defined volume?
             truecontains=[11,2212],    # Require the true neutrino interaction to have the following daughters.
             LEE_denom=False,           # 1 proton of at least 40MeV, 1 electron of at least 20MeV, true vertex inside: x[10,10],y[20,20],z[10,50]
             Nue_inclusive=True,        # 1 electron of at least 20MeV, true vertex inside: x[10,10],y[20,20],z[10,50]
             flashpassed=True,          # Output only rows with a candidate passing the optical selection
             passed=True,               # Output only rows with a candidate passing the selection
             returnpure=False ):        # Output only rows with a candidate passing that is within 5cm and nu or mixed category.
    """Read the ROOT files, apply the truth and selection cuts and return the selected events.

    For each of the first `maxf` files the tree "robertoana/pandoratree" is
    read and filtered step by step (true vertex in the fiducial volume,
    required true daughters, signal definition, flash pre-cut, optical
    selection, selection, reco vertex in the fiducial volume). Derived columns
    are added with add_columns and CalcRecoE, helper columns are dropped and
    the per-file frames are concatenated. Additional test cuts (hit count,
    shower/track containment, track score) are only counted for the printed
    summary; they do not affect the returned dataframe.

    Side effects: prints progress and a cut-flow summary, and pickles the
    slimmed truth-level frame (events passing the truth cuts, before any
    reco cut) to 'Input/all_events.pckl'.

    Returns
    -------
    pandas.DataFrame
        The selected events of all processed files (empty when nothing was read).
    """
    # Branches needed by this function and by the row helpers it applies.
    # Those not requested through `columns` are dropped again at the end.
    columns_req = ['true_vx','true_vy','true_vz','bnbweight',
                   'true_vx_sce','true_vy_sce','true_vz_sce','nu_E',
                   'vx','vy','vz',
                   'nu_daughters_vx','nu_daughters_vy','nu_daughters_vz',
                   'flash_time','flash_PE',
                   'nu_daughters_pdg','nu_daughters_E','nu_daughters_px','nu_daughters_py','nu_daughters_pz',
                   'flash_passed','passed','category','distance',
                   'track_energy_hits','shower_energy','track_energy_dedx','shower_nhits','track_nhits',
                   'true_shower_x_sce','true_shower_y_sce',"true_shower_z_sce","true_shower_pdg",
                   'shower_start_x','shower_start_y','shower_start_z',
                   'shower_sp_x','shower_sp_y','shower_sp_z','shower_sp_int',
                   'shower_length',"shower_dir_x","shower_dir_y","shower_dir_z",
                   'track_end_x','track_end_y','track_end_z','predict_p',
                   'nu_shower_ids','nu_track_ids','nu_shower_daughters','nu_track_daughters'
                  ]
    columns_remove = [item for item in columns_req if item not in columns]
    # One combined drop list, so a name present in both lists cannot raise a KeyError.
    columns_drop = columns_remove + [item for item in columns_remove_extra if item not in columns_remove]

    chunks=[]
    chunks_all=[]
    columns_all = list(set(columns) | set(columns_req))

    def percent(part, whole):
        # Percentage for the summary; 'nan' instead of a ZeroDivisionError on empty selections.
        return sciNot(part/whole*100) if whole else 'nan'

    # Cut-flow counters
    entries=0
    entries_contained=0
    entries_truecontains=[0]*len(truecontains)
    entries_sig_def=0
    entries_valid_flash=0
    entries_flashpassed=0
    entries_passed=0
    entries_reco_fiducial=0
    entries_hitcut=0
    entries_sh_cont=0
    entries_tr_cont=0
    entries_tr_score=0

    entries_noncosmic=0
    entries_pure=0
    entries_pure_plus=0
    entries_noncosmic_test=0
    entries_pure_test=0
    entries_pure_plus_test=0

    entries_final=0

    total_pot=0.0

    nfiles=len(filelist)
    if maxf<nfiles:
        nfiles=maxf

    print()
    #print(columns_remove)
    print ('Start to load entries from',nfiles,'files.\n')
    start_time = time.time()

    progress=0
    for i_f,fname in enumerate(filelist[:nfiles]):
        if(i_f % math.ceil(nfiles/10)==0 ):
            print ('Progress:',progress,'%.')
            progress+=10

        if (os.stat(fname).st_size)<50000:
            print('File',fname,'was corrupt. Size:',os.stat(fname).st_size/1000, 'kb, skipping to next file.')
            continue

        dftemp=read_root(fname,"robertoana/pandoratree",columns=columns_all)

        #store the POT of the sample (kept as a plain float)
        total_pot+=float(read_root(fname,"robertoana/pot",columns='pot')['pot'].sum())

        entries+=len(dftemp.index)

        if contained:
            dftemp=dftemp[dftemp.apply(lambda x: CheckBorderFid(x['true_vx_sce'],x['true_vy_sce'],x['true_vz_sce'],fid_arr), axis=1)]
            entries_contained+=len(dftemp.index)

        for i,pdg in enumerate(truecontains):
            dftemp = dftemp[dftemp["nu_daughters_pdg"].apply(lambda x: pdg in x)]
            entries_truecontains[i]+=len(dftemp.index)

        if Nue_inclusive or LEE_denom:
            #dftemp = dftemp[dftemp.apply(lambda x: CheckBorderFid(x['true_vx'],x['true_vy'],x['true_vz'],fid_arr), axis=1)]
            if Nue_inclusive:
                dftemp = dftemp[dftemp[["nu_daughters_pdg","nu_daughters_E"]].apply(true_thresholds_1e0p, axis=1)]
            if LEE_denom:
                dftemp = dftemp[dftemp[["nu_daughters_pdg","nu_daughters_E"]].apply(true_thresholds_1eNp, axis=1)]
            entries_sig_def+=len(dftemp.index)

        # Up to here only truth-level cuts were applied to define the signal.
        # Save some fields of these events before applying the reco cuts:
        df_all_temp = dftemp[['true_shower_x_sce','true_shower_y_sce','true_shower_z_sce',
                             'nu_E','bnbweight','nu_daughters_pdg','nu_daughters_E']]

        dftemp = dftemp[dftemp.apply(flash_precut, axis=1)]
        entries_valid_flash+=len(dftemp.index)

        if flashpassed:
            dftemp = dftemp[ dftemp['flash_passed'].apply(lambda x: not np.all(x==-1))]
            entries_flashpassed+=len(dftemp.index)

        if passed:
            dftemp=dftemp[dftemp['passed']==1]
            entries_passed+= len(dftemp.index)

        # reconstructed vertex in fiducial volume
        # (.copy() so the columns added below go to an independent frame, not a slice)
        dftemp=dftemp[dftemp.apply(lambda x: CheckBorderFid(x['vx'],x['vy'],x['vz'],fid_arr), axis=1)].copy()
        entries_reco_fiducial+=len(dftemp.index)

        if dftemp.empty:
            # Nothing selected in this file: keep its truth-level events and move on,
            # the row-wise helpers below cannot run on an empty frame.
            chunks_all.append(df_all_temp)
            continue

        # Do calculations with the small dataframe
        dftemp = add_columns(dftemp)

        # 5 hit min cut:
        dftemp['plane2']=dftemp['shower_nhits'].apply(shower_nhits_plane)
        df_test = dftemp[dftemp['plane2']>5]
        entries_hitcut+=len(df_test.index)

        # Shower containment
        df_test = df_test[df_test['shower_containment_q']>sh_cont_percent]
        #df_test = df_test[df_test['reco_length_containment']]
        entries_sh_cont+=len(df_test.index)

        # Track containment
        df_test=df_test[df_test.apply(TrackRecoEnd, axis=1)]
        entries_tr_cont+=len(df_test.index)

        # Track score 
        df_test=df_test[df_test.apply(lambda x: np.all(x['predict_p']>0.0003), axis=1)]
        entries_tr_score+=len(df_test.index)


        entries_noncosmic+= len(dftemp[dftemp['category'].isin([2,3,7]) ].index)

        entries_pure+= len(dftemp[ (dftemp['distance']<5) & 
                                   (dftemp['category'].isin([2,3,7]))  ].index)

        entries_pure_plus+= len(dftemp[ (dftemp['distance']<5) & 
                                   (dftemp['category'].isin([2,3]))  ].index)

        entries_noncosmic_test+= len(df_test[df_test['category'].isin([2,3,7]) ].index)

        entries_pure_test+= len(df_test[ (df_test['distance']<5) & 
                                   (df_test['category'].isin([2,3,7]))  ].index)

        entries_pure_plus_test+= len(df_test[ (df_test['distance']<5) & 
                                   (df_test['category'].isin([2,3]))  ].index)


        if returnpure:
            dftemp = dftemp[ (dftemp['category'].isin([2,3,7])) & (dftemp['distance']<5) ].copy()

        entries_final+=len(dftemp.index)

        dftemp["category"]=dftemp["category"].map(categories)        
        dftemp[['reconstructed_energy','total_nhits']]=dftemp[["track_energy_hits","shower_energy",'track_energy_dedx',"shower_nhits","track_nhits"]].apply(CalcRecoE,axis=1)

        # Remove helper columns from the small dataframe
        dftemp = dftemp.drop(columns_drop,axis=1,errors='ignore')

        chunks.append(dftemp)
        chunks_all.append(df_all_temp)

    print('\nSummary:')
    print (entries,'entries were loaded from',nfiles,'files, corresponding to',str(total_pot),'POT.')
    print (entries_contained, 'entries with true vertex in TPC.')
    for nr,pdg in zip(entries_truecontains,truecontains):
        print (nr, 'entries with a pdg',pdg,'particle.')
    print (entries_sig_def, 'entries with the signal definition.')
    print (entries_valid_flash, 'entries with a valid flash.')
    print (entries_flashpassed, 'entries passing the optical selection.')
    print (entries_passed, 'entries passing the selection.')
    print (entries_reco_fiducial, 'entries have reco vertex in fiducial volume.\n')
    print (entries_hitcut,' entries with at least 5 shower hit on plane2')
    print (entries_sh_cont,' entries with contained showers')
    print (entries_tr_cont,' entries with contained tracks')
    print (entries_tr_score,' entries with a minimum track score\n')

    print ('Category efficiency :', entries_noncosmic,'/', entries_sig_def,'(',percent(entries_noncosmic,entries_sig_def),'%)','signal events passed and category nu or mixed.')
    print ('Closeness purity :', entries_pure,'/', entries_reco_fiducial,'(',percent(entries_pure,entries_reco_fiducial),'%)','of passed events is within 5cm from true_sce vertex and category nu or mixed.')
    print ('Closeness purity plus :', entries_pure_plus,'/', entries_reco_fiducial,'(',percent(entries_pure_plus,entries_reco_fiducial),'%)','of passed events is within 5cm from true_sce vertex and category nu.')
    print (entries_final,'entries in the final dataframe.\n')

    print ('Category efficiency test:', entries_noncosmic_test,'/', entries_sig_def,'(',percent(entries_noncosmic_test,entries_sig_def),'%)','signal events passed and category nu or mixed.')
    print ('Closeness purity test:', entries_pure_test,'/', entries_tr_score,'(',percent(entries_pure_test,entries_tr_score),'%)','of passed events is within 5cm from true_sce vertex and category nu or mixed.')
    print ('Closeness purity plus test:', entries_pure_plus_test,'/', entries_tr_score,'(',percent(entries_pure_plus_test,entries_tr_score),'%)','of passed events is within 5cm from true_sce vertex and category nu.')

    print('Concatenating selection dataframe')
    # pd.concat raises on an empty list, e.g. when every file was skipped.
    df = pd.concat(chunks,ignore_index=True,copy=False) if chunks else pd.DataFrame()
    print('Concatenating full slimmed dataframe')
    if chunks_all:
        df_all = pd.concat(chunks_all,ignore_index=True,copy=False) 
        df_all.to_pickle('Input/all_events.pckl')

    end_time = time.time()
    print('Loading took ',sciNot(end_time-start_time),' seconds.')
    return df

## Load dataframe and save to Pickle

In [11]:
# Run the selection on the first file of `filelist` and keep the result in `df`.
# Only a true electron daughter is required here (Nue_inclusive definition).
df= loadData(filelist,                  # List of input files
             columns,                   # List of fields (columns) to include in the dataframe  
             maxf=1,                    # Maximum number of files to loop over
             contained=True,            # Is the true neutrino interaction in a defined volume?
             truecontains=[11],         # Require the true neutrino interaction to have the following daughters.
             LEE_denom=False,           # 1 proton of at least 40MeV, 1 electron of at least 20MeV, true vertex inside: x[10,10],y[20,20],z[10,50]
             Nue_inclusive=True,        # 1 electron of at least 20MeV, true vertex inside: x[10,10],y[20,20],z[10,50]
             flashpassed=True,          # Output only rows with a candidate passing the optical selection
             passed=True,               # Output only rows with a candidate passing the selection
             returnpure=False )         # Output only rows with a candidate passing that is within 5cm and not a cosmic.

# Persist the selected events.
# NOTE(review): the file name says "LEEdef" while LEE_denom=False above -- confirm the intended name.
df.to_pickle('Input/nue_cosmic_passed_LEEdef.pckl')

Start to load entries from 1 files.

Progress: 0 %.





Summary:
2000 entries were loaded from 1 files, corresponding to 2.39889737342e+20 POT.
701 entries with true vertex in TPC.
517 entries with a pdg 11 particle.
516 entries with the signal definition.
503 entries with a valid flash.
470 entries passing the optical selection.
404 entries passing the selection.
379 entries have reco vertex in fiducial volume.

374  entries with at least 5 shower hit on plane2
325  entries with contained showers
278  entries with contained tracks
264  entries with a minimum track score

Category efficiency : 375 / 516 ( 72.7 %) signal events passed and category nu or mixed.
Closeness purity : 304 / 379 ( 80.2 %) of passed events is within 5cm from true_sce vertex and category nu or mixed.
Closeness purity plus : 259 / 379 ( 68.3 %) of passed events is within 5cm from true_sce vertex and category nu.
379 entries in the final dataframe.

Category efficiency test: 264 / 516 ( 51.2 %) signal events passed and category nu or mixed.
Closeness purity test: 217 

In [14]:
# Sanity check: number of rows in the selected dataframe
# (one shower_sp_profile value per event).
len(df['shower_sp_profile'])
# Optional memory diagnostics:
#df.info(memory_usage='deep') 
#df.memory_usage(deep=True)[:25]

379

In [15]:
import matplotlib.pyplot as plt
%matplotlib inline
d=2
print(max(df['shower_sp_profile'][d]))
plt.hist(df['shower_sp_profile'][d],weights = df['shower_sp_int'][d])
y,x = np.histogram( df['shower_sp_profile'][d],weights = df['shower_sp_int'][d] )
print(np.mean(y[:5])/np.mean(y[-5:])>1)

TypeError: 'numpy.float64' object is not iterable

In [18]:
# Inspect the daughter bookkeeping next to the matched PDG codes.
# NOTE(review): nu_track_ids, nu_shower_ids, nu_shower_daughters and nu_track_daughters are
# listed in columns_remove_extra, which loadData drops -- this cell presumably ran with an
# earlier configuration; confirm it still works after Restart & Run All.
df[['nu_track_ids','nu_shower_ids','nu_shower_daughters','nu_track_daughters','matched_showers','matched_tracks','track_daughter','shower_daughter']]

Unnamed: 0,nu_track_ids,nu_shower_ids,nu_shower_daughters,nu_track_daughters,matched_showers,matched_tracks,track_daughter,shower_daughter
0,[],[0],[[]],[],[11],[],[],[0]
1,[2],"[1, 4, 6]","[[], [], []]",[[]],"[11, 211, 2212]",[2212],[0],"[0, 0, 0]"
2,[8],"[0, 5, 9, 10]","[[], [], [], []]",[[]],"[11, 2212, 11, 11]",[11],[0],"[0, 0, 0, 0]"
3,[2],"[0, 4, 6, 8]","[[], [], [], []]",[[]],"[11, 22, -13, 11]",[2212],[0],"[0, 0, 0, 0]"
4,[2],[0],[[]],[[]],[11],[2212],[0],[0]
5,"[4, 6]","[1, 9]","[[], []]","[[], []]","[11, 11]","[2212, 2212]","[0, 0]","[0, 0]"
6,[3],"[1, 4, 5]","[[], [5], []]",[[]],"[11, 11, -2147483648]",[11],[0],"[0, 1, 0]"
7,"[4, 7, 6, 8]","[2, 0, 10, 11]","[[], [], [], []]","[[7], [], [0], []]","[211, -2147483648, 2212, 11]","[2212, -2147483648, 11, 211]","[2, 0, 1, 0]","[0, 0, 0, 0]"
8,"[4, 5, 7]","[2, 6]","[[], []]","[[], [], []]","[11, 22]","[2212, 2212, 22]","[0, 0, 0]","[0, 0]"
9,[8],"[2, 4, 6, 7]","[[], [], [], []]",[[]],"[11, 11, 11, 11]",[11],[0],"[0, 0, 0, 0]"
