In [1]:
import uproot
import sys, os
import pandas as pd
import numpy as np
sys.path.append('../PIDGen_PIDCalib_MVA/')
from Add_MVA import AddBDTinfo

filedir = '/disk/lhcb_data2/RLcMuonic2016/'

Welcome to JupyROOT 6.18/04


In [2]:
file = uproot.open(filedir+'Data/Lb_DataSS_MagUp.root')

In [3]:
file.keys()

[b'tupleout;1', b'GetIntegratedLuminosity;1']

In [4]:
#tree = file['tupleout']['DecayTree']
varsON = ['Lb_L0Global_TIS','Lb_L0HadronDecision_TOS','Lc_Hlt1TrackMVADecision_TOS',
          'Lc_Hlt1TwoTrackMVADecision_TOS','Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS',
          'Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS','Lc_M','p_ProbNNp','p_ProbNNk','mu_PID*','*_P',
          '*_PT','nTracks','runNumber','eventNumber','Lb_ISOLATION_*','mu_PX','mu_PY','mu_PZ','mu_ID',
          'Lc_PX','Lc_PY','Lc_PZ']
df_list=[]
for df in uproot.pandas.iterate(filedir+'Data/Lb_DataSS_MagUp.root','tupleout/DecayTree',
                            varsON, entrysteps=500000):
    df_list.append(df)
#df = tree.arrays(varsON,outputtype=pd.DataFrame)

In [5]:
df=df_list[0]

In [6]:
def L0TriggerData(df):
    check = lambda x,y: True if (x | y) else False
    df['L0'] = df.apply(lambda x: check(x['Lb_L0Global_TIS'],x['Lb_L0HadronDecision_TOS']),axis=1)
    return df

def HLT1TriggerData(df):
    check = lambda x,y: True if (x | y) else False
    df['HLT1'] = df.apply(lambda x: check(x['Lc_Hlt1TrackMVADecision_TOS'],x['Lc_Hlt1TwoTrackMVADecision_TOS']),axis=1)
    return df

def HLT2TriggerData(df,dtype):
    if dtype =='Data' or dtype == 'DataSS':
        df['HLT2'] = df.apply((lambda x: True if x['Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS'] else False),axis=1)
    if dtype =='FakeMu' or dtype=='FakeMuSS':
        df['HLT2'] = df.apply((lambda x: True if x['Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS'] else False),axis=1)
    return df

def TriggerData(df):
    check = lambda x,y,z: True if (x & y & z) else False
    df['Trigger'] = df.apply(lambda x: check(x['L0'],x['HLT1'],x['HLT2']),axis=1)
    return df

In [7]:
def LcMassCut(df):
    df['LcMass'] = df.apply(lambda x: True if 2230<= x['Lc_M'] <=2330 else False, axis=1)
    return df

def ApplyPIDCalibCuts(df):
    CheckNtrks = lambda x: True if x>0 and x<700 else False
    CheckPAndPT = lambda x,y: True if (x>0 and x<200000 and y>0 and y<60000) else False
    CheckAll = lambda x, y, z, v, t: True if x & y & z & v & t else False
    particles = ['mu','p','pi','K']
    df['PIDCalib'] = df.apply(lambda x:True if (CheckNtrks(x['nTracks'])& CheckPAndPT(x['mu_P'],x['mu_PT']) &
                              CheckPAndPT(x['pi_P'],x['pi_PT']) & CheckPAndPT(x['p_P'],x['p_PT']) &
                              CheckPAndPT(x['K_P'],x['K_PT']) )else False, axis=1)
    return df

def ApplyMuCuts(df,dtype):
    if dtype=='Data'or dtype=='DataSS':
        df['MuCuts'] = df.apply(lambda x: True if (x['mu_PIDmu']>2 and x['mu_PIDmu']-x['mu_PIDK']>2 and
                                                  x['mu_PIDmu']-x['mu_PIDp']>2) else False, axis=1)
    else:
        df['MuCuts'] = True
    return df

def GetFinalPreselection(df):
    df['Preselection'] = df.apply(lambda x: True if x['Trigger']& x['LcMass'] &x['PIDCalib'] &
                                  x['MuCuts'] else False,axis=1)
    return df

In [8]:
def LoadBDTdf(dtype,polarity):
    df_list_bdt=[]
    ifname = filedir+'Data/Lb_'+dtype+'_'+polarity+'.root'
    bdtfname = ifname[0:-5]+'_MVA.root'
    if os.path.isfile(bdtfname):
        print('BDT file already created')
    else:
        print()
        print('>>>   Creating file with BDT variable')
        print()
        AddBDTinfo(ifname, 'tupleout/DecayTree', bdtfname, 'Data',
                   pickled_model_path = '../PIDGen_PIDCalib_MVA/xgb_reg.pkl')
    bdt_tree=uproot.open(bdtfname)['DecayTree']
    df_bdt= bdt_tree.arrays('bdt',outputtype=pd.DataFrame)
    return df_bdt

def MergeDataFrames(df,df_bdt):
    mergedDf = df.merge(df_bdt['bdt'],left_index=True, right_index=True)
    return mergedDf


In [9]:
BDTcut=0.7
def PassBDT(df,BDTcut):
    df['PassBDT'] = df.apply(lambda x: True if x['bdt']>BDTcut else False, axis=1)
    return df

def RemoveDDstar(df):
    df['NoDDstar'] = df.apply(lambda x: True if x['p_ProbNNp']- x['p_ProbNNk']>0 else False,axis=1)
    return df

def FinalSelection(df):
    df['FinalSel']= df.apply(lambda x: True if x['Preselection']&x['PassBDT']&x['NoDDstar'] else False, axis=1)
    return df

In [10]:
%%time

df = L0TriggerData(df)
df = HLT1TriggerData(df)
df = HLT2TriggerData(df,'DataSS')
df = TriggerData(df)

CPU times: user 1min 15s, sys: 2.96 s, total: 1min 18s
Wall time: 1min 17s


In [11]:
%%time
df = LcMassCut(df)

CPU times: user 10.9 s, sys: 712 ms, total: 11.6 s
Wall time: 11.6 s


In [12]:
%%time
df = ApplyPIDCalibCuts(df)
df.head()

CPU times: user 1min 4s, sys: 896 ms, total: 1min 5s
Wall time: 1min 5s


Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,mu_ID,Lc_PX,Lc_PY,Lc_PZ,L0,HLT1,HLT2,Trigger,LcMass,PIDCalib
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,13,-1960.88,-2152.35,63309.98,True,True,True,True,True,True
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,13,6361.34,-5894.45,75556.7,False,True,True,False,True,True
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,-13,4257.29,5955.58,99383.79,False,True,True,False,True,True
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,-13,7568.78,-2914.19,66231.56,True,True,True,True,True,True
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,13,3292.81,1116.76,50034.2,False,True,True,False,True,True


In [13]:
%%time
df= ApplyMuCuts(df,'DataSS')
df.head()

CPU times: user 14.6 s, sys: 816 ms, total: 15.4 s
Wall time: 15.4 s


Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,Lc_PX,Lc_PY,Lc_PZ,L0,HLT1,HLT2,Trigger,LcMass,PIDCalib,MuCuts
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,-1960.88,-2152.35,63309.98,True,True,True,True,True,True,False
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,6361.34,-5894.45,75556.7,False,True,True,False,True,True,True
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,4257.29,5955.58,99383.79,False,True,True,False,True,True,True
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,7568.78,-2914.19,66231.56,True,True,True,True,True,True,True
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,3292.81,1116.76,50034.2,False,True,True,False,True,True,True


In [14]:
%%time
df=GetFinalPreselection(df)
df.head()

CPU times: user 32.6 s, sys: 868 ms, total: 33.5 s
Wall time: 33.4 s


Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,Lc_PY,Lc_PZ,L0,HLT1,HLT2,Trigger,LcMass,PIDCalib,MuCuts,Preselection
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,-2152.35,63309.98,True,True,True,True,True,True,False,False
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,-5894.45,75556.7,False,True,True,False,True,True,True,False
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,5955.58,99383.79,False,True,True,False,True,True,True,False
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,-2914.19,66231.56,True,True,True,True,True,True,True,True
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,1116.76,50034.2,False,True,True,False,True,True,True,False


In [15]:
df_bdt=LoadBDTdf('DataSS','MagUp')
mergedDf = df.merge(df_bdt,left_index=True, right_index=True)

BDT file already created


In [16]:
%%time
mergedDf=PassBDT(mergedDf,BDTcut)

CPU times: user 11.2 s, sys: 733 ms, total: 12 s
Wall time: 12 s


In [17]:
%%time
mergedDf= RemoveDDstar(mergedDf)
mergedDf = FinalSelection(mergedDf)

CPU times: user 45.8 s, sys: 1.6 s, total: 47.4 s
Wall time: 47.4 s


In [20]:
mergedDf.head()


Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,HLT2,Trigger,LcMass,PIDCalib,MuCuts,Preselection,bdt,PassBDT,NoDDstar,FinalSel
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,True,True,True,True,False,False,0.00116,False,False,False
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,True,False,True,True,True,False,0.273441,False,True,False
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,True,False,True,True,True,False,0.011024,False,False,False
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,True,True,True,True,True,True,0.008728,False,False,False
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,True,False,True,True,True,False,0.223288,False,False,False


In [13]:
%%time
ISOBDTcut =0.35
ISOBDT2cut=0.2

#Masses pi, K, p, mu, Lc
m_pi = 139.57018 #+/- 0.00035 MeV (PDG)
m_K = 493.677 #+/- 0.016 MeV (PDG)
m_p = 938.272081 #+/- 0.000006 MeV (PDG)
m_mu = 105.6583745 #+/- 0.0000024 MeV (PDG)
m_Lc = 2286.46 #+/- 0.14 MeV (PDG)

df2 = mergedDf.copy()
df2.shape
#df1 = df1[df1.Lb_ISOLATION_BDT>ISOBDTcut & df1.Lb_ISOLATION_BDT2>ISOBDT2cut]
df2['KenrCUTS'] = df2.apply(lambda x: True if (x.Lb_ISOLATION_BDT>ISOBDTcut) &(x.Lb_ISOLATION_BDT2>ISOBDT2cut) else False,axis=1) 
df2 = df2[df2['KenrCUTS']==True]
df2.shape

CPU times: user 16.1 s, sys: 1.36 s, total: 17.4 s
Wall time: 17.4 s


(276550, 91)

In [15]:
%%time
ISOBDTcut =0.35
ISOBDT2cut=0.2

#Masses pi, K, p, mu, Lc
m_pi = 139.57018 #+/- 0.00035 MeV (PDG)
m_K = 493.677 #+/- 0.016 MeV (PDG)
m_p = 938.272081 #+/- 0.000006 MeV (PDG)
m_mu = 105.6583745 #+/- 0.0000024 MeV (PDG)
m_Lc = 2286.46 #+/- 0.14 MeV (PDG)

df1 = mergedDf.copy()
df1.shape
df1 = df1[(df1.Lb_ISOLATION_BDT>ISOBDTcut) & (df1.Lb_ISOLATION_BDT2>ISOBDT2cut)]
#df1['KenrCUTS'] = df1.apply(lambda x: True if (x.Lb_ISOLATION_BDT>ISOBDTcut) &(x.Lb_ISOLATION_BDT2>ISOBDT2cut) else False,axis=1) 
#df1 = df1[df1['KenrCUTS']==True]
df1.shape

CPU times: user 73.4 ms, sys: 55.9 ms, total: 129 ms
Wall time: 128 ms


(276550, 90)

In [38]:
df1['E1pi'] = np.sqrt(df1["Lb_ISOLATION_PX"]**2 + df1['Lb_ISOLATION_PY']**2 + df1['Lb_ISOLATION_PZ']**2 + m_pi**2)
df1['E2pi'] = np.sqrt(df1["Lb_ISOLATION_PX2"]**2 + df1['Lb_ISOLATION_PY2']**2 + df1['Lb_ISOLATION_PZ2']**2 + m_pi**2)
df1['ELc'] =np.sqrt(df1['Lc_PX']**2+df1['Lc_PY']**2+df1['Lc_PZ']**2 + m_Lc**2)
df1['pLc12_x'] = df1['Lc_PX']+df1["Lb_ISOLATION_PX"]+df1["Lb_ISOLATION_PX2"]
df1['pLc12_y'] = df1['Lc_PY']+df1["Lb_ISOLATION_PY"]+df1["Lb_ISOLATION_PY2"]
df1['pLc12_z'] = df1['Lc_PZ']+df1["Lb_ISOLATION_PZ"]+df1["Lb_ISOLATION_PZ2"]
df1['mLc12'] = np.sqrt((df1['ELc']+df1['E1pi']+df1['E2pi'])**2 - 
                       (df1['pLc12_x']**2+df1['pLc12_y']**2+df1['pLc12_z']**2))
#df1=df1.drop(columns = ['pLc12_x','pLc12_y','pLc12_z'])
df1['muCharge'] = -df1['mu_ID']/13
df1['PIDdiff'] =df1['Lb_ISOLATION_PIDp'] - df1['Lb_ISOLATION_PIDK']
df1['PIDdiff2'] =df1['Lb_ISOLATION_PIDp2'] - df1['Lb_ISOLATION_PIDK2']
df1.head()

Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,E1pi,E2pi,ELc,pLc12_x,pLc12_y,pLc12_z,mLc12,muCharge,PIDdiff,PIDdiff2
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,16659.644889,2399.802145,63418.129331,-3008.129512,-3303.988489,82296.480244,3142.412989,-1.0,-11.001801,0.0
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,2066.897756,3289.552224,76087.147414,6859.140339,-6039.880641,80860.907397,3323.439045,-1.0,0.0,0.0
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,23152.791794,5227.833187,99679.281024,5654.690391,7355.009611,127669.491172,3710.641454,1.0,6.405901,0.0
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,10224.203042,2382.291929,66765.457936,9119.744386,-2973.382207,78728.390811,3120.360425,1.0,0.604401,0.0
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,6988.733779,8329.539713,50206.959872,3750.962939,1920.979421,65302.244922,3378.33253,-1.0,0.0,0.0


In [39]:
def SetMassParticles(df):
    df['m1'] = df.apply(lambda x: m_K if (x.Lb_ISOLATION_PIDK>4.)&((x.Lb_ISOLATION_CHARGE==-x.muCharge)|((x.Lb_ISOLATION_CHARGE==x.muCharge) & (x.PIDdiff<0))) else m_pi, axis=1)
    df['m2'] = df.apply(lambda x: m_K if (x.Lb_ISOLATION_PIDK2>4.)&((x.Lb_ISOLATION_CHARGE2==-x.muCharge)|((x.Lb_ISOLATION_CHARGE2==x.muCharge) & (x.PIDdiff2<0))) else m_pi, axis=1)
    return df

df1 = SetMassParticles(df1)
df1

Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,ELc,pLc12_x,pLc12_y,pLc12_z,mLc12,muCharge,PIDdiff,PIDdiff2,m1,m2
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,63418.129331,-3008.129512,-3303.988489,82296.480244,3142.412989,-1.0,-11.001801,0.0000,493.67700,139.57018
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,76087.147414,6859.140339,-6039.880641,80860.907397,3323.439045,-1.0,0.000000,0.0000,139.57018,139.57018
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,99679.281024,5654.690391,7355.009611,127669.491172,3710.641454,1.0,6.405901,0.0000,139.57018,139.57018
3,True,False,True,True,True,False,2249.818104,0.021579,0.999310,2.282870,...,66765.457936,9119.744386,-2973.382207,78728.390811,3120.360425,1.0,0.604401,0.0000,139.57018,139.57018
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,50206.959872,3750.962939,1920.979421,65302.244922,3378.332530,-1.0,0.000000,0.0000,139.57018,139.57018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,True,False,True,True,True,False,2271.636965,0.042828,0.998249,5.800219,...,47013.637329,-3449.663882,1475.164274,57350.159609,2992.718587,-1.0,0.000000,0.0000,139.57018,493.67700
499996,False,False,False,False,True,False,2328.887717,0.029709,0.090516,5.794516,...,70751.241099,5506.154175,587.790264,143349.220547,3431.024178,-1.0,-19.608700,2.2383,139.57018,139.57018
499997,False,False,False,True,True,False,2273.122082,0.014471,0.025393,-1.795917,...,310613.480778,11160.146364,4611.143941,325199.409160,3646.042854,-1.0,0.000000,0.0000,139.57018,139.57018
499998,True,False,False,False,True,False,2233.107924,0.285419,0.771195,4.822522,...,38795.679467,-897.460435,-2868.937405,54391.332656,3678.361520,-1.0,0.000000,0.0000,139.57018,139.57018


In [45]:
df1['E1'] = np.sqrt(df1["Lb_ISOLATION_PX"]**2 + df1['Lb_ISOLATION_PY']**2 + df1['Lb_ISOLATION_PZ']**2 +df1['m1']**2)
df1['E2'] = np.sqrt(df1["Lb_ISOLATION_PX2"]**2 + df1['Lb_ISOLATION_PY2']**2 + df1['Lb_ISOLATION_PZ2']**2 +df1['m2']**2)
df1['Emu'] = np.sqrt(df1.mu_PX**2 + df1.mu_PY**2 + df1.mu_PZ**2 +m_mu**2)
df1['ETOT'] = df1['ELc']+df1['E1']+df1['E2']+df1['Emu']
df1['pTOT_x'] = df1['pLc12_x']+df1.mu_PX
df1['pTOT_y'] = df1['pLc12_y']+df1.mu_PY
df1['pTOT_z'] = df1['pLc12_z']+df1.mu_PZ
df1['mTOT'] = np.sqrt(df1.ETOT**2 -(df1.pTOT_x**2+df1.pTOT_y**2+df1.pTOT_z**2))

df1

Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,E1,E2,Emu,ETOT,pTOT_x,pTOT_y,pTOT_z,mTOT,KenrCUTs,KenrCUTS
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,16666.373480,2399.802145,4336.872178,86821.177134,-3209.319512,-3145.678489,86624.500244,3730.647590,True,True
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,2066.897756,3289.552224,16626.745290,98070.342685,7988.690339,-7158.880641,97411.117397,3714.508889,False,False
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,23152.791794,5227.833187,5558.862276,133618.768280,5597.310391,7559.729611,133223.281172,4129.551439,True,True
3,True,False,True,True,True,False,2249.818104,0.021579,0.999310,2.282870,...,10224.203042,2382.291929,4767.006392,84138.959299,9241.974386,-2897.922207,83492.060811,3824.684284,True,True
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,6988.733779,8329.539713,29891.863090,95417.096454,5539.842939,1633.989421,95138.964922,4431.689072,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,True,False,True,True,True,False,2271.636965,0.042828,0.998249,5.800219,...,4867.960953,5688.761287,11537.900510,69108.260078,-4362.363882,1936.574274,68842.159609,3732.041368,True,True
499996,False,False,False,False,True,False,2328.887717,0.029709,0.090516,5.794516,...,48806.940079,23938.976249,40218.157179,183715.314605,7252.844175,185.830264,183527.280547,4051.645847,True,True
499997,False,False,False,True,True,False,2273.122082,0.014471,0.025393,-1.795917,...,11138.744267,3691.719225,9303.608464,334747.552734,10485.046364,4898.823941,334473.429160,7036.659268,False,False
499998,True,False,False,False,True,False,2233.107924,0.285419,0.771195,4.822522,...,7901.352785,7901.352785,6624.837847,61223.222884,-715.960435,-3674.367405,60963.672656,4207.144095,False,False


In [26]:
df1[['m1']][(df1.Lb_ISOLATION_CHARGE==-df1.muCharge)|((df1.Lb_ISOLATION_CHARGE==df1.muCharge) & df1.PIDdiff<0)]=m_K

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.loc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_array(key, value)


In [50]:
df1['isKenriched'] = df1.apply(lambda x: True if (x.KenrCUTS) & (x.mLc12>2700) & (x.mTOT<5620) & ((x.m1==m_K) | (x.m2==m_K)) else False,axis=1)
df1=df1.drop(df1.loc[:, 'E1pi':'KenrCUTS'].columns,axis=1)
df1

Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,Trigger,LcMass,PIDCalib,MuCuts,Preselection,bdt,PassBDT,NoDDstar,FinalSel,isKenriched
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,True,True,True,False,False,0.001160,False,False,False,True
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,False,True,True,True,False,0.273441,False,True,False,False
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,False,True,True,True,False,0.011024,False,False,False,False
3,True,False,True,True,True,False,2249.818104,0.021579,0.999310,2.282870,...,True,True,True,True,True,0.008728,False,False,False,False
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,False,True,True,True,False,0.223288,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,True,False,True,True,True,False,2271.636965,0.042828,0.998249,5.800219,...,True,True,True,True,True,0.011298,False,False,False,True
499996,False,False,False,False,True,False,2328.887717,0.029709,0.090516,5.794516,...,False,True,True,True,False,0.005908,False,False,False,False
499997,False,False,False,True,True,False,2273.122082,0.014471,0.025393,-1.795917,...,False,True,False,False,False,0.003769,False,False,False,False
499998,True,False,False,False,True,False,2233.107924,0.285419,0.771195,4.822522,...,False,True,True,True,False,0.001157,False,False,False,False


In [51]:
def CheckIfIsIsolated(df):
    df['isIsolated'] = df.apply(lambda x: True if x['Lb_ISOLATION_BDT']<ISOBDTcut
                                else False, axis=1)
    return df

df1 = CheckIfIsIsolated(df1)
df1

Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,LcMass,PIDCalib,MuCuts,Preselection,bdt,PassBDT,NoDDstar,FinalSel,isKenriched,isIsolated
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,True,True,False,False,0.001160,False,False,False,True,False
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,True,True,True,False,0.273441,False,True,False,False,True
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,True,True,True,False,0.011024,False,False,False,False,False
3,True,False,True,True,True,False,2249.818104,0.021579,0.999310,2.282870,...,True,True,True,True,0.008728,False,False,False,False,False
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,True,True,True,False,0.223288,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,True,False,True,True,True,False,2271.636965,0.042828,0.998249,5.800219,...,True,True,True,True,0.011298,False,False,False,True,False
499996,False,False,False,False,True,False,2328.887717,0.029709,0.090516,5.794516,...,True,True,True,False,0.005908,False,False,False,False,False
499997,False,False,False,True,True,False,2273.122082,0.014471,0.025393,-1.795917,...,True,False,False,False,0.003769,False,False,False,False,True
499998,True,False,False,False,True,False,2233.107924,0.285419,0.771195,4.822522,...,True,True,True,False,0.001157,False,False,False,False,False


In [22]:
df10 = df_list[0].copy()

In [23]:
%%time
def L0TriggerData1(df):
    df['L0'] = False
    df['L0'][df.Lb_L0Global_TIS | df.Lb_L0HadronDecision_TOS] = True
    return df
df10 = L0TriggerData1(df10)
df10.head()

CPU times: user 4.2 ms, sys: 3.9 ms, total: 8.1 ms
Wall time: 7 ms


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Lb_L0Global_TIS,Lb_L0HadronDecision_TOS,Lc_Hlt1TrackMVADecision_TOS,Lc_Hlt1TwoTrackMVADecision_TOS,Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS,Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS,Lc_M,p_ProbNNp,p_ProbNNk,mu_PIDmu,...,Lc_PY,Lc_PZ,L0,HLT1,HLT2,Trigger,LcMass,PIDCalib,MuCuts,Preselection
0,True,False,False,True,True,False,2293.895486,0.139599,0.240121,-6.083391,...,-2152.35,63309.98,True,True,True,True,True,True,False,False
1,False,False,True,True,True,False,2241.863511,0.630536,0.040897,7.032762,...,-5894.45,75556.7,False,True,True,False,True,True,True,False
2,False,False,True,True,True,False,2272.953145,0.056319,0.993643,9.509515,...,5955.58,99383.79,False,True,True,False,True,True,True,False
3,True,False,True,True,True,False,2249.818104,0.021579,0.99931,2.28287,...,-2914.19,66231.56,True,True,True,True,True,True,True,True
4,False,False,True,True,True,False,2320.249556,0.495502,0.988775,5.231485,...,1116.76,50034.2,False,True,True,False,True,True,True,False


In [None]:
def L0TriggerData(df):
    check = lambda x,y: True if (x | y) else False
    df['L0'] = df.apply(lambda x: check(x['Lb_L0Global_TIS'],x['Lb_L0HadronDecision_TOS']),axis=1)
    return df

In [None]:
def HLT1TriggerData(df):
    check = lambda x,y: True if (x | y) else False
    df['HLT1'] = df.apply(lambda x: check(x['Lc_Hlt1TrackMVADecision_TOS'],x['Lc_Hlt1TwoTrackMVADecision_TOS']),axis=1)
    return df

def HLT2TriggerData(df,dtype):
    if dtype =='Data' or dtype == 'DataSS':
        df['HLT2'] = df.apply((lambda x: True if x['Lb_Hlt2XcMuXForTauB2XcMuDecision_TOS'] else False),axis=1)
    if dtype =='FakeMu' or dtype=='FakeMuSS':
        df['HLT2'] = df.apply((lambda x: True if x['Lb_Hlt2XcMuXForTauB2XcFakeMuDecision_TOS'] else False),axis=1)
    return df

def TriggerData(df):
    check = lambda x,y,z: True if (x & y & z) else False
    df['Trigger'] = df.apply(lambda x: check(x['L0'],x['HLT1'],x['HLT2']),axis=1)
    return df