In [None]:
import uproot
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from scipy import stats

In [None]:
# Input: a single ROOT file read remotely over XRootD (file 1 of 10 of this sample).
fName = "root://cmseos.fnal.gov//store/user/dnoonan/HGCAL_Concentrator/IsolationStudies/v3/fastJetClustering_VBF_200PU_1of10.root"

# Open the file with 1 GiB chunk / cache limits and grab the "jetTree" tree.
# NOTE(review): `xrootdsource=` and `.pandas.df` are the uproot 3 interface --
# confirm the installed uproot version still provides them.
_tree = uproot.open(
    fName,
    xrootdsource={"chunkbytes": 1024**3, "limitbytes": 1024**3},
)["jetTree"]

# Reco-level frame: every branch whose name matches "jet*".
df = _tree.pandas.df("jet*")
# Generator-level frame: only the gen-jet pT and eta branches.
dfGen = _tree.pandas.df(["genJetPt", "genJetEta"])


In [None]:
# maxN = 0
# for i in range(1,11):
#     print(i,maxN)
#     fName = "root://cmseos.fnal.gov//store/user/dnoonan/HGCAL_Concentrator/IsolationStudies/v3/fastJetClustering_VBF_200PU_%iof10.root"%i

#     _tree = uproot.open(fName,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))["jetTree"]

#     this_df = _tree.pandas.df("jet*")
#     this_dfGen = _tree.pandas.df(["genJetPt","genJetEta"])

#     this_df.reset_index(inplace=True)
#     this_dfGen.reset_index(inplace=True)
#     this_df.entry += maxN
#     this_dfGen.entry += maxN
#     this_df.set_index(['entry','subentry'],inplace=True)
#     this_dfGen.set_index(['entry','subentry'],inplace=True)
    
#     if i==1:
#         df = this_df.copy()        
#         dfGen = this_dfGen.copy()
#     else:
#         df = pd.concat([df,this_df])
#         dfGen = pd.concat([dfGen,this_dfGen])
#     maxN = df.index.levels[0].max()+1


In [None]:
# Truth-matching flags, one boolean per row of df.
# VBF: the jet carries a gen-jet match index (anything above the -1 "no match" value).
df['VBF'] = df['jetGenJetMatch'].gt(-1)
# PU: the jet has no gen-jet match AND its jetMinGenJetDR is larger than 0.5
# (presumably: unmatched and well separated from every gen jet -- confirm).
df['PU'] = df['jetGenJetMatch'].eq(-1) & df['jetMinGenJetDR'].gt(0.5)

In [None]:
import numba


def _lookupGenJet(column, entry, genIndex):
    """Fetch `dfGen[column]` at the (entry, genIndex) pairs in one indexed lookup.

    Parameters
    ----------
    column : str
        Name of the dfGen column to read.
    entry, genIndex : int or array-like of int
        First and second level keys into dfGen's two-level index. They are
        broadcast against each other. A genIndex of -1 means "no gen jet".

    Returns
    -------
    numpy.ndarray of float64 with the broadcast shape, or a pandas.Series on
    the caller's index when either input is a Series. Positions where genIndex
    is -1 hold -1.0.

    Raises
    ------
    KeyError
        If a pair with genIndex != -1 is not present in dfGen's index.
    """
    # Keep the caller's index so Series in -> Series out.
    index = next((arg.index for arg in (entry, genIndex)
                  if isinstance(arg, pd.Series)), None)

    entryArr, genArr = np.broadcast_arrays(np.asarray(entry, dtype=np.int64),
                                           np.asarray(genIndex, dtype=np.int64))

    # Start from the "no match" value and only overwrite the matched positions.
    out = np.full(entryArr.shape, -1., dtype=np.float64)
    matched = genArr != -1
    if matched.any():
        keys = pd.MultiIndex.from_arrays([entryArr[matched], genArr[matched]])
        # get_indexer returns row positions in dfGen, or -1 for absent keys.
        pos = dfGen.index.get_indexer(keys)
        absent = pos < 0
        if absent.any():
            raise KeyError("(entry, genIndex) = %s not found in dfGen" % (keys[absent][0],))
        out[matched] = np.asarray(dfGen[column].values, dtype=np.float64)[pos]

    if index is None:
        return out
    return pd.Series(out, index=index)


def getGenJetPt(entry, genIndex):
    """Return genJetPt of the gen jet at (entry, genIndex); -1.0 where genIndex is -1.

    Implemented with a single pandas index lookup instead of a numba-vectorized
    per-element `dfGen.loc[...]`: Numba cannot compile code that indexes a pandas
    DataFrame in nopython mode, so the decorated version either fails to compile
    or falls back to a slow Python-level loop, depending on the Numba version.
    """
    return _lookupGenJet('genJetPt', entry, genIndex)


def getGenJetEta(entry, genIndex):
    """Return genJetEta of the gen jet at (entry, genIndex); -1.0 where genIndex is -1.

    Note that -1.0 is also a possible eta value; callers that need to tell
    "no match" apart should test genIndex itself.
    """
    return _lookupGenJet('genJetEta', entry, genIndex)


In [None]:
# Attach the matched gen-jet pT to every reco jet (-1 where jetGenJetMatch is -1).
# The event number is read straight from the 'entry' index level, so df's
# (entry, subentry) index is never flattened and rebuilt in place: if the lookup
# raises, df is left exactly as it was and the cell can simply be re-run.
# Plain arrays are passed so the result is assigned back by position.
df['genJetPt'] = getGenJetPt(df.index.get_level_values('entry').values,
                             df.jetGenJetMatch.values)

In [None]:
# Corrected jet pT: subtract the summed jetPtR05 + jetPtR06 columns, scaled down
# by 1.25, from the raw jet pT.
# NOTE(review): the origin of the 1.25 factor is not shown in this notebook --
# document it (or move it to a named constant) once confirmed.
df['jetPt_PUcorr'] = df['jetPt'] - (df['jetPtR05'] + df['jetPtR06']) / 1.25

In [None]:
# Gen-matched jets with 1.9 < |eta| < 2.6.
sel = df.VBF & df.jetEta.abs().gt(1.9) & df.jetEta.abs().lt(2.6)

x = df.loc[sel, 'genJetPt']
y = df.loc[sel, 'jetPt']

# Only jets with a positive gen pT enter the fit.
hasGenPt = x > 0
xi = x[hasGenPt]
yi = y[hasGenPt]

# Straight-line fit of reco pT against gen pT.
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, yi)

print(slope, intercept)

# Scatter of every selected jet, with the fitted line drawn on top in black.
ax = plt.gca()
ax.plot(x, y, 'o', markersize=1.5)
ax.plot(x, slope*x + intercept, 'k')

# Fit equation, placed at 90% of the largest reco pT on the left edge.
ax.text(0, y.max()*.9, s="y= %.4f x  + %.4f"%(slope,intercept))
ax.set_xlabel("Gen Jet Pt")
ax.set_ylabel("Reco Jet Pt")

In [None]:
#sns.regplot(x=df.genJetPt[df.VBF],y=df.jetPt[df.VBF],x_bins=np.arange(0,700,10))