# Dask investigation: comparing uproot vs dask_histograms

### This notebook compares histograms produced with uproot (the implementation currently used by the ntuple package) against histograms produced by the new dask-based implementation.

## 1. Import packages

In [None]:
# PyROOT supplies the TFile / TCanvas classes used in every cell below.
import ROOT
# Switch notebook canvas output to ROOT's JavaScript-based rendering.
ROOT.enableJSVis()
import sys
# Put the parent directory at the front of the module search path ...
sys.path.insert(0, '..')

# ... and '../python/' right after it, before importing l1THistos
# (presumably the module lives in one of these two places — verify).
sys.path.insert(1, '../python/')
import l1THistos

## 2. Load a histogram file created with uproot

In [None]:
# Open the ROOT file with the histograms from the uproot run.
# NOTE(review): absolute AFS path inside one user's area — adjust when running elsewhere.
my_file_uproot = ROOT.TFile('/afs/cern.ch/user/g/gsvedas/gintautas_fast/ntuple-analysis/plots/histos_doubleele_flat1to100_PU200_egplots_v160A.102Ei_uproot_test.root')

In [None]:
# Print a listing of the uproot file's contents.
my_file_uproot.ls()

In [None]:
# Look up the 'EGHistos' directory inside the file.
# NOTE(review): the returned object is not stored, only shown as the cell output;
# the ls() call in the next cell is still issued on the file object itself.
my_file_uproot.GetDirectory('EGHistos')

In [None]:
# List the file contents again, after the 'EGHistos' directory lookup.
my_file_uproot.ls()

In [None]:
# Fetch a single object by its path inside the file
# (presumably the pt histogram of the 'TkEmL2_all_nomatch' selection — verify).
hist_uproot = my_file_uproot.Get("EGHistos/TkEmL2_all_nomatch_pt")

In [None]:
# Entry count of the uproot histogram, for comparison with the dask one.
hist_uproot.GetEntries()

In [None]:
# Show which class the retrieved object is an instance of.
type(hist_uproot)

In [None]:
# Inspect the attributes and methods available on the file object.
dir(my_file_uproot)

In [None]:
# Print the name of each entry in the uproot file's key list.
for key in my_file_uproot.GetListOfKeys():
    print(key.GetName())

In [None]:
# Build the EGHistos wrappers for the two selections compared below,
# both backed by the uproot file (constructed in the same order as before).
lhist_uproot, lhist_pt2_uproot = [
    l1THistos.EGHistos(selection, my_file_uproot)
    for selection in ('TkEmL2_all_nomatch', 'TkEmL2_Pt2_nomatch')
]

In [None]:
# Overlay the `h_pt` histograms of the two uproot selections on one canvas.
can = ROOT.TCanvas()
# Style the second histogram up front: line colour 2 (red in ROOT's basic colour table).
lhist_pt2_uproot.h_pt.SetLineColor(2)
can.cd()
lhist_uproot.h_pt.Draw()
lhist_pt2_uproot.h_pt.Draw('same')
# Render the canvas as the cell output.
can.Draw()

In [None]:
# Draw the histogram fetched directly from the uproot file on a fresh canvas.
can = ROOT.TCanvas()
can.cd()
hist_uproot.Draw()
# Render the canvas as the cell output.
can.Draw()

## 3. Load a histogram file created with dask

In [None]:
# Open the ROOT file with the histograms from the dask run (single-file output).
# NOTE(review): `my_file_dask` is assigned again in section 3.1; if both cells are
# run, the later file is the one the following cells operate on.
my_file_dask = ROOT.TFile('/afs/cern.ch/user/g/gsvedas/gintautas_fast/ntuple-analysis/plots/histos_doubleele_flat1to100_PU200_egplots_v160A.102Ei_dask_single_file.root')

### 3.1 Load a histogram file created with dask using the option `-n -1`

In [None]:
# Open the dask output produced with the `-n -1` option.
# NOTE(review): this rebinds `my_file_dask`, replacing any file opened under
# that name by an earlier cell.
my_file_dask = ROOT.TFile('/afs/cern.ch/user/g/gsvedas/gintautas_fast/ntuple-analysis/plots/histos_doubleele_flat1to100_PU200_egplots_v160A.102Ei_n_minus.root')

In [None]:
# Print a listing of the dask file's contents.
my_file_dask.ls()

In [None]:
# Look up the 'EGHistos' directory inside the dask file.
# NOTE(review): the returned object is not stored, only shown as the cell output;
# the ls() call in the next cell is still issued on the file object itself.
my_file_dask.GetDirectory('EGHistos')

In [None]:
# List the file contents again, after the 'EGHistos' directory lookup.
my_file_dask.ls()

In [None]:
# Fetch the same object path as in the uproot section, this time from the dask file.
hist_dask = my_file_dask.Get("EGHistos/TkEmL2_all_nomatch_pt")

In [None]:
# Show which class the object retrieved from the dask file is an instance of
# (mirrors the `type(hist_uproot)` check in the uproot section).
type(hist_dask)

In [None]:
# Entry count of the dask histogram, for comparison with the uproot one.
hist_dask.GetEntries()

In [None]:
# Print the name of each entry in the dask file's key list.
# The key list belongs to the file object, not to the histogram fetched from it,
# so iterate over `my_file_dask` (same as the uproot section does for its file).
for h in my_file_dask.GetListOfKeys():
    print(h.GetName())

In [None]:
# Inspect the attributes and methods available on the retrieved histogram object.
dir(hist_dask)

In [None]:
# Build the EGHistos wrappers for the same two selections as in the uproot
# section, backed by the dask file (constructed in the same order as before).
lhist_dask, lhist_pt2_dask = [
    l1THistos.EGHistos(selection, my_file_dask)
    for selection in ('TkEmL2_all_nomatch', 'TkEmL2_Pt2_nomatch')
]

In [None]:
# Draw the histogram fetched directly from the dask file on a fresh canvas.
can = ROOT.TCanvas()
can.cd()
hist_dask.Draw()
# Render the canvas as the cell output.
can.Draw()

In [None]:
# Overlay the `h_pt` histograms of the two dask selections on one canvas.
can = ROOT.TCanvas()
# Style the second histogram up front: line colour 2 (red in ROOT's basic colour table).
lhist_pt2_dask.h_pt.SetLineColor(2)
can.cd()
lhist_dask.h_pt.Draw()
lhist_pt2_dask.h_pt.Draw('same')
# Render the canvas as the cell output.
can.Draw()

## 4. Comparing charts side-by-side

### uproot

In [None]:
# Load another selection from the uproot file for the side-by-side comparison.
# NOTE(review): both variables are built from the same selection name
# ('TkEmL2_Pt5EtaEE_nomatch'), unlike the all/Pt2 pair used earlier — confirm
# whether the second one was meant to use a different selection.
lhist_uproot_other = l1THistos.EGHistos('TkEmL2_Pt5EtaEE_nomatch', my_file_uproot)
lhist_pt2_uproot_other = l1THistos.EGHistos('TkEmL2_Pt5EtaEE_nomatch', my_file_uproot)

### dask

In [None]:
# Load the same selection from the dask file for the side-by-side comparison.
# NOTE(review): both variables are built from the same selection name
# ('TkEmL2_Pt5EtaEE_nomatch'), unlike the all/Pt2 pair used earlier — confirm
# whether the second one was meant to use a different selection.
lhist_dask_other = l1THistos.EGHistos('TkEmL2_Pt5EtaEE_nomatch', my_file_dask)
lhist_pt2_dask_other = l1THistos.EGHistos('TkEmL2_Pt5EtaEE_nomatch', my_file_dask)

### plot histograms

In [None]:
# Draw the `h_pt` histograms of the two "other" uproot wrappers on one canvas.
can = ROOT.TCanvas()
can.cd()
lhist_uproot_other.h_pt.Draw()
# Second histogram is drawn on top with line colour 2.
# NOTE(review): both wrappers were created from the same selection name, so the
# two curves may be indistinguishable (or even the same object) — verify.
lhist_pt2_uproot_other.h_pt.SetLineColor(2)
lhist_pt2_uproot_other.h_pt.Draw('same')

# Render the canvas as the cell output.
can.Draw()

In [None]:
# Draw the `h_pt` histograms of the two "other" dask wrappers on one canvas.
can = ROOT.TCanvas()
can.cd()
lhist_dask_other.h_pt.Draw()
# Second histogram is drawn on top with line colour 2.
# NOTE(review): both wrappers were created from the same selection name, so the
# two curves may be indistinguishable (or even the same object) — verify.
lhist_pt2_dask_other.h_pt.SetLineColor(2)
lhist_pt2_dask_other.h_pt.Draw('same')

# Render the canvas as the cell output.
can.Draw()