add spinner and dropdown menu
mchiusi committed Oct 3, 2023
1 parent: 7147413 · commit: 518c706
Showing 21 changed files with 415 additions and 428 deletions.
54 changes: 26 additions & 28 deletions bye_splits/data_handle/data_process.py
@@ -15,12 +15,10 @@

import bye_splits
from bye_splits.utils import common

from utils import params
from data_handle.geometry import GeometryData
from data_handle.event import EventData
from data_handle.data_input import InputData
from plotly.express.colors import qualitative

def baseline_selection(df_gen, df_cl, sel, **kw):
data = pd.merge(left=df_gen, right=df_cl, how='inner', on='event')
@@ -72,14 +70,14 @@ def baseline_selection(df_gen, df_cl, sel, **kw):
print("The baseline selection has a {}% efficiency: {}/{}".format(np.round(eff,2), nout, nin))
return data

def get_data_reco_chain_start(nevents=500, reprocess=False, tag='chain', particles='photons', PU='PU0', event=None):
def get_data_reco_chain_start(nevents=500, reprocess=False, tag='chain', particles='photons', pu=0, event=None):
"""Access event data."""
data_particle = EventDataParticle(particles, PU, tag, reprocess, debug=True)
data_particle = EventDataParticle(particles, pu, tag, reprocess)
if event is None:
ds_all, events = data_particle.provide_random_events(n=nevents)
# ds_all = data_particle.provide_events(events=[170004, 170015, 170017, 170014])
else:
ds_all = data_particle.provide_event(event,merge=False)
ds_all = data_particle.provide_event(event, merge=False)
events = event

if ds_all["gen"].empty:
@@ -89,17 +87,17 @@ def get_data_reco_chain_start(nevents=500, reprocess=False, tag='chain', particl
"event": "event",
"good_tc_waferu": "tc_wu",
"good_tc_waferv": "tc_wv",
"good_tc_cellu": "tc_cu",
"good_tc_cellv": "tc_cv",
"good_tc_layer": "tc_layer",
"good_tc_pt": "tc_pt",
"good_tc_mipPt": "tc_mipPt",
"good_tc_cellu" : "tc_cu",
"good_tc_cellv" : "tc_cv",
"good_tc_layer" : "tc_layer",
"good_tc_pt" : "tc_pt",
"good_tc_mipPt" : "tc_mipPt",
"good_tc_energy": "tc_energy",
"good_tc_x": "tc_x",
"good_tc_y": "tc_y",
"good_tc_z": "tc_z",
"good_tc_eta": "tc_eta",
"good_tc_phi": "tc_phi",
"good_tc_x" : "tc_x",
"good_tc_y" : "tc_y",
"good_tc_z" : "tc_z",
"good_tc_eta" : "tc_eta",
"good_tc_phi" : "tc_phi",
}

ds_tc = ds_all["tc"]
@@ -108,41 +106,41 @@

gen_keep = {
"event": "event",
"good_genpart_exeta": "gen_eta",
"good_genpart_exphi": "gen_phi",
"good_genpart_exeta" : "gen_eta",
"good_genpart_exphi" : "gen_phi",
"good_genpart_energy": "gen_en",
"good_genpart_pt" : "gen_pt",
}
ds_gen = ds_all["gen"]
ds_gen = ds_gen.rename(columns=gen_keep)

cl_keep = {
"event": "event",
"good_cl3d_eta": "cl3d_eta",
"good_cl3d_phi": "cl3d_phi",
"good_cl3d_id": "cl3d_id",
"good_cl3d_eta" : "cl3d_eta",
"good_cl3d_phi" : "cl3d_phi",
"good_cl3d_id" : "cl3d_id",
"good_cl3d_energy": "cl3d_en",
"good_cl3d_pt": "cl3d_pt",
"good_cl3d_pt" : "cl3d_pt",
}
ds_cl = ds_all["cl"]
ds_cl = ds_cl.rename(columns=cl_keep)
return ds_gen, ds_cl, ds_tc

def EventDataParticle(particles, PU, tag, reprocess, debug=False, logger=None):
def EventDataParticle(particles, pu, tag, reprocess, logger=None):
"""Factory for EventData instances of different particle types"""
with open(params.CfgPath, "r") as afile:
cfg = yaml.safe_load(afile)
if particles is None:
particles = cfg["selection"]["particles"]
if particles not in ("photons", "electrons", "pions"):
raise ValueError("{} are not supported.".format(particles))
defevents = cfg["defaultEvents"][PU][particles]
defevents = cfg["defaultEvents"][f"PU{pu}"][particles]

indata = InputData()
indata.path = cfg["io"][PU][particles]["file"]
indata.adir = cfg["io"][PU][particles]["dir"]
indata.tree = cfg["io"][PU][particles]["tree"]
indata.path = cfg["io"][f"PU{pu}"][particles]["file"]
indata.adir = cfg["io"][f"PU{pu}"][particles]["dir"]
indata.tree = cfg["io"][f"PU{pu}"][particles]["tree"]

tag = particles + "_" + PU + "_" + tag
tag += "_debug" * debug
tag = particles + "_" + f"PU{pu}" + "_" + tag

return EventData(indata, tag, defevents, reprocess, logger)
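
The data_process.py changes above replace the string-valued PU argument (e.g. 'PU0') with an integer pu that is expanded into the "PU{pu}" configuration keys, and drop the debug flag from the cache tag. Below is a minimal usage sketch of the refactored entry point; the import path and the argument values are assumptions for illustration, not taken from the commit.

# Usage sketch (assumed import path; argument values are illustrative only).
from data_handle.data_process import get_data_reco_chain_start

# pu is now an integer pileup scenario (e.g. 0) that the factory expands
# internally into the "PU{pu}" keys of the YAML configuration, instead of
# the former 'PU0'-style string argument.
ds_gen, ds_cl, ds_tc = get_data_reco_chain_start(
    nevents=500,
    reprocess=False,
    tag='chain',
    particles='photons',
    pu=0,
)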
12 changes: 8 additions & 4 deletions bye_splits/data_handle/event.py
@@ -160,17 +160,21 @@ def select(self):
total_events = tree.num_entries
allvars = set([y for x in self.var.values() for y in x.values()])

threshold_size = 0.1
threshold_size_bytes = threshold_size * 8e+9
threshold_size_bytes = 1e+9 # 1 gigabyte
data = ak.Array([])
for array in tree.iterate(filter_name='/' + '|'.join(allvars) + '/', step_size='20 MB', library='ak'):
if (data.layout.nbytes + array.layout.nbytes) <= threshold_size_bytes:
data = ak.concatenate([data, array], axis=0)
else:
break

if len(data)/total_events < 0.1:
print('[WARNING] Function select() in event.py \nThe number of events in the Parquet file is less than 10% compared to the events in the ROOT file.')
threshold = 0.1
try:
if len(data) / total_events < threshold:
print(f'[WARNING] Function select() in event.py\nThe number of events in the Parquet file is less than {threshold * 100}% compared to the events in the ROOT file.')
except ZeroDivisionError:
print("The input file is empty.")

#data = tree.arrays(filter_name='/' + '|'.join(allvars) + '/', entry_stop=5000, library='ak')
# data[self.var.v] = data.waferv
# data[self.newvar.vs] = -1 * data.waferv
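
In event.py the memory cap for the chunked read is now an explicit 1 GB byte threshold, and the low-coverage warning is guarded against an empty input file. The standalone sketch below reproduces that read pattern with a hypothetical file and tree name; it illustrates the technique rather than the project's exact code.

import uproot
import awkward as ak

threshold_size_bytes = 1e+9  # stop accumulating once ~1 GB is held in memory
threshold = 0.1              # warn when fewer than 10% of the events were kept

with uproot.open("events.root") as afile:   # hypothetical file name
    tree = afile["tree"]                    # hypothetical tree name
    total_events = tree.num_entries
    data = ak.Array([])
    # the project additionally filters branch names; omitted here for brevity
    for array in tree.iterate(step_size='20 MB', library='ak'):
        if data.layout.nbytes + array.layout.nbytes <= threshold_size_bytes:
            data = ak.concatenate([data, array], axis=0)
        else:
            break
    try:
        if len(data) / total_events < threshold:
            print(f'[WARNING] kept less than {threshold * 100}% of the input events')
    except ZeroDivisionError:
        print("The input file is empty.")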
6 changes: 3 additions & 3 deletions bye_splits/data_handle/geometry.py
@@ -302,8 +302,8 @@ def _display_plotly_sci_cartesian(self, df):
return df.drop(columns=['rmin', 'rmax', 'phimin', 'phimax'])

def cil2cart(self, r, phi):
x,y = r*np.cos(np.pi/2-phi), r*np.sin(np.pi/2-phi)
return x,y
x, y = r*np.cos(np.pi/2-phi), r*np.sin(np.pi/2-phi)
return x, y

def filter_columns(self, d):
"""Filter some columns to reduce memory usage"""
@@ -326,7 +326,7 @@ def prepare_for_display(self, df):
"""Prepares dataframe to be displayed by certain libraries."""
libraries = ('bokeh', 'plotly')
if self.library not in libraries:
raise NotImplementedError()
raise NotImplementedError(f"'{self.library}' library is not supported. Supported libraries: {', '.join(libraries)}")
if self.is_tc: df = self._display_trigger_cells(df)
return df

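
The geometry.py hunks are cosmetic apart from the more informative NotImplementedError message. cil2cart converts cylindrical (r, phi) coordinates to Cartesian ones with phi measured from the y axis; the short standalone check below (a sketch, not project code) confirms that convention.

import numpy as np

def cil2cart(r, phi):
    # phi is measured from the y axis: x = r*cos(pi/2 - phi), y = r*sin(pi/2 - phi)
    x, y = r * np.cos(np.pi/2 - phi), r * np.sin(np.pi/2 - phi)
    return x, y

print(cil2cart(1.0, 0.0))       # ~(0.0, 1.0): phi = 0 points along +y
print(cil2cart(1.0, np.pi/2))   # ~(1.0, 0.0): phi = pi/2 points along +x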
40 changes: 20 additions & 20 deletions bye_splits/plot/chain_plotter.py
@@ -163,29 +163,29 @@ def _get_bins(self, df):

def seed_plotter(self, df, pars):
self.df_seed = df
self._output('seed_'+self.tag, pars, folder='SeedROIStudies')
self._output('seed_'+self.tag, pars, folder='SeedCSStudies')

# required to calculate the seeding efficiency
if self.mode == 'both':
for suf in ('_def', '_roi'):
for suf in ('_def', '_cs'):
nans = df['nseeds'+suf].isna() #NaN's cannot be converted to a boolean!
df['has_seeds'+suf] = (df['nseeds'+suf] > 0).astype(int)
df['has_seeds'+suf][nans] = np.nan
df['less_seeds'+suf] = (df['nseeds'+suf] < df['nrois'+suf]).astype(int)
df['less_seeds'+suf] = (df['nseeds'+suf] < df['ncss'+suf]).astype(int)
df['less_seeds'+suf][nans] = np.nan
else:
df['has_seeds'] = (df.nseeds > 0).astype(int)
df['less_seeds'] = (df.nseeds < df.nrois).astype(int)
df['less_seeds'] = (df.nseeds < df.ncss).astype(int)

_, bins = self._get_bins(df)

if self.mode == 'both':
avars = []
for suf in ('_def', '_roi'):
avars.extend(['nseeds' + suf, 'nrois' + suf, 'has_seeds' + suf, 'less_seeds' + suf])
for suf in ('_def', '_cs'):
avars.extend(['nseeds' + suf, 'ncss' + suf, 'has_seeds' + suf, 'less_seeds' + suf])
#avars.extend(['nseeds' + suf, 'has_seeds' + suf, 'less_seeds' + suf])
else:
avars = ['nseeds', 'nrois', 'has_seeds', 'less_seeds']
avars = ['nseeds', 'ncss', 'has_seeds', 'less_seeds']
aggr_quantities = ['median', 'mean', 'std', self._q1, self._q3, 'sum', 'size', self._nanlen]
values = {x: df.groupby(pd.cut(df['gen'+x], bins[x]))
.agg(aggr_quantities)
@@ -195,27 +195,27 @@ def seed_plotter(self, df, pars):

if self.mode == 'both':
leglab = {}
_leglab = {'_def': 'R/z,'+self.uc['phi'], '_roi': 'CS'}
_leglab = {'_def': 'R/z,'+self.uc['phi'], '_cs': 'CS'}
for suf in _leglab.keys():
leglab.update({'nseeds'+suf: '#seeds ('+_leglab[suf]+')',
'nrois' +suf: '#CS regions ('+_leglab[suf]+')',
'ncss' +suf: '#CS regions ('+_leglab[suf]+')',
'has_seeds'+suf: _leglab[suf],
'less_seeds'+suf: _leglab[suf],
})
else:
leglab = {'nseeds': '#seeds', 'nrois': '#CS regions'}
leglab = {'nseeds': '#seeds', 'ncss': '#CS regions'}

# efficiencies need a separate treatment
if self.mode == 'both':
for suf in ('_def', '_roi'):
for suf in ('_def', '_cs'):
avars.remove('has_seeds'+suf)
avars.remove('less_seeds'+suf)
else:
avars.remove('has_seeds')
avars.remove('less_seeds')

# plot ROI and seed multiplicities
average_numbers_y_range = (0.98,1.25) if self.cfg['seed_roi']=='NoROItcOut' else (0.98,1.5)
# plot CS and seed multiplicities
average_numbers_y_range = (0.98,1.25) if self.cfg['seed_cs']=='NoCStcOut' else (0.98,1.5)
for binvar in bins.keys():
hshift = (bins[binvar][1]-bins[binvar][0])/2
fcounts[binvar] = bokeh.plotting.figure(
@@ -226,7 +226,7 @@
)

for ivar,avar in enumerate(avars):
# skip plotting number of ROIs for the default chain
# skip plotting number of CSs for the default chain
bincenters = (bins[binvar][:-1]+bins[binvar][1:])/2
opt = dict(x=bincenters+self._x_shift(ivar,binvar,len(avars)),
legend_label=leglab[avar], color=self.palette[ivar])
@@ -258,27 +258,27 @@
fcounts[binvar].add_layout(quant)

fcounts[binvar].xaxis.axis_label = self.info[binvar]['label_2d']
fcounts[binvar].yaxis.axis_label = 'Average of the number of seeds and ROIs'
self._set_fig_common_attributes(fcounts[binvar], title="ROIs and Seeds multiplicities",
fcounts[binvar].yaxis.axis_label = 'Average of the number of seeds and CSs'
self._set_fig_common_attributes(fcounts[binvar], title="CSs and Seeds multiplicities",
location='top_right')

# plot seeding efficiency
for ivar,vvv in enumerate(('has_seeds', 'less_seeds')):
roi_waste_y_range = ((0.8,1.2) if 'has_seeds' in vvv else (-0.02,0.2)
if self.cfg['seed_roi']['InputName']=='NoROItcOut' else (-0.02,0.35))
cs_waste_y_range = ((0.8,1.2) if 'has_seeds' in vvv else (-0.02,0.2)
if self.cfg['seed_cs']['InputName']=='NoCStcOut' else (-0.02,0.35))
for binvar in bins.keys():
figname = binvar + '_' + vvv
hshift = (bins[binvar][1]-bins[binvar][0])/2
feff[figname] = bokeh.plotting.figure(
width=600, height=self.fig_height, title='', tools=self.fig_tools,
x_range=(bins[binvar][0], bins[binvar][-1]+hshift),
y_range=roi_waste_y_range,
y_range=cs_waste_y_range,
y_axis_type='linear'
)

bincenters = (bins[binvar][:-1]+bins[binvar][1:])/2

for isuf,suf in enumerate(('_def', '_roi')):
for isuf,suf in enumerate(('_def', '_cs')):
if self.mode != 'both': #handle the single chain case
if isuf>0:
break
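
The chain_plotter.py changes rename the ROI quantities ('_roi', 'nrois') to CS ones ('_cs', 'ncss') throughout the seed plots. The seeding flags are computed so that NaN entries remain NaN instead of being silently cast to 0; the toy example below shows the same computation on fabricated data and uses .loc, which avoids the chained-assignment pattern pandas warns about (an illustration, not the project's code).

import numpy as np
import pandas as pd

df = pd.DataFrame({'nseeds_cs': [0, 2, np.nan], 'ncss_cs': [1, 1, 2]})

nans = df['nseeds_cs'].isna()   # NaN cannot be converted to a boolean
df['has_seeds_cs'] = (df['nseeds_cs'] > 0).astype(int)
df.loc[nans, 'has_seeds_cs'] = np.nan
df['less_seeds_cs'] = (df['nseeds_cs'] < df['ncss_cs']).astype(int)
df.loc[nans, 'less_seeds_cs'] = np.nan
print(df)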
(Diffs for the remaining 17 changed files are not shown here.)
