In [None]:
from functools import partial

import numpy as np
import pandas as pd
import seaborn as sns

from datasets import get_hits_bman, get_hits_simple, get_hits_trackml, get_hits_trackml_by_volume, get_hits_trackml_by_module

# Collect one per-event summary frame per dataset, then stack them into `stats`.
# Each getter is a zero-argument callable; the frame it returns is grouped by
# its `event_id` column and summarised over its `track` column.
data = []
for name, getter in [
    ('BM@N', get_hits_bman),
    ('simple', partial(get_hits_simple, n_events=1000, event_size=100)),
    ('TrackML_event', get_hits_trackml),
    ('TrackML_volume', get_hits_trackml_by_volume),
    ('TrackML_module', get_hits_trackml_by_module),
]:
    hits = getter()
    # Per event: 'size' counts rows (renamed to `hits`), 'nunique' counts
    # distinct `track` values (renamed to `tracks`).
    stat = (
        hits.groupby('event_id')['track']
        .agg(['size', 'nunique'])
        .rename(columns={'size': 'hits', 'nunique': 'tracks'})
        .reset_index()
        .assign(dataset=name)
    )
    # Drop the raw frame before the next getter runs so only one is held at a time.
    del hits
    data.append(stat)
stats = pd.concat(data)

In [None]:
# Histogram of the per-event `tracks` column, one panel per dataset on a 2x3 grid.
stats.hist(
    column='tracks',
    by='dataset',
    layout=(2, 3),
    figsize=(32, 8),
)

In [None]:
# Histogram of the per-event `hits` column, one panel per dataset on a 2x3 grid;
# `log=True` is forwarded to the underlying histogram call (log-scaled counts).
stats.hist(
    column='hits',
    by='dataset',
    layout=(2, 3),
    figsize=(32, 8),
    log=True,
)

In [None]:
# One hits-vs-tracks hexbin figure per dataset: log-scaled bin counts and a
# log-scaled x axis, default grid size.
# NOTE(review): sharex=False is presumably the usual workaround that keeps the
# x-axis labels visible next to the colorbar -- confirm.
(
    stats
    .groupby('dataset')
    .plot
    .hexbin(
        x='hits',
        y='tracks',
        bins='log',
        xscale='log',
        figsize=(16, 10),
        sharex=False,
    )
)

In [None]:
# One hits-vs-tracks hexbin figure per dataset: log-scaled bin counts and a
# log-scaled x axis, with an explicit gridsize of 32.
# NOTE(review): sharex=False is presumably the usual workaround that keeps the
# x-axis labels visible next to the colorbar -- confirm.
(
    stats
    .groupby('dataset')
    .plot
    .hexbin(
        x='hits',
        y='tracks',
        gridsize=32,
        bins='log',
        xscale='log',
        figsize=(16, 10),
        sharex=False,
    )
)

In [None]:
# One hits-vs-tracks hexbin figure per dataset: log-scaled bin counts with an
# explicit gridsize of 32; no xscale is passed here, so the x axis keeps its
# default scale.
# NOTE(review): sharex=False is presumably the usual workaround that keeps the
# x-axis labels visible next to the colorbar -- confirm.
(
    stats
    .groupby('dataset')
    .plot
    .hexbin(
        x='hits',
        y='tracks',
        gridsize=32,
        bins='log',
        figsize=(16, 10),
        sharex=False,
    )
)