# Visualize the distribution of segment lengths
- for each dataset
- radius bins in linear and log scale 
- per-bin segment counts in linear and log scale
- show cumulative proportion of segments *longer* than R, to estimate optimal cut-off radius.
    - That is, the neighbor search distance for initial segment candidate generation.

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from datasets import get_hits
from tracking.segment import gen_seg_track_sequential

In [None]:
def plot_seg_len_stats(hits, segmentation=gen_seg_track_sequential, figsize=(24, 12)):
    """Plot a 3x4 grid of segment-length histograms for one dataset.

    Segments are generated per event by applying ``segmentation`` to each
    ``event_id`` group of ``hits``.  The length ``r`` of a segment is the
    Euclidean distance between the ``x``/``y``/``z`` coordinates of its two
    end hits.

    Grid layout (columns cycle through log/linear x and log/linear y):

    * row 0 -- segment counts per length bin (seaborn);
    * row 1 -- cumulative proportion of segments, accumulated from the
      shortest bin upwards (seaborn);
    * row 2 -- reversed cumulative histogram (matplotlib, ``cumulative=-1``,
      ``density=True``), normalised so the first bin equals 1, i.e. the
      proportion of segments in that bin or any longer one.

    Parameters
    ----------
    hits : pandas.DataFrame
        Must provide the columns ``event_id``, ``x``, ``y`` and ``z``.
    segmentation : callable
        Called once per event group.  Its results are concatenated and
        columns 0 and 1 are used as ``hits.loc`` labels of the segment end
        points (assumes a 2-D array of index labels per event -- confirm
        against the segmentation helper in use).
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.
    """
    seg = np.concatenate(hits.groupby('event_id').apply(segmentation))
    coords = ['x', 'y', 'z']
    starts = hits.loc[seg[:, 0], coords].to_numpy()
    ends = hits.loc[seg[:, 1], coords].to_numpy()
    r = np.linalg.norm(ends - starts, axis=1)

    # (log x, log y) combination shown in each of the four columns.
    scales = [(True, True), (True, False), (False, True), (False, False)]

    # Geometric bin edges shared by both log-x panels of the bottom row.
    # ndarray.min/max run in native code; the builtin min()/max() would walk
    # the array element by element in Python.
    log_bins = np.geomspace(r.min(), r.max(), 32)

    fig, ax = plt.subplots(3, 4, figsize=figsize)
    for col, (log_x, log_y) in enumerate(scales):
        scale_label = '{} R, {} y'.format(
            'log' if log_x else 'linear',
            'log' if log_y else 'linear',
        )

        sns.histplot(x=r, log_scale=(log_x, log_y), ax=ax[0, col])
        ax[0, col].set_title('count ({})'.format(scale_label))

        sns.histplot(x=r, log_scale=(log_x, log_y), ax=ax[1, col],
                     cumulative=True, stat='proportion')
        ax[1, col].set_title('proportion up to R ({})'.format(scale_label))

        # matplotlib needs explicit geometric bins plus a log x axis to get
        # the same log-spaced binning seaborn derives from log_scale.
        bins = log_bins if log_x else 32
        ax[2, col].hist(r, log=log_y, bins=bins, cumulative=-1, density=True)
        if log_x:
            ax[2, col].set_xscale('log')
        ax[2, col].set_title('proportion longer than R ({})'.format(scale_label))


In [None]:
# Segment-length statistics for the 'simple' dataset.
# NOTE(review): the second get_hits argument (512) is presumably an event count -- confirm in datasets.get_hits.
plot_seg_len_stats(get_hits('simple', 512))

In [None]:
# Segment-length statistics for the 'spdsim' dataset.
# NOTE(review): the second get_hits argument (512) is presumably an event count -- confirm in datasets.get_hits.
plot_seg_len_stats(get_hits('spdsim', 512))

In [None]:
# Segment-length statistics for the 'bman' dataset, loaded with get_hits' default arguments.
plot_seg_len_stats(get_hits('bman'))

In [None]:
# Segment-length statistics for the 'trackml_volume' dataset, loaded with get_hits' default arguments.
plot_seg_len_stats(get_hits('trackml_volume'))

In [None]:
# Segment-length statistics for the 'trackml' dataset, loaded with get_hits' default arguments.
plot_seg_len_stats(get_hits('trackml'))