# Imports & Custom Functions

In [1]:
# Setup cell: import project and third-party modules for the rest of the notebook.
print('Running...')

from pathlib import Path
import os
# os.path.abspath('') resolves to the current working directory.
HERE = Path(os.path.abspath(''))
# Temporarily switch to the parent directory while importing.
# NOTE(review): presumably so the `cfg` and `analysis` packages resolve from
# there — confirm against the repository layout.
os.chdir(HERE.parent)

# config imports
from cfg import config

# custom beacon imports
from analysis.reader import Reader
from analysis.sinesubtraction import SineSubtract
from analysis import reconstruction
from analysis import utils

# standard imports
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
# Show every column when a DataFrame is displayed (no truncation).
pd.set_option("display.max_columns", None)
import dask
import dask.dataframe as dd
from glob import glob
import datetime

# import for interactive viewing
from tqdm.auto import tqdm
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import display

# Restore the original working directory now that the imports are done.
os.chdir(HERE)

print('Finished.')

Running...
Welcome to JupyROOT 6.28/06
Finished.


In [2]:
# Nice sorter
import re

def tryint(s):
    """Convert ``s`` to an ``int`` when possible, otherwise return it unchanged.

    Parameters
    ----------
    s : object
        Usually a string chunk produced by splitting a name on digit runs.

    Returns
    -------
    int or object
        ``int(s)`` if the conversion succeeds, else the original ``s``.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "not a number"; a bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        return s

def alphanum_key(s):
    """Build a natural-sort key by splitting ``s`` on runs of ASCII digits.

    Each chunk is passed through ``tryint``, so digit runs become integers
    while the text in between stays a string:
        "z23a" -> ["z", 23, "a"]
    """
    pieces = re.split('([0-9]+)', s)
    return list(map(tryint, pieces))

def sort_nicely(l):
    """Sort the list ``l`` in place in natural (human) order.

    Ordering is decided by ``alphanum_key``, so embedded numbers compare by
    value rather than character by character. Nothing is returned.
    """
    natural_order = alphanum_key
    l.sort(key=natural_order)

# Load Data

In [None]:
print('Loading data...')
# Collect every processed pickle file and order them naturally by name.
dataFiles = glob(str(config.BEACON_PROCESSED_DATAPATH / '*.pkl'))
sort_nicely(dataFiles)

# The run number is the text after 'Run' in the second-to-last '_'-separated
# field of each path (naming scheme is not visible here — verify against the
# files on disk). A set gives O(1) membership checks instead of repeated
# list.remove scans.
foundRuns = {int(path.split('_')[-2].split('Run')[-1]) for path in dataFiles}

# Runs in the expected range 120..1910 (inclusive) with no processed file.
missingRuns = [run for run in range(120, 1911) if run not in foundRuns]
print(f'Found {len(dataFiles)} runs.')

@dask.delayed
def load_pickle(filePath):
    """Read the pickled object stored at ``filePath`` using pandas.

    The function is wrapped in ``dask.delayed``, so calling it builds a lazy
    task rather than reading the file immediately.
    """
    # NOTE(review): unpickling can execute arbitrary code — only point this
    # at trusted files.
    frame = pd.read_pickle(filePath)
    return frame

# One lazy load task per file, in the same order as dataFiles.
delayed_dfs = list(map(load_pickle, dataFiles))
# Combine the tasks into a single dask DataFrame; reset_index(drop=False)
# keeps the previous index as a regular column.
fullData = dd.from_delayed(delayed_dfs).reset_index(drop=False)
print('Done.')

## Data Summary

In [8]:
print('--- Data Summary ---\n')

# Counting rows of a dask DataFrame triggers a computation, so do it once
# and reuse the result for both the shape and the size estimate.
nCols = len(fullData.columns)
nRows = len(fullData.index)
print(f'Data has {nCols} Columns x {nRows} Rows')
# Rough size estimate that assumes 64 bits (8 bytes) per value.
print(f'Size: ~{1e-9*nCols*nRows*64/8:0.2f} GB')

# Group the column names by prefix for a readable overview.
columns = fullData.columns
genCols = ", ".join([col for col in columns if ("sc " not in col) and ("rf" not in col)])
scCols = ", ".join([col for col in columns if ("sc " in col)])
rfHcols = ", ".join([col for col in columns if ("rfH" in col)])
rfVcols = ", ".join([col for col in columns if ("rfV" in col)])
print(f'\n- Columns -\nGeneral: {genCols}\nScint: {scCols}\nRF Hpol: {rfHcols}\nRF Vpol: {rfVcols}\n')

# Evaluate both extrema in a single dask.compute call so the data is only
# traversed once instead of once per statistic.
t_min, t_max = dask.compute(fullData['time'].min(), fullData['time'].max())
# fromtimestamp() converts to naive datetimes in the local timezone.
t0 = datetime.datetime.fromtimestamp(t_min)
tf = datetime.datetime.fromtimestamp(t_max)
print(f'Start: {t0.strftime("%Y-%m-%d %H:%M:%S")}')
print(f'End: {tf.strftime("%Y-%m-%d %H:%M:%S")}')
print(f'Time Span: {tf - t0}')

display(fullData.head())
display(fullData.tail())

--- Data Summary ---

Data has 52 Columns x 51000 Rows
Size: ~0.02 GB

- Columns -
General: index, run, entry, type, time
Scint: sc score, sc zen, sc az, sc zen spread, sc az spread, sc amp range, sc amp mean, sc peaks min, sc peaks max, sc peaks mean, sc arrival time mean, sc risetime range, sc risetime mean, sc integration range, sc integration mean
RF Hpol: rfH score, rfH zen, rfH az, rfH zen spread, rfH az spread, rfH snr min, rfH snr max, rfH snr mean, rfH coherent snr, rfH time delay range, rfH time delay mean, rfH h peak range, rfH h peak mean, rfH h peak loc min, rfH h peak loc max, rfH h peak loc mean, rfH coherent h peak loc, rfH impulsitivy min, rfH impulsivity max, rfH impulsivity mean, rfH coherent impulsivity
RF Vpol: rfV snr min, rfV snr max, rfV snr mean, rfV h peak range, rfV h peak mean, rfV h peak loc min, rfV h peak loc max, rfV h peak loc mean, rfV impulsitivy min, rfV impulsivity max, rfV impulsivity mean

Start: 2023-12-05 12:09:34
End: 2023-12-11 18:15:47
Time Span: 6 days, 6:06:13

Unnamed: 0,index,run,entry,type,time,sc score,sc zen,sc az,sc zen spread,sc az spread,sc amp range,sc amp mean,sc peaks min,sc peaks max,sc peaks mean,sc arrival time mean,sc risetime range,sc risetime mean,sc integration range,sc integration mean,rfH score,rfH zen,rfH az,rfH zen spread,rfH az spread,rfH snr min,rfH snr max,rfH snr mean,rfH coherent snr,rfH time delay range,rfH time delay mean,rfH h peak range,rfH h peak mean,rfH h peak loc min,rfH h peak loc max,rfH h peak loc mean,rfH coherent h peak loc,rfH impulsitivy min,rfH impulsivity max,rfH impulsivity mean,rfH coherent impulsivity,rfV snr min,rfV snr max,rfV snr mean,rfV h peak range,rfV h peak mean,rfV h peak loc min,rfV h peak loc max,rfV h peak loc mean,rfV impulsitivy min,rfV impulsivity max,rfV impulsivity mean
0,0,550,0,1,1701799774,,,,,,,,0,0,0.0,,,,0.0,0.0,0.104702,59.25,17.75,11.0,45.75,2.846706,3.515332,3.188294,2.974937,108.066196,64.500788,10.993054,16.529608,0.423828,0.78125,0.578776,0.279555,0.071542,0.359616,0.20028,0.12008,2.763035,3.251238,2.94095,30.621224,18.629035,0.105469,0.964844,0.553711,0.067951,0.520122,0.241657
1,1,550,1,4,1701799774,7.5e-05,27.5,-142.75,28.0,25.5,17.0,17.5,1,1,1.0,601.095357,66.5,593.267857,1724.0,1732.0,0.082474,155.75,-87.5,4.25,26.5,3.019567,3.540332,3.201793,3.180336,83.793677,-4.386661,11.554144,14.797033,0.0,0.386719,0.214844,0.95933,0.140348,0.291989,0.189812,0.49397,2.727545,4.11605,3.388219,13.465411,16.864909,0.152344,0.994141,0.570638,0.042917,1.0,0.375522
2,2,550,2,1,1701799775,,,,,,,,0,0,0.0,,,,0.0,0.0,0.067381,116.25,-170.75,27.0,35.25,2.751596,3.628691,3.096046,3.37065,116.479049,-63.42483,8.862326,12.226817,0.095703,0.779297,0.556315,0.578855,0.114144,0.41854,0.23689,0.231648,2.859457,3.471963,3.189359,26.226831,20.311208,0.341797,0.921875,0.673828,0.044202,0.289536,0.177107
3,3,550,3,4,1701799775,0.000251,25.5,122.5,8.5,32.25,27.0,73.8,1,2,1.25,604.852161,80.634747,595.486395,15784.0,11806.0,0.082135,35.25,-23.25,1.75,29.5,2.858923,3.832414,3.240811,3.623021,41.865176,-4.754031,6.382335,14.101449,0.285156,0.875,0.488932,0.464467,0.0317,0.410867,0.172284,0.218007,2.901197,5.722277,3.813008,84.726921,30.893501,0.017578,0.759766,0.350911,0.092167,0.452397,0.231067
4,4,550,4,1,1701799775,,,,,,,,0,0,0.0,,,,0.0,0.0,0.08169,162.25,97.5,17.25,29.5,2.66775,4.005098,3.183615,3.491923,78.403507,49.292586,19.110986,14.919929,0.429688,0.990234,0.657227,0.182935,-0.053532,0.517794,0.229334,0.210339,2.987649,5.014971,3.89299,23.222757,24.072747,0.089844,0.992188,0.585286,0.205311,0.552274,0.435027


Unnamed: 0,index,run,entry,type,time,sc score,sc zen,sc az,sc zen spread,sc az spread,sc amp range,sc amp mean,sc peaks min,sc peaks max,sc peaks mean,sc arrival time mean,sc risetime range,sc risetime mean,sc integration range,sc integration mean,rfH score,rfH zen,rfH az,rfH zen spread,rfH az spread,rfH snr min,rfH snr max,rfH snr mean,rfH coherent snr,rfH time delay range,rfH time delay mean,rfH h peak range,rfH h peak mean,rfH h peak loc min,rfH h peak loc max,rfH h peak loc mean,rfH coherent h peak loc,rfH impulsitivy min,rfH impulsivity max,rfH impulsivity mean,rfH coherent impulsivity,rfV snr min,rfV snr max,rfV snr mean,rfV h peak range,rfV h peak mean,rfV h peak loc min,rfV h peak loc max,rfV h peak loc mean,rfV impulsitivy min,rfV impulsivity max,rfV impulsivity mean
0,995,600,995,4,1702340146,0.000103,47.75,18.5,43.75,13.25,25.0,22.0,0,1,0.75,604.822469,25.407407,594.385802,3424.0,1624.0,0.093216,53.75,13.75,2.5,26.25,2.613154,3.428808,3.125883,3.510318,93.165605,53.917465,16.462968,13.986589,0.052734,0.865234,0.477214,0.23699,0.061525,0.408585,0.220911,0.029003,2.933559,4.003371,3.285644,13.739997,14.24569,0.291016,0.970703,0.637044,-0.114749,0.389318,0.198172
1,996,600,996,1,1702340146,,,,,,,,0,0,0.0,,,,0.0,0.0,0.095669,53.0,-27.5,10.25,13.5,2.780904,3.973346,3.392797,3.55452,84.793318,3.55607,13.450644,16.907741,0.0,0.494141,0.307617,2e-05,0.08453,0.39705,0.294157,0.060221,3.081404,4.25393,3.543716,87.939613,26.545259,0.113281,0.992188,0.580729,0.007004,0.512649,0.248984
2,997,600,997,1,1702340147,,,,,,,,0,0,0.0,,,,0.0,0.0,0.074316,93.0,176.0,2.25,11.75,3.07535,3.681148,3.250683,3.34794,142.232339,-66.334965,7.260328,13.278048,0.082031,0.982422,0.500651,0.521316,0.144729,0.561265,0.294556,-0.021641,2.815214,4.380031,3.308644,28.104725,16.42194,0.001953,0.994141,0.446289,0.11905,1.0,0.357588
3,998,600,998,4,1702340147,3.6e-05,19.0,92.5,17.25,109.75,12.0,10.5,1,1,1.0,591.844167,62.6,584.016667,1332.0,1019.0,0.110875,17.5,22.5,7.0,20.0,2.850103,3.765231,3.297263,3.5999,25.940808,-0.082612,14.266356,15.403124,0.0,0.994141,0.46582,0.996874,0.312012,1.0,0.511484,0.216373,2.739997,3.50785,3.076382,28.350078,17.965486,0.060547,0.898438,0.456055,0.142724,0.400928,0.265186
4,999,600,999,4,1702340147,4.7e-05,16.0,118.0,12.75,119.0,47.0,24.25,1,1,1.0,589.619708,60.0,581.792208,4836.0,2414.0,0.086479,29.75,-40.25,4.0,29.75,2.723911,3.084843,2.873481,2.998524,46.822467,-24.767596,12.228268,12.18817,0.095703,0.976562,0.495117,0.399612,0.112835,0.324716,0.219029,0.260308,3.004377,4.343752,3.612084,22.042028,20.07795,0.0,0.720703,0.338216,0.119882,0.456584,0.297575


## Variable Distributions

In [9]:
# one at a time: controls for histogramming a single column of fullData

columns = list(fullData.columns)

# Column selector. `continuous_update` and `orientation` were dropped here:
# they are not Dropdown traits, and traitlets emits a DeprecationWarning for
# unrecognized constructor arguments.
column = widgets.Dropdown(
    options=columns,
    value=columns[0],
    description='Column:',
    disabled=False,
)

# Number of histogram bins, restricted to the range 5..101.
bins = widgets.BoundedIntText(
    value=51,
    min=5,
    max=101,
    step=1,
    description='Bins:',
    disabled=False
)

# Toggle for a logarithmic count axis.
logy = widgets.Checkbox(
    value=False,
    description='Log?',
    disabled=False,
    indent=True
)

@interact(column=column, bins=bins, logy=logy)
def run_app(column, bins, logy):
    """Draw a histogram of one column of ``fullData``.

    column : name of the column to plot
    bins   : number of histogram bins
    logy   : when true, the count axis is logarithmic
    """
    fig, ax = plt.subplots(figsize=[10,4])
    # The selected column is computed from the dask DataFrame on every call.
    samples = fullData[column].compute().values
    ax.hist(samples, bins=bins, log=logy)
    ax.set_xlabel(column)
    ax.set_ylabel('Count')

interactive(children=(Dropdown(description='Column:', options=('index', 'run', 'entry', 'type', 'time', 'sc sc…