### Notebook to convert RQ labels for known data into a better format and link them with the raw data

In [1]:
import qetpy as qp
import rqpy as rp
import numpy as np
import pandas as pd
import vae
from glob import glob
import pickle as pkl
import matplotlib.pyplot as plt
from matplotlib import rcParams
import deepdish as dd

In [2]:
# Global matplotlib defaults for every figure in this notebook:
# larger canvas, dashed grid, and inward ticks mirrored on all four sides.
rcParams.update({
    'figure.figsize': (10, 6),
    'axes.grid': True,
    'grid.linestyle': '--',
    'xtick.direction': 'in',
    'xtick.top': True,
    'ytick.direction': 'in',
    'ytick.right': True,
})

Define paths

In [3]:
# Root folder of the processed Run44 data set (contains traces/ and metadata/).
base_path = '/gpfs/slac/staas/fs1/supercdms/tf/slac/Run44/Run44_v2/'
# Pickled RQ dataframe holding the conventional features for every event.
rq_path = '/gpfs/slac/staas/fs1/supercdms/tf/slac/Run44/Run44_processed_DF.pkl'
# Destination folder for the per-dump label files.
label_save_path = f'{base_path}labels/'

In [4]:
# List every entry under traces/ and metadata/, sorted so the order is deterministic.
traces_path = glob(f'{base_path}traces/*')
traces_path.sort()
meta_path = glob(f'{base_path}metadata/*')
meta_path.sort()


In [5]:
# Run the packaged helper over all trace dumps, writing into label_save_path.
# NOTE(review): presumably equivalent to the notebook-local store_rq_labels
# defined further down — confirm against the vae package.
vae.store_rq_labels(traces_path, rq_path, label_save_path)

In [42]:
def store_rq_labels(traces_path, rq_path, savepath):
    """
    Utility function to store all conventional features
    from PD2 DM analysis for corresponding events.

    For every trace dump in `traces_path`, the rows of the RQ dataframe
    whose `ser_ev` matches the dump's event numbers are selected and
    written to `<first 20 characters of the dump file name>labels.h5`
    inside `savepath`, under the HDF key 'labels'.

    Parameters
    ----------
    traces_path : list of str
        Paths to the processed trace dumps; each one is passed to
        `vae.load_preprocessed_traces`.
    rq_path : str
        Absolute path to the pickled RQ dataframe. It must contain a
        `ser_ev` column.
    savepath : str
        Directory where the label files should be saved.

    Raises
    ------
    ValueError
        If the selected RQ rows (sorted by `ser_ev`) do not match the
        event numbers of a dump one-to-one and in the same order.
    """
    import os

    # NOTE: unpickling can execute arbitrary code; only load trusted RQ files.
    with open(rq_path, 'rb') as rq_file:
        rq = pkl.load(rq_file)
    # Sorting by ser_ev means the selected rows come out in ascending event order.
    rq = rq.sort_values('ser_ev')

    for p in traces_path:
        # Only the event numbers are needed here; the traces are discarded.
        _, eventnumbers = vae.load_preprocessed_traces(p)
        eventnumbers = np.asarray(eventnumbers)
        label = f"{p.split('/')[-1][:20]}labels.h5"

        # One vectorised membership test instead of OR-ing a full-column
        # comparison per event.
        df_labels = rq[rq.ser_ev.isin(eventnumbers)]

        # array_equal also returns False on a length mismatch, so missing or
        # duplicated events give the intended message rather than a pandas
        # "Lengths must match" error.
        if not np.array_equal(df_labels.ser_ev.to_numpy(), eventnumbers):
            raise ValueError('Shape dump and rq labels do not match')

        # os.path.join tolerates a savepath with or without a trailing slash;
        # `key` is passed by keyword because positional use is deprecated.
        df_labels.to_hdf(os.path.join(savepath, label), key='labels')


In [43]:
# Trial run of the notebook-local implementation on the first three dumps only,
# writing the label files into the current working directory.
store_rq_labels(traces_path[0:3], rq_path, './')

In [44]:
# Read back one label file from the current directory (HDF key 'labels') to inspect it.
pd.read_hdf('09180909_1047_F0001_labels.h5', 'labels')

Unnamed: 0,eventnumber,seriesnumber,eventtime,triggertype,pollingendtime,triggertime,triggeramp,readoutstatusZ1,deadtimeZ1,livetimeZ1,...,ofenergy_constrain_err_al_int_PDS1Z1,ofenergy_unconstrain_al_int_PDS1Z1,ofenergy_unconstrain_err_al_int_PDS1Z1,ofenergy_constrain_pcon_al_int_PDS1Z1,ofenergy_constrain_pcon_err_al_int_PDS1Z1,ofenergy_unconstrain_pcon_al_int_PDS1Z1,ofenergy_unconstrain_pcon_err_al_int_PDS1Z1,integral_energy_al_int_PDS1Z1,integral_energy_err_al_int_PDS1Z1,ser_ev
2,10002,91809091047,1536515283,2,0.0,1787.459839,0,1.0,-999999.0,-999999.0,...,0.054544,22.027222,0.167561,7.067154,0.054544,22.027222,0.167561,77.351125,0.001394,9180909104710002
3,10003,91809091047,1536515283,2,0.0,1787.621826,0,1.0,-999999.0,-999999.0,...,0.070887,19.404926,0.147991,9.203500,0.070887,19.404926,0.147991,-59.348780,0.001070,9180909104710003
4,10004,91809091047,1536515283,2,0.0,1787.893311,0,1.0,-999999.0,-999999.0,...,0.114811,17.735058,0.135476,14.989913,0.114811,17.735058,0.135476,-12.716157,0.000229,9180909104710004
5,10005,91809091047,1536515283,2,0.0,1788.134277,0,1.0,-999999.0,-999999.0,...,0.052885,20.341565,0.154993,5.925137,0.045780,20.341565,0.154993,58.992340,0.001063,9180909104710005
6,10006,91809091047,1536515283,2,0.0,1788.435303,0,1.0,-999999.0,-999999.0,...,0.187406,,,24.700393,0.187406,,,-6880.738626,0.124038,9180909104710006
7,10007,91809091047,1536515285,2,0.0,1788.690674,0,1.0,-999999.0,-999999.0,...,0.043818,25.316243,0.191963,4.827027,0.037335,25.316243,0.191963,14.828153,0.000267,9180909104710007
8,10008,91809091047,1536515285,2,0.0,1789.119751,0,1.0,-999999.0,-999999.0,...,0.092769,15.585057,0.119301,12.078049,0.092769,15.585057,0.119301,-9.131640,0.000165,9180909104710008
9,10009,91809091047,1536515285,2,0.0,1789.534546,0,1.0,-999999.0,-999999.0,...,0.260497,5211.952712,283.882015,34.672242,0.260497,5211.952712,283.882015,19.925280,0.000359,9180909104710009
10,10010,91809091047,1536515285,2,0.0,1789.838257,0,1.0,-999999.0,-999999.0,...,0.068315,16.776695,0.128274,8.866689,0.068315,16.776695,0.128274,-34.565772,0.000623,9180909104710010
11,10011,91809091047,1536515285,2,0.0,1790.174316,0,1.0,-999999.0,-999999.0,...,0.040636,18.343620,0.140042,5.255935,0.040636,18.343620,0.140042,-50.162066,0.000904,9180909104710011


In [12]:
# Load the pickled RQ dataframe into the notebook namespace for interactive checks.
with open(rq_path, 'rb') as rq_file:
    rq = pkl.load(rq_file)

In [24]:
# Preview the 20-character file-name prefix that store_rq_labels uses to name label files.
traces_path[0].split('/')[-1][:20]

'09180909_1047_F0001_'

In [18]:
# Check that the RQ rows selected by event number line up one-to-one, in order,
# with the event numbers of a trace dump.
# NOTE(review): `eventnumbers_test` is not defined in any visible cell —
# presumably the event numbers returned by vae.load_preprocessed_traces for one
# dump; define it explicitly so this cell survives Restart & Run All.
cuts = rq.ser_ev.isin(eventnumbers_test)  # single vectorised membership test
df_labels = rq[cuts]
# array_equal yields False (instead of raising) when the lengths differ.
np.array_equal(df_labels.ser_ev.to_numpy(), np.asarray(eventnumbers_test))

True