In [2]:
import os
import sys
import numpy as np
import time
%matplotlib inline
import mpld3
mpld3.enable_notebook()
import matplotlib
import matplotlib.pyplot as plt 

# Apply a custom matplotlib style sheet that is looked up in the user's home directory.
# NOTE(review): this file lives outside the notebook, so it has to be present
# on whichever machine runs this cell.
plt.style.use('~/evanstyle.mplstyle')
# Default figure size, given as (width, height) in inches.
plt.rcParams['figure.figsize'] = (10.0, 8.0)

## Dictionary of datasets on LLNL. Checks whether each dataset exists and how many files it contains
### Note: the integer key of each dictionary entry matches the row number in the Google Sheets log
##### https://docs.google.com/spreadsheets/d/14KmVPS824ExjVpss9H6fMSNVhopGtHVclZNsw5kKLr8/edit?usp=sharing
##### Save this sheet to your own drive!

In [3]:
#root directory under which every dataset folder lives
datatopdir = "/p/lustre1/angelico/hv-test-chamber/"

#dataset folders relative to datatopdir, keyed by an integer label
datasets = {
    2: "1-29-21/pmt-trig-filling-1800",
    3: "1-29-21/pmt-trig-filling-1920",
    4: "1-30-21/anode-crosstrig-1300",
    5: "1-30-21/anode-crosstrig-1320",
    6: "1-30-21/ignition-1500",
    7: "1-30-21/ignition-10k-1520",
    8: "1-31-21/glitch-1520",
    9: "2-1-21/anode-100",
    10: "2-1-21/anode-1340",
    11: "2-1-21/glitch-1530",
    12: "2-1-21/glitch-2230",
    13: "2-2-21/anode-1030",
    14: "2-2-21/corona-1300",
    15: "2-2-21/glitch-1320",
    16: "2-2-21/glitch-1430",
    17: "2-2-21/anode-1720",
    18: "2-3-21/glitch-1040",
    19: "2-3-21/anode-1050",
    20: "2-3-21/glitch-1810",
    21: "2-3-21/anode-1820",
}

#report, for every dataset, whether its folder exists and how many
#directory entries it holds; remember the keys of the missing ones
print("Checking for datasets in top directory: {}".format(datatopdir))
to_remove = [] #keys whose folder was not found
for dno, relpath in datasets.items():
    dataset_dir = datatopdir + relpath + "/"
    print("{}: {}\t\t".format(dno, relpath), end='')
    isdir = os.path.isdir(dataset_dir)
    if not isdir:
        print("False")
        to_remove.append(dno)
        continue
    numfiles = len(os.listdir(dataset_dir))
    print("True, with {} files".format(numfiles))

#drop the missing datasets so that only folders that exist stay in the dictionary
for k in to_remove:
    datasets.pop(k)

Checking for datasets in top directory: /p/lustre1/angelico/hv-test-chamber/
2: 1-29-21/pmt-trig-filling-1800		True, with 3019 files
3: 1-29-21/pmt-trig-filling-1920		True, with 81718 files
4: 1-30-21/anode-crosstrig-1300		True, with 161 files
5: 1-30-21/anode-crosstrig-1320		False
6: 1-30-21/ignition-1500		True, with 3795 files
7: 1-30-21/ignition-10k-1520		True, with 10849 files
8: 1-31-21/glitch-1520		True, with 10398 files
9: 2-1-21/anode-100		True, with 15628 files
10: 2-1-21/anode-1340		True, with 208 files
11: 2-1-21/glitch-1530		True, with 5333 files
12: 2-1-21/glitch-2230		True, with 8129 files
13: 2-2-21/anode-1030		True, with 22247 files
14: 2-2-21/corona-1300		True, with 2817 files
15: 2-2-21/glitch-1320		True, with 14598 files
16: 2-2-21/glitch-1430		False
17: 2-2-21/anode-1720		True, with 21034 files
18: 2-3-21/glitch-1040		False
19: 2-3-21/anode-1050		True, with 59444 files
20: 2-3-21/glitch-1810		False
21: 2-3-21/anode-1820		True, with 156841 files


## Utility functions for parsing file names

In [4]:

def parse_timestamp_from_filename(infile):
    """Convert the clock time embedded in a data-file name into milliseconds.

    infile looks like /path/to/data/file/pmt15.23.43.132.csv, i.e. a file
    prefix followed by hour.minute.second.millisecond and an extension.

    Returns the hour, minute, second and millisecond fields combined into a
    single float number of milliseconds.
    """
    #keep only the file name, then drop the extension field
    basename = infile.split('/')[-1]
    fields = basename.split('.')[:-1]
    #the hour is the last two characters of the first field;
    #whatever precedes it is the file prefix
    fields[0] = fields[0][-2:]
    values = [int(field) for field in fields]
    hours, minutes, seconds, millis = values[0], values[1], values[2], values[3]
    #the 1e3 factor turns whole seconds into (float) milliseconds
    return (hours*3600 + minutes*60 + seconds)*1e3 + millis

def get_separated_file_lists(indir, file_prefixes):
    """List the .csv files of a dataset directory, grouped by file-name prefix.

    Parameters
    ----------
    indir : str
        Directory to scan (a dataset), with or without a trailing separator.
    file_prefixes : iterable of str
        Prefixes to group by, e.g. ["pmt", "anode"].

    Returns
    -------
    dict
        Maps every prefix to the list of paths (indir joined with the file
        name) of the regular .csv files whose name starts with that prefix,
        so separate_file_lists['pmt'] = ['<indir>/pmt14.53.24.449.csv', ...].
        A prefix without matching files maps to an empty list. The order of
        each list follows os.listdir.
    """
    #full list of .csv files; the cheap suffix test runs first so that
    #only .csv candidates cost a filesystem stat
    file_list = [f for f in os.listdir(indir)
                 if f.endswith('.csv') and os.path.isfile(os.path.join(indir, f))]

    separate_file_lists = {}
    for pref in file_prefixes:
        #os.path.join (rather than indir+name) keeps the returned paths valid
        #when indir has no trailing separator, and gives the same result as
        #plain concatenation when it has one
        separate_file_lists[pref] = [os.path.join(indir, f) for f in file_list
                                     if f.startswith(pref)]

    return separate_file_lists

def get_separated_timestamps(separated_file_lists):
    """Build per-prefix timestamp lists and sort the files by timestamp.

    Parameters
    ----------
    separated_file_lists : dict
        Maps a file prefix to a list of file paths. The dictionary is updated
        in place: each list is replaced by its time-sorted version.

    Returns
    -------
    (separated_timestamps, separated_file_lists)
        separated_timestamps maps each prefix to the sorted timestamps
        (as returned by parse_timestamp_from_filename); the second item is the
        input dictionary, whose lists now line up index-by-index with the
        timestamps. A prefix with no files yields two empty lists.
    """
    separated_timestamps = {}
    for pref in separated_file_lists:
        #pair every timestamp with its file and sort the pairs, so both
        #lists end up in the same order; equal timestamps are ordered by path.
        #Unlike unpacking zip(*sorted(...)), this also works for an empty list.
        pairs = sorted((parse_timestamp_from_filename(f), f)
                       for f in separated_file_lists[pref])
        separated_timestamps[pref] = [stamp for stamp, _ in pairs]
        separated_file_lists[pref] = [path for _, path in pairs]

    return separated_timestamps, separated_file_lists

def get_sampling_period_from_file(infile):
    """Read the sampling period, in nanoseconds, from a data file's header.

    The fifth line of the file must end (after the last space) with the
    sample rate followed by text starting with 'H', e.g. "... 2.5E+9Hz"
    (presumably hertz). The period is the reciprocal of that rate times 1e9.

    Raises IndexError if the file has fewer than five lines and ValueError
    if the rate field cannot be parsed as a number.
    """
    #only the first five lines are needed, so stop reading there instead of
    #loading the whole file; the with-block closes the file handle
    with open(infile, 'r', errors='ignore') as f:
        header = [line for _, line in zip(range(5), f)]

    #parse header for the sample rate: last space-separated token, up to the 'H'
    raw_sample_rate = header[4].split(' ')[-1]
    sample_rate = float(raw_sample_rate.split('H')[0])
    return (1.0/sample_rate)*1e9 #nanoseconds


## Plotting random events

In [5]:
import pandas as pd 

#plotting helper: sets up the figure for one event
def plot_event(event_series):
    """Create a 10 x 16 figure with two vertically stacked axes.

    NOTE(review): event_series is accepted but not used, and nothing is
    drawn or returned -- the function looks unfinished.
    """
    figure, axes = plt.subplots(2, 1, figsize=(10, 16))

In [9]:
import random
import sys

#the part of the filename before the timestamp.
#used to distinguish the two oscilloscopes
file_prefixes = ["pmt", "anode"]

#number of random dataset picks made on every execution of this block
n_random_plots = 10

for i in range(n_random_plots):
    #pick a random dataset folder
    key, dataset = random.choice(list(datasets.items()))
    print("Picked " + str(key) + " : " + dataset)
    t0 = time.time()

    #pass the dataset directory with a trailing separator, so that file
    #paths derived from it are well-formed
    dataset_dir = os.path.join(datatopdir, dataset, "")
    separated_file_lists = get_separated_file_lists(dataset_dir, file_prefixes)
    print("Separation by prefix took " + str(time.time() - t0))

    #NOTE(review): the rest of the loop body is disabled by wrapping it in a
    #string literal, so each iteration currently only times the file listing.
    #Before re-enabling it:
    # - 'ChannelTypes' is assigned with '=' for the anode file while every
    #   other field uses '+=', so it would end with two entries instead of four
    # - the trailing exit() would presumably stop execution after the first pass
    """
    separated_timestamps, separated_file_lists = get_separated_timestamps(separated_file_lists)
    
    #print timing
    print("Took " + str(time.time() - t0) + " seconds to load ", end=' ')
    for pref in separated_timestamps:
        print(str(len(separated_timestamps[pref])) + " " + pref + " files,", end=' ')
    print("\n")
    #end print timing
    
    #pick a random file from the list of PMT files
    idx = np.random.randint(0, len(separated_file_lists['pmt'])) #is matched to timestamps too
    infile = separated_file_lists['pmt'][idx]
    event_series = pd.Series()
    event_series['Timestamps'] = [parse_timestamp_from_filename(infile)]*2 #in milliseconds since 00:00 (midnight)
    event_series['SamplingPeriods'] = [get_sampling_period_from_file(infile)]*2 #nanoseconds
    #load the file
    d = pd.read_csv(infile, header=None, skiprows=11, names=['ts','0','1'], encoding='iso-8859-1')
    event_series['Channels'] = ["pmt1", "pmt2"]
    event_series['ChannelTypes'] = ["pmt", "pmt"]
    data_map = [d['0'].to_numpy(), d['1'].to_numpy()] 
    event_series['Data'] = data_map
    
    #find the anode file that is closest in time
    pmt_time = separated_timestamps['pmt'][idx]
    anode_idx = (np.abs(np.array(separated_timestamps['anode']) - pmt_time).argmin())
    #load similar to the block above, but now appending to the event series
    infile = separated_file_lists['anode'][anode_idx]
    event_series['Timestamps'] += [parse_timestamp_from_filename(infile)]*2 #in milliseconds since 00:00 (midnight)
    event_series['SamplingPeriods'] += [get_sampling_period_from_file(infile)]*2 #nanoseconds
    #load the file
    d = pd.read_csv(infile, header=None, skiprows=11, names=['ts','0','1'], encoding='iso-8859-1')
    event_series['Channels'] += ["glitch", "anode"]
    event_series['ChannelTypes'] = ["glitch", "anode"]
    data_map = [d['0'].to_numpy(), d['1'].to_numpy()] 
    event_series['Data'] += data_map
    
    print(event_series['Data'])
    print(event_series['SamplingPeriods'])
    exit()
"""

Here
Picked 15 : 2-2-21/glitch-1320


KeyboardInterrupt: 