In [28]:
import os
import sys
import numpy as np
import time
%matplotlib inline
import mpld3
mpld3.enable_notebook()
import matplotlib
import matplotlib.pyplot as plt 
import time
from datetime import datetime, timedelta

plt.rcParams['figure.figsize'] = (10.0, 8.0)

In [85]:
datasets = {0: "./8-15-21/filter-test/30kV/", \
           1: "./8-15-21/5kV-pmts/", \
           2: "./8-15-21/5kV-glitch/"}

for key, filepath in datasets.items():
    if(os.path.isdir(filepath) == False):
        print("WARNING: " + filepath + " is not a directory")
        continue
    else:
        files = [name for name in os.listdir(filepath) if os.path.isfile(os.path.join(filepath, name))]
        files = [_ for _ in files if _[-3:] == "csv"]
        num_files = len(files)
        if(num_files == 0):
            print("WARNING: no csv files in " + filepath)
        else:
            print("Found : " + str(num_files) + " csv files in " + filepath + ", dataset " + str(key))

    


Found : 124 csv files in ./8-15-21/filter-test/30kV/, dataset 0
Found : 17129 csv files in ./8-15-21/5kV-pmts/, dataset 1
Found : 12 csv files in ./8-15-21/5kV-glitch/, dataset 2


## Utility functions for parsing file names

In [64]:

def parse_timestamp_from_filename(infile):
    #infile looks like /path/to/data/file/pmt06.15.23.43.132.csv (day, hour, minute, second, milli)
    fn = infile.split('/')[-1]
    t = fn.split(".")
    t[0] = t[0][-2:] #ignore the filetag prefix
    timestamp = datetime.strptime('.'.join(t[:-1]), "%d.%H.%M.%S.%f")
    return timestamp


#looks at the input directory (a dataset) and
#finds all .csv files, separating them by file prefix
def get_separated_file_lists(indir, file_prefixes, nevents=None):
    #full list of .csv files
    file_list = []
    if(nevents is not None):
        for i, f in enumerate(os.listdir(indir)):
            if(i > nevents):
                break
            if(os.path.isfile(os.path.join(indir, f)) \
                 and f.endswith('.csv')):
                file_list.append(f)
    else:
        file_list = [f for f in os.listdir(indir) if os.path.isfile(os.path.join(indir, f)) \
                 and f.endswith('.csv')]

    separate_file_lists = {}
    for pref in file_prefixes:
        #selects filenames by prefix. so separate_file_lists['pmt'] = ['pmt14.53.24.449', 'pmt10.34....', ...]
        separate_file_lists[pref] = list(filter(lambda x: x[:len(pref)] == pref, file_list))  
    
    return separate_file_lists

#converts the dictionary of separated file lists into
#a dictionary of separated timestamps (units milliseconds)
def get_separated_timestamps(separated_file_lists):
    separated_timestamps = {}
    for pref in separated_file_lists:
        separated_timestamps[pref] = [parse_timestamp_from_filename(f) for f\
                                      in separated_file_lists[pref]]
        
        #if there are none from one of the prefixes, return empty lists
        if(len(separated_timestamps[pref]) == 0):
            separated_file_lists[pref] = []
            continue
            
        #sort both the timestamps lists and the filelists
        #simultaneously by the timestamps
        separated_timestamps[pref], separated_file_lists[pref] = \
        (list(t) for t in zip(*sorted(zip(separated_timestamps[pref], separated_file_lists[pref]))))
    
    return separated_timestamps, separated_file_lists

def get_sampling_period_from_file(infile):
    #parse header for the timestep
    f = open(infile, 'r', errors='ignore')
    ls = f.readlines()
    raw_sample_rate = ls[4]
    raw_sample_rate = raw_sample_rate.split(' ')[-1]
    raw_sample_rate = float(raw_sample_rate.split('H')[0])
    return (1.0/raw_sample_rate)*1e9 #nanoseconds


## Demonstrate plotting a single datasets timestamps relative to the start of the entire run

In [86]:
#the part of the filename before the timestamp. 
#used to distinguish the two oscilloscopes
file_prefixes = ["pmt", "anode"]


#load some timestamps from a dataset of your choosing
dataset = datasets[2]
t0 = time.time()
nevents = 100000
separated_file_lists = get_separated_file_lists(dataset, file_prefixes, nevents)
separated_timestamps, separated_file_lists = get_separated_timestamps(separated_file_lists)

#print timing
print("Took " + str(time.time() - t0) + " seconds to load ", end=' ')
for pref in separated_timestamps:
    print(str(len(separated_timestamps[pref])) + " " + pref + " files,", end=' ')
print("\n")
#end print timing

Took 0.001737833023071289 seconds to load  6 pmt files, 6 anode files, 



In [88]:
#provides zeroed timestamps with millisecond resolution in units of
#hours, minutes, and seconds. the time zero is based on the run reference,
#which is a datetime object (as are the separated timestamps)
def convert_timestamps_to_realtime(separated_timestamps, run_reference, dataset_date):
    sep_times_h = {}
    sep_times_m = {}
    sep_times_s = {}
    reftime = separated_timestamps["pmt"][0]
    for pref, stamps in separated_timestamps.items():
        zeroed_timedeltas = [_ - reftime for _ in stamps]
        days = [_.days for _ in zeroed_timedeltas]
        seconds = [_.seconds for _ in zeroed_timedeltas]
        ms = [_.microseconds / 1000 for _ in zeroed_timedeltas]
        sep_times_h[pref] = [days[i]*24 + seconds[i]/3600 + ms[i]/3600/1000 for i in range(len(zeroed_timedeltas))]
        sep_times_m[pref] = [_*60 for _ in sep_times_h[pref]]
        sep_times_s[pref] = [_*60 for _ in sep_times_m[pref]]
    
    return sep_times_h, sep_times_m, sep_times_s
        
            
        


run_reference = datetime.strptime("8-15-21", "%m-%d-%y")
dataset_date = "8-15-21"
#this one you need to be careful and know info on the dataset, 
#see comment by "convert_timestamps_to_realtime"
sep_times_h, sep_times_m, sep_times_s = convert_timestamps_to_realtime(separated_timestamps, run_reference, dataset_date)

In [None]:
#makes a histogram of event times, binned to see "differential" rate.
#also makes a funny upsidedown, infinitessimal histogram to compare times across pmt/anode
fig, (axhist, axscat) = plt.subplots(nrows = 2, figsize=(10, 16))
binwidth = 2 #minutes

yticks = [0, 1]
pref_vals = file_prefixes
for i, pref in enumerate(file_prefixes):
    bins = np.arange(min(sep_times_m[pref]), max(sep_times_m[pref]), binwidth)
    axhist.hist(sep_times_m[pref], bins, label=pref)
    
    axscat.scatter(np.array(sep_times_s[pref]), [yticks[i]]*len(sep_times_s[pref]),  marker='|', s=5000)


axscat.set_xlabel("event time (seconds since run start)")
axhist.set_xlabel("event time (minutes since midnight)")
axhist.set_ylabel("events per " + str(binwidth) + " minute binwidth")
#for funny plot
axscat.set_ylim([-3, 3])
axscat.set_yticks(yticks)
axscat.set_yticklabels(pref_vals)
axscat.grid(False)

axhist.legend()

plt.show()

In [75]:
#correlation of timestamps
import pycorrelate

corr_dt = .1 #seconds to bin the time-lag correlation
corr_bins = np.arange(-50, 50, corr_dt)
corr = pycorrelate.pycorrelate.pcorrelate(sep_times_s['pmt'], sep_times_s['anode'], corr_bins, normalize=True)
shift = corr_bins[np.argmax(corr)]
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(corr_bins[:-1], corr, label="Max at " + str(shift))
ax.legend()
plt.show()


pmt_sep_times_s_shifted = sep_times_s['pmt'] - shift
fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(np.array(pmt_sep_times_s_shifted), [yticks[0]]*len(pmt_sep_times_s_shifted),  marker='|', s=5000)
ax.scatter(np.array(sep_times_s['anode']), [yticks[1]]*len(sep_times_s['anode']),  marker='|', s=5000)
ax.set_ylim([-3, 3])
ax.set_yticks(yticks)
ax.set_yticklabels(pref_vals)
ax.grid(False)
plt.show()

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1m[1m[1m[1m[1m[1mFailed in nopython mode pipeline (step: nopython frontend)
[1m[1mUnknown attribute 'max' of type reflected list(float64)<iv=None>
[1m
File "../.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py", line 38:[0m
[1mdef pnormalize(G, t, u, bins):
    <source elided>
    """
[1m    duration = max((t.max(), u.max())) - min((t.min(), u.min()))
[0m    [1m^[0m[0m
[0m
[0m[1mDuring: typing of get attribute at /home/esiii_lxe/.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py (38)[0m
[1m
File "../.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py", line 38:[0m
[1mdef pnormalize(G, t, u, bins):
    <source elided>
    """
[1m    duration = max((t.max(), u.max())) - min((t.min(), u.min()))
[0m    [1m^[0m[0m

[0m[1mDuring: resolving callee type: type(CPUDispatcher(<function pnormalize at 0x7f60a36ce040>))[0m
[0m[1mDuring: typing of call at /home/esiii_lxe/.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py (113)
[0m
[0m[1mDuring: resolving callee type: type(CPUDispatcher(<function pnormalize at 0x7f60a36ce040>))[0m
[0m[1mDuring: typing of call at /home/esiii_lxe/.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py (113)
[0m
[0m[1mDuring: resolving callee type: type(CPUDispatcher(<function pnormalize at 0x7f60a36ce040>))[0m
[0m[1mDuring: typing of call at /home/esiii_lxe/.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py (113)
[0m
[0m[1mDuring: resolving callee type: type(CPUDispatcher(<function pnormalize at 0x7f60a36ce040>))[0m
[0m[1mDuring: typing of call at /home/esiii_lxe/.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py (113)
[0m
[1m
File "../.local/lib/python3.8/site-packages/pycorrelate/pycorrelate.py", line 113:[0m
[1mdef pcorrelate(t, u, bins, normalize=False):
    <source elided>
    if normalize:
[1m        G = pnormalize(G, t, u, bins)
[0m        [1m^[0m[0m
