In [1]:
%matplotlib inline
import time
import Waveform
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from glob import glob
import pickle
import matplotlib.dates as md
import datetime as dt
import time
from matplotlib.gridspec import GridSpec
import zipfile
import datetime
import gc
import json

In [2]:
read_from_folder_instad_zip = True

zip_to_read = "Z:\\MPIK\\recordings_until_20180320.zip"
folder_to_read = "Z:\\MPIK\\recordings_until_20180320\\LeCroyHDO\\"
#folder_to_read = "Z:\\MPIK\\rec_selection\\"

pkl_to_write = "data_from_zip_{}.pkl".format(zip_to_read.split('_')[-1].split('.')[0])
json_to_write = "data_from_zip_{}.json".format(zip_to_read.split('_')[-1].split('.')[0])

channels_to_read = ['C1', 'C2', 'C3']
smoothing_window = [32, 4096*4, 4096*4]
modes = ['raw', 'diff1', 'diff2']

use_moving_average_instead_of_windowed_average = False

status_print_holdoff = 50

In [3]:
# build the needed data structures
WF_keys = ['unixtime']
for chan in channels_to_read:
    for mode in modes:
        WF_keys.append('{}_{}_max'.format(chan, mode))
        WF_keys.append('{}_{}_max_time'.format(chan, mode))
        WF_keys.append('{}_{}_min'.format(chan, mode))
        WF_keys.append('{}_{}_min_time'.format(chan, mode))

WF_data = {}
for key in WF_keys:
    WF_data[key] = []


zip_name_list = []
if read_from_folder_instad_zip:
    zip_name_list = glob(folder_to_read+'*'+channels_to_read[0]+'.bytes')
else:
    with zipfile.ZipFile(zip_to_read, 'r') as zf:
        # get the list of files in the zip
        zip_name_list = zf.namelist()

In [4]:
def extract_WFs_for_timestamp(timestamp):
    timestamp = str(timestamp)
    
    extracted_WFs = []
    # open the zip file
    with zipfile.ZipFile(zip_to_read, 'r') as zf:
        # start the extraction process
        files_to_extract = []
        for name in zip_name_list:
            if timestamp in name:
                files_to_extract.append(name)

        files_to_extract = sorted(files_to_extract)
        for name in files_to_extract:
            try:
                data = zf.read(name)
                extracted_WFs.append(Waveform.Waveform(data))
            except OSError as e:
                print("An OSError with the following message occured: {}".format(e))
                print("The that was to be extracted was: {}".format(name))
                print("Continuing anyways.")
    return extracted_WFs

def extract_WFs_from_zip(list_files):
    extracted_WFs = []
    with zipfile.ZipFile(zip_to_read, 'r') as zf:
        for name in list_files:
            try:
                data = zf.read(name)
                extracted_WFs.append(Waveform.Waveform(data))
            except OSError as e:
                print("An OSError with the following message occured: {}".format(e))
                print("The that was to be extracted was: {}".format(name))
                print("Continuing anyways.")
    return extracted_WFs

def extract_WFs_from_files(file_list):
    extracted_WFs = []
    for file in file_list:
        with open(file, "rb") as f:
            raw_WF = f.read()
        try:
            extracted_WFs.append(Waveform.Waveform(raw_WF))
        except Exception as e:
            # this may happen when an event was not completly written and the data is thus shorter than expected
            print("WARN: Skipping event!\tError while parsing file {}".format(file))
            print("Following exception occured: {}".format(e))
    return extracted_WFs

# function to do a moving average over the data
def avg_smoother(x, y, width):
    window = np.ones(width)/width
    y_smooth = np.convolve(y, window, mode='same')
    return x, y_smooth

# function to reduce the array size by averaging over fixed parts
def avg_reducer(x, y, width):
    # see: https://stackoverflow.com/a/26639037
    x = x[:len(x)//width * width].reshape(-1, width).mean(axis=1)
    y = y[:len(y)//width * width].reshape(-1, width).mean(axis=1)
    return x, y

In [5]:
events_to_read = len(zip_name_list)
print("Aproximate number of events to read: {:.2f} ".format(events_to_read))

start_time = time.time()
for i in range(len(zip_name_list)):
    
    # print the status first
    if (i % status_print_holdoff) == 0:
        if i == 0:
            continue
        current_step_time = time.time() - start_time
        estimated_time_per_event = current_step_time / status_print_holdoff
        estimated_left_over_time = (events_to_read - i)*estimated_time_per_event /60
        read_speed = 60*1/estimated_time_per_event
        message = "Progress: {:.3f} [%]\tEstimated time left: {:.2f} [min]\tRead speed: {:0.1f} [Events/min]"
        message = message.format(100*i/events_to_read, estimated_left_over_time, read_speed)
        print(message, end='\r')
        start_time = time.time()
    
    # check that the file is a measruement
    if ".bytes" in zip_name_list[i]:
        file_name = zip_name_list[i]
        unixtime = float(file_name.split('_')[-2])
        # check that we haven't already read the event
        if unixtime in WF_data['unixtime']:
            continue
        
        # find out which files need to be extracted
        files_to_extract = []
        for chan in channels_to_read:
            files_to_extract.append(file_name.replace(file_name.split('_')[-1].split('.')[0], chan))
        
        try:
            if read_from_folder_instad_zip:
                WFs = extract_WFs_from_files(files_to_extract)
            else:
                WFs = extract_WFs_from_zip(files_to_extract)
        except Exception as e:
            # this may happen when an event was not completly written and the data is thus shorter than expected
            print("WARN: Skipping event!\tError while parsing following event files: {}".format(files_to_extract))
            print("Following exception occured: {}".format(e))
            continue
        
        # do the analysis on the current WFs
        # smoothing
        for i in range(len(smoothing_window)):
            if use_moving_average_instead_of_windowed_average:
                WFs[i].time, WFs[i].data = avg_smoother(WFs[i].time, WFs[i].data, smoothing_window[i])
            else:
                WFs[i].time, WFs[i].data = avg_reducer(WFs[i].time, WFs[i].data, smoothing_window[i])
        
        # calculate diff 1 and 2
        diffs = {
            'diff1': [],
            'diff2': []
        }
        for i in range(len(smoothing_window)):
            grad = np.gradient(WFs[i].data)
            diffs['diff1'].append(grad)
            diffs['diff2'].append(np.gradient(grad))
            
        # calculate max and min
        # extract data from all wavforms from this event
        WF_data['unixtime'].append(unixtime)
        for WF in WFs:
            index = WF.get_channel() - 1
            chan = "C{}".format(WF.get_channel())
            for mode in modes:
                data_for_mode = 0
                if mode == 'raw':
                    data_for_mode = WF.data
                elif mode == 'diff1':
                    data_for_mode = diffs['diff1'][index]
                elif mode == 'diff2':
                    data_for_mode = diffs['diff2'][index]
                else:
                    raise(RuntimeError("Mode not supported! Aborting!"))
                
                WF_data['{}_{}_max'.format(chan, mode)].append(data_for_mode.max())
                WF_data['{}_{}_max_time'.format(chan, mode)].append(WF.time[data_for_mode.argmax()])
                WF_data['{}_{}_min'.format(chan, mode)].append(data_for_mode.min())
                WF_data['{}_{}_min_time'.format(chan, mode)].append(WF.time[data_for_mode.argmin()])

Aproximate number of events to read: 28315.00 
Progress: 99.947 [%]	Estimated time left: 0.07 [min]	Read speed: 221.6 [Events/min]]]

In [6]:
# save data
print("Saving json to disk")
with open(json_to_write, 'w') as outfile:
    json.dump(WF_data_lists_1, outfile)
    
# convert list to np.arrays and save as pkl
print("Converting read lists to numpy arrays")
for key in WF_keys:
    WF_data[key] = np.asarray(WF_data[key])

print("Saving pkl to disk")
with open(pkl_to_write, 'wb') as f:
    pickle.dump(WF_data, f, protocol=2)

Saving json to disk


NameError: name 'WF_data_lists_1' is not defined

In [88]:
i

1

In [7]:
for key in WF_data.keys():
    print(key, len(WF_data[key]))

unixtime 28314
C1_raw_max 0
C1_raw_max_time 0
C1_raw_min 0
C1_raw_min_time 0
C1_diff1_max 0
C1_diff1_max_time 0
C1_diff1_min 0
C1_diff1_min_time 0
C1_diff2_max 0
C1_diff2_max_time 0
C1_diff2_min 0
C1_diff2_min_time 0
C2_raw_max 28314
C2_raw_max_time 28314
C2_raw_min 28314
C2_raw_min_time 28314
C2_diff1_max 28314
C2_diff1_max_time 28314
C2_diff1_min 28314
C2_diff1_min_time 28314
C2_diff2_max 28314
C2_diff2_max_time 28314
C2_diff2_min 28314
C2_diff2_min_time 28314
C3_raw_max 56628
C3_raw_max_time 56628
C3_raw_min 56628
C3_raw_min_time 56628
C3_diff1_max 56628
C3_diff1_max_time 56628
C3_diff1_min 56628
C3_diff1_min_time 56628
C3_diff2_max 56628
C3_diff2_max_time 56628
C3_diff2_min 56628
C3_diff2_min_time 56628
