# Step 0

In [None]:
import sys
import os
from os.path import join
import time
from datetime import datetime
import importlib
import numpy as np
import pandas as pd
import h5py
import imageio
from scipy import ndimage
from scipy import interpolate
import skimage
from tqdm import tqdm
from tqdm import trange
from matplotlib import pyplot as plt
from matplotlib import cm as cm
from matplotlib import colors
from matplotlib import patches
import proplot as pplt

import energyVS06 as energy
from plotting import plot_profiles
from plotting import plot_image
from plotting import thresh_image

In [None]:
# Session-wide proplot settings (meanings taken from the rc key names):
# grid off, non-discrete colormaps, 'viridis' as the sequential colormap.
pplt.rc['grid'] = False
pplt.rc['cmap.discrete'] = False
pplt.rc['cmap.sequential'] = 'viridis'

In [None]:
# Camera identifier string (presumably the prefix of that camera's columns
# in the scan data -- TODO confirm).
cam = 'cam34'
# Directory holding the measurement files for this scan date.
datadir = '/Diagnostics/Data/Measurements/scan-xxpy-image-ypdE/2022-04-29/'
# List the directory contents; the bare expression displays them in the notebook.
filenames = os.listdir(datadir)
filenames

In [None]:
# Base name of the scan; the '.csv' and '.h5' files share it.
filename = '220429190854-scan-xxpy-image-ypdE'

# Column names of the scan table: a (start, stop, speed) triple for each of
# the `ndim` scan dimensions (presumably one per actuator -- TODO confirm).
ndim = 3
names = []
for i in range(1, ndim + 1):
    names.extend([f'start{i}', f'stop{i}', f'speed{i}'])

# The first 25 lines are skipped (assumed to be a text header -- TODO confirm).
# The separator is a raw string: '\s' in a plain literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
df = pd.read_csv(join(datadir, filename + '.csv'),
                 skiprows=25, sep=r'\s+', header=None, names=names)

# For each dimension, interleave start/stop values row by row:
# [start_0, stop_0, start_1, stop_1, ...].
act_pts = [
    df.loc[:, [f'start{i}', f'stop{i}']].values.ravel()
    for i in range(1, ndim + 1)
]

# One panel per scan dimension (row count follows `ndim` rather than a literal 3).
fig, axes = pplt.subplots(nrows=ndim, figsize=(7, 3))
for i, ax in enumerate(axes):
    ax.plot(act_pts[i], color='black')
plt.show()

In [None]:
# Open the scan file read-only. It is intentionally left open: later cells
# read lazily from datasets inside it.
file = h5py.File(join(datadir, filename + '.h5'), 'r')

# Errors and warnings from log: print every entry whose level is not INFO.
log = file['/log']
for i in range(log.size):
    if log[i, 'level'] != b'INFO':
        # Use the timestamp of entry i (the original always used entry 0,
        # so every message was stamped with the time of the first log line).
        timestr = datetime.fromtimestamp(log[i, 'timestamp']).strftime("%m/%d/%Y, %H:%M:%S")
        print(f"{timestr} {log[i, 'message']}")

# Configuration data: dump every field of every dataset under /config.
config = file['/config']
for key in config.keys():
    print(f"{key}")
    print("--------------")
    for name in config[key].dtype.names:
        print(f"{name}: {config[key][name]}")
    print()

In [None]:
# Sort the columns of /scandata into three groups by name:
#   '_Image' in name  -> image columns
#   'Profile' in name -> waveform columns
#   anything else     -> scalar columns
data = file['/scandata']
attrs = data.dtype.names

sc_attrs = list(attrs)  # starts with every column; non-scalars are popped below
sc_dtype, wf_dtype, im_dtype = [], [], []
wf_attrs, im_attrs = [], []

print('Moving the following columns:')
# Walk backwards so that popping index i from sc_attrs never shifts an
# index that is still to be visited.
for i in range(len(attrs) - 1, -1, -1):
    name = attrs[i]
    field = (name, data.dtype[name])
    if '_Image' in name:
        moved_attrs, moved_dtype = im_attrs, im_dtype
    elif 'Profile' in name:
        moved_attrs, moved_dtype = wf_attrs, wf_dtype
    else:
        sc_dtype.append(field)
        continue
    sc_attrs.pop(i)
    moved_attrs.append(name)
    moved_dtype.append(field)
    print(name)

# Turn the (name, dtype) lists into compound dtypes.
sc_dtype = np.dtype(sc_dtype)
wf_dtype = np.dtype(wf_dtype)
im_dtype = np.dtype(im_dtype)

for heading, group_dtype in (('\nscalars:', sc_dtype),
                             ('\nwaveforms:', wf_dtype),
                             ('\nimage:', im_dtype)):
    print(heading)
    print(group_dtype)

In [None]:
# Scalar columns from position 5 onward are treated as actuators
# (assumes a fixed column order in the scan file -- TODO confirm).
acts = sc_attrs[5:]
print('Actuators:')
print(acts)

# Scalar columns 2..4 are treated as scalar diagnostics (same ordering assumption).
sdiag = list(np.array(sc_attrs[2:5]))
print('Scalar diagnostics:')
print(sdiag)

In [None]:
# Summary statistics of the scan: iteration count, duration, rates.
# The value of 'iteration' in the last row is used as the iteration count.
n_iterations = data[-1, 'iteration']
print('# iterations %i' % n_iterations)

# Wall-clock span between the first and last recorded point.
duration = data[-1, 'timestamp'] - data[0, 'timestamp']
print('%i points recorded over duration %.1f seconds (%.1f hours)'
      % (len(data), duration, duration / 60 / 60))
print('Effective rep rate = %.2f' % (len(data) / duration))

iteration_duration = duration / n_iterations
print('Time per iteration: %.2f seconds' % iteration_duration)
npts_iteration = len(data) / n_iterations
print('Points per iteration: %.2f ' % npts_iteration)

In [None]:
# Indices of the first points of the scan (at most 240), used for the
# actuator plot; clamped so a short scan does not index past the end.
idx = np.arange(min(240, len(data)))
# Wall-clock time of every point. The timestamp column is read from the
# file once instead of issuing one HDF5 read per row.
times = [datetime.fromtimestamp(t) for t in data[:, 'timestamp']]

In [None]:
# Actuator columns over the points selected by `idx`, one panel per actuator.
# zip() stops at the shorter of (panels, actuators).
fig, axes = pplt.subplots(nrows=3, figsize=(7, 4), spany=False, aligny=True)
for panel, actuator in zip(axes, acts):
    panel.plot(idx, data[idx, actuator], color='black')
    panel.set_ylabel(actuator)
plt.show()

In [None]:
# Time between consecutive points and the repetition rate derived from it.
timestamps = data[:, 'timestamp']
dt = np.diff(timestamps)
print('reprate = %.2f Hz' % (1 / np.median(dt)))

# -- mask out a long interruption
# Gaps above 30 s are reported, then overwritten with 0.2 so they do not
# dominate the histogram below.
long_pause = dt > 30.0
print('Pauses longer than 30 seconds:')
print(dt[long_pause])
dt[long_pause] = 0.2

# Note: the effective rate uses the raw first/last timestamps, not the masked dt.
duration = timestamps[-1] - timestamps[0]
print('effective rep-rate = %.2f Hz' % (len(data) / duration))

# Histogram of the gaps on a logarithmic count axis.
hist, bins, pathces = plt.hist(dt, bins=21)
plt.ylabel('bin counts')
plt.xlabel('Pauses between datapoints')
plt.yscale('log')

# Every gap that falls beyond the first histogram bin counts as a "pause".
idx_bins = np.digitize(dt, bins)
idx_pause = np.flatnonzero(idx_bins > 1)

print('most long pauses are %.2f seconds' % np.median(dt[idx_pause]))

In [None]:
# Index of every recorded point, and the wall-clock time of each.
# Uses `data` (the /scandata dataset); the previous names `ds` / `lds` are
# not defined anywhere in this notebook.
idx_plot = np.arange(len(data))

# Read the timestamp column once rather than one HDF5 read per point.
timeplot = [datetime.fromtimestamp(t) for t in data[:, 'timestamp']]

In [None]:
# Time series of the 'bcm04' column, plus its mean and standard deviation.
# Reads from `data`; the previous name `ds` is not defined in this notebook.
item = 'bcm04'
tmp_ = data[:, item]

plt.figure(figsize=[16, 3])
plt.plot(timeplot, tmp_[idx_plot], '.-')
plt.title(item)
# NOTE(review): the limits are given as (-28, -32), which inverts the y axis
# -- confirm this is intended.
plt.ylim([-28, -32])
plt.grid(axis='y')

print('Before masking, average BCM current = %.3f +- %.3f mA' % (np.mean(tmp_), np.std(tmp_)))

In [None]:
### Mask BCM current
# Flag every point whose reading is above -bcm_limit (readings are presumably
# negative, so this selects points with too little current -- TODO confirm).
bcm_limit = 20
idx_mask2, = np.where(tmp_ > -1 * bcm_limit)
print(idx_mask2)
print('%i points masked due to bcm current < %.3f mA' % (len(idx_mask2), bcm_limit))

plt.figure()
plt.plot(idx_mask2, tmp_[idx_mask2], '.')
plt.ylabel('bcm current [mA]')
plt.xlabel('Point #')
plt.title('Points masked due to low BCM current')

# Timestamps are read from `data` once (the previous name `ds` is not defined
# in this notebook) and then indexed in memory.
timestamps = data[:, 'timestamp']

# -- apply mask
# Wall-clock times of the masked points.
timemask = [datetime.fromtimestamp(t) for t in timestamps[idx_mask2]]

# --
# Positions within idx_plot of the points that survive the mask, and their times.
idx_plot2, = np.where(~np.isin(idx_plot, idx_mask2))
timeplot2 = [datetime.fromtimestamp(t) for t in timestamps[idx_plot2]]

print('After masking, average BCM current = %.3f +- %.3f mA' % (np.mean(tmp_[idx_plot2]), np.std(tmp_[idx_plot2])))



In [None]:
# One figure per scalar column: the full time series, with the points
# flagged by the BCM mask over-plotted as crosses.
for item in sc_attrs:
    # Read the column from `data` once (the previous name `ds` is not defined
    # in this notebook) and do the index selection in NumPy.
    column = data[:, item]
    plt.figure(figsize=[16, 3])
    plt.plot(timeplot, column[idx_plot], '.-')
    plt.plot(timemask, column[idx_mask2], 'x')
    plt.title(item)


In [None]:
# Locate the point with the largest camera integral and inspect that frame.
# Column names are built from `cam` and read from `data`; the previous names
# `camname` and `ds` are not defined anywhere in this notebook.
idx_peak = np.argmax(data[:, '%s_Integral' % cam])

# For each profile (Y first, then X): linear plot on the left, profile
# normalised to its maximum on a log axis on the right.
for axis_name in ('Y', 'X'):
    profile = data[idx_peak, '%s_Profile%s' % (cam, axis_name)]
    title = '%s:Profile%s' % (cam, axis_name)

    plt.figure(figsize=[10, 5])
    plt.subplot(121)
    plt.title(title)
    plt.plot(profile, '.-')

    plt.subplot(122)
    plt.title(title)
    plt.semilogy(profile / profile.max(), '.-')
    #plt.ylim([.01,1.01])

# Brightest pixel of the peak frame (array .max() instead of Python's max()
# over a flattened copy).
pix_max = data[idx_peak, '%s_Image' % cam].max()
# NOTE(review): the VT04/VT06/HZ04 labels are paired with the x/xp/y
# PositionSync columns as in the original -- confirm the mapping.
print('peak at VT04=%.3f mm, VT06=%.3f mm, HZ04=%.3f mm' % (data[idx_peak, 'x_PositionSync'],
                                                        data[idx_peak, 'xp_PositionSync'],
                                                        data[idx_peak, 'y_PositionSync']))
print('Max pixel value %i' % (pix_max))
print('Saturation %.3f' % (data[idx_peak, '%s_Saturation' % cam]))

In [None]:
# Display the index of the peak point found above (notebook cell output).
idx_peak

# Divide into datasets by data-type, save new .h5 file


In [None]:
# Write 'preproc-<filename>.h5' next to the raw file, with the scan columns
# split into three compound datasets: scalars, waveforms and images.
# The context manager closes the output file even if the copy loop raises
# (the original left it open on error).
with h5py.File(join(datadir, 'preproc-' + filename + '.h5'), 'w') as writer:
    data_sc = writer.create_dataset('scalar_data', (len(data),), dtype=sc_dtype)
    data_wf = writer.create_dataset('waveform_data', (len(data),), dtype=wf_dtype)
    data_im = writer.create_dataset('image_data', (len(data),), dtype=im_dtype)
    for i in trange(len(data)):
        # Read the whole record once, then distribute its fields, instead of
        # issuing one HDF5 read per field.
        row = data[i]
        for attr in sc_attrs:
            data_sc[i, attr] = row[attr]
        for attr in wf_attrs:
            data_wf[i, attr] = row[attr]
        for attr in im_attrs:
            data_im[i, attr] = row[attr]