# Step 0

In [None]:
import sys
import os
from os.path import join
import time
from datetime import datetime
import importlib
import numpy as np
import pandas as pd
import h5py
import imageio
from scipy import ndimage
from scipy import interpolate
import skimage
from tqdm import tqdm
from tqdm import trange
from matplotlib import pyplot as plt
from matplotlib import cm as cm
from matplotlib import colors
from matplotlib import patches
import proplot as pplt

# Local
sys.path.append('..')
from tools import energyVS06 as energy
from tools import image_processing as ip
from tools.plotting import plot_profiles
from tools.plotting import plot_image
from tools.plotting import plot_compare_images

In [None]:
# Notebook-wide proplot defaults, applied in the order listed.
for _rc_key, _rc_value in {
    'grid': False,
    'cmap.discrete': False,
    'cmap.sequential': 'viridis',
}.items():
    pplt.rc[_rc_key] = _rc_value

In [None]:
# Directory that holds the files of this measurement session.
datadir = '/Diagnostics/Data/Measurements/scan-xxpy-image-ypdE/2022-04-29/'
# List the directory contents; the bare name on the last line displays
# them as the cell output.
filenames = os.listdir(datadir)
filenames

## Scan overview 

In [None]:
# Base name (no extension) of the scan to analyze; the '.h5' and '.csv'
# files with this stem are read from `datadir` in the following cells.
filename = '220429190854-scan-xxpy-image-ypdE'

In [None]:
# Open the raw scan file read-only. The handle is left open on purpose:
# later cells keep reading '/scandata' through it.
file = h5py.File(join(datadir, filename + '.h5'), 'r')

# Errors and warnings from log: print every entry whose level is not INFO,
# prefixed with that entry's own timestamp.
log = file['/log']
info_level = 'INFO'.encode('utf')
for i in range(log.size):
    if log[i, 'level'] != info_level:
        timestr = datetime.fromtimestamp(log[i, 'timestamp']).strftime("%m/%d/%Y, %H:%M:%S")
        print(f"{timestr} {log[i, 'message']}")

# Configuration data: one section per key under '/config', listing every
# named field of that entry.
config = file['/config']
for key in config.keys():
    print(f"{key}")
    print("--------------")
    for name in config[key].dtype.names:
        print(f"{name}: {config[key][name]}")
    print()

In [None]:
# Create DataFrame of actuator start, stop, speed.
ndim = 3
names = []
for i in range(1, ndim + 1):
    names.extend([f'start{i}', f'stop{i}', f'speed{i}'])
# NOTE(review): skiprows=25 presumably skips a fixed-length header in the
# .csv file -- confirm against the file format.
# The separator is a regular expression, hence the raw string.
df = pd.read_csv(
    join(datadir, filename + '.csv'),
    skiprows=25, sep=r'\s+', header=None, names=names,
)

# Extract actuator points: for each actuator, the (start, stop) pairs of all
# rows flattened into a single 1D sequence.
act_pts = [
    df.loc[:, [f'start{i}', f'stop{i}']].values.ravel()
    for i in range(1, ndim + 1)
]

# Plot actuator points, one panel per actuator.
fig, axes = pplt.subplots(nrows=ndim, figsize=(7, 3))
for ax, pts in zip(axes, act_pts):
    ax.plot(pts, color='black')
plt.show()

In [None]:
# Handle to the per-point scan records; print each field of its dtype as a
# (name, spec) pair.
data = file['/scandata']
for name, spec in data.dtype.fields.items():
    print((name, spec))

In [None]:
# Names of the last three fields of the scan records.
# NOTE(review): presumably these are the three actuator columns -- confirm
# against the field listing printed above.
acts = data.dtype.names[-3:]

In [None]:
# Summary statistics of the scan: iteration count, wall-clock duration and
# the averages derived from them.
n_points = len(data)
n_iterations = data[-1, 'iteration']  # iteration number of the last record
print('# iterations %i'%n_iterations)

# Elapsed time between the first and last record; the hours figure divides
# by 3600 seconds per hour.
duration = data[-1, 'timestamp'] - data[0, 'timestamp']
print('%i points recorded over duration %.1f seconds (%.1f hours)'%(n_points, duration, duration / 3600))
print('Effective rep rate = %.2f'%(n_points / duration))

iteration_duration = duration / n_iterations
print('Time per iteration: %.2f seconds'%iteration_duration)
npts_iteration = n_points / n_iterations
print('Points per iteration: %.2f '%npts_iteration)

In [None]:
# Indices of the first (up to) 240 points, used to plot the start of the scan.
idx = np.arange(min(240, len(data)))
# Wall-clock time of every point. The timestamp column is read in a single
# slice rather than one dataset access per point.
times = [datetime.fromtimestamp(t) for t in data[:, 'timestamp']]

In [None]:
# One stacked panel per actuator column, showing its values at the points
# selected by `idx`.
fig, axes = pplt.subplots(nrows=3, figsize=(7, 4), spany=False, aligny=True)
for panel, column in zip(axes, acts):
    trace = data[idx, column]
    panel.plot(idx, trace, color='black')
    panel.set_ylabel(column)
plt.show()

In [None]:
# Gaps (s) above `max_pause` are treated as interruptions; `fill_pause` is the
# value (s) that replaces them before the histogram is built.
max_pause = 30.0
fill_pause = 0.2

# Time between consecutive points.
dt = np.diff(data[:, 'timestamp'])
print('reprate = %.2f Hz'%(1/np.median(dt)))

# Mask long interruptions.
is_long = dt > max_pause
print('Pauses longer than %g seconds:'%max_pause)
print(dt[is_long])
dt[is_long] = fill_pause

# Overall rate from the first and last timestamps (interruptions included).
duration = data[-1, 'timestamp'] - data[0, 'timestamp']
print('effective rep-rate = %.2f Hz'%(len(data) / duration))

# Histogram of the masked gaps on a log count axis.
hist, bins, hist_patches = plt.hist(dt, bins=21)
plt.ylabel('bin counts')
plt.xlabel('Pauses between datapoints')
plt.gca().set_yscale('log')

# Gaps that fall beyond the first histogram bin are counted as pauses.
idx_bins = np.digitize(dt, bins)
idx_pause, = np.where(idx_bins > 1)
print('most long pauses are %.2f seconds'%np.median(dt[idx_pause]))

In [None]:
# idx_plot = np.arange(0,lds,1)

# timeplot = []
# for i in idx_plot:
#     timeplot.append(datetime.fromtimestamp(ds[i,'timestamp']))

In [None]:
# item = 'bcm04'
# tmp_ = ds[:,item]
# plt.figure(figsize=[16,3])
# plt.plot(timeplot,tmp_[idx_plot],'.-'); 
# plt.title(item)
# plt.ylim([-28,-32])
# plt.grid(axis='y')

# print('Before masking, average BCM current = %.3f +- %.3f mA'%(np.mean(tmp_),np.std(tmp_)))

In [None]:
# ### Mask BCM current
# bcm_limit=20
# idx_mask2, = np.where(tmp_>-1*bcm_limit)
# print(idx_mask2)
# print('%i points masked due to bcm current < %.3f mA'%(len(idx_mask2),bcm_limit))

# plt.figure()
# plt.plot(idx_mask2,tmp_[idx_mask2],'.')
# plt.ylabel('bcm current [mA]')
# plt.xlabel('Point #')
# plt.title('Points masked due to low BCM current')

# # -- apply mask
# timemask = []
# for i in idx_mask2:
#     timemask.append(datetime.fromtimestamp(ds[i,'timestamp']))


# # -- 
# idx_plot2, = np.where(~np.isin(idx_plot,idx_mask2))
# timeplot2 = []
# for i in idx_plot2:
#     timeplot2.append(datetime.fromtimestamp(ds[i,'timestamp']))

# print('After masking, average BCM current = %.3f +- %.3f mA'%(np.mean(tmp_[idx_plot2]),np.std(tmp_[idx_plot2])))



In [None]:
# for item in sc_attrs:
#     plt.figure(figsize=[16,3])
#     plt.plot(timeplot,ds[idx_plot,item],'.-'); 
#     plt.plot(timemask,ds[idx_mask2,item],'x')
#     plt.title(item)


In [None]:
# idx_peak = np.argmax(data[:,'%s_Integral'%cam])

# plt.figure(figsize=[10,5]); 
# plt.subplot(121)
# plt.title('%s:ProfileY'%cam)
# plt.plot(data[idx_peak,'%s_ProfileY'%cam],'.-')

# plt.subplot(122)
# plt.title('%s:ProfileY'%cam)
# plt.semilogy(data[idx_peak,'%s_ProfileY'%cam]/data[idx_peak,'%s_ProfileY'%cam].max(),'.-')
# #plt.ylim([.01,1.01])

# plt.figure(figsize=[10,5]); 
# plt.subplot(121)
# plt.title('%s:ProfileX'%cam)
# plt.plot(data[idx_peak,'%s_ProfileX'%cam],'.-')

# plt.subplot(122) 
# plt.title('%s:ProfileX'%cam)
# plt.semilogy(data[idx_peak,'%s_ProfileX'%cam]/data[idx_peak,'%s_ProfileX'%cam].max(),'.-')
# #plt.ylim([.01,1.01])


# pix_max = max(data[idx_peak,'%s_Image'%cam].flatten())
# print('peak at VT04=%.3f mm, VT06=%.3f mm, HZ04=%.3f mm'%(data[idx_peak,'x_PositionSync'],
#                                                         data[idx_peak,'xp_PositionSync'],
#                                                         data[idx_peak,'y_PositionSync']))
# print('Max pixel value %i'%(pix_max))
# print('Saturation %.3f'%(data[idx_peak,'%s_Saturation'%cam]))

## Process images 

In [None]:
cam = 'cam34'  # camera name; prefix of the '<cam>_Image' and '<cam>_Integral' fields
ny = 512  # number of rows used when reshaping a flat image
nx = 612  # number of columns used when reshaping a flat image

def get_image(index):
    """Return the camera image of scan point `index` as a 2D array.

    Reads the `<cam>_Image` entry of row `index` from `data` and reshapes
    it to `(ny, nx)`.
    """
    flat_pixels = data[index, cam + '_Image']
    return flat_pixels.reshape(ny, nx)

In [None]:
# Index of the scan point with the largest '<cam>_Integral' value.
ipeak = np.argmax(data[cam + '_Integral'])

In [None]:
# Image at the peak point; the processing steps below are tuned on it.
im = get_image(ipeak)

In [None]:
# Margins passed to `ip.crop`.
# NOTE(review): presumably pixels trimmed from the left/right/bottom/top
# sides -- confirm against `ip.crop`.
edges = {'l': 140, 'r': 100, 'b': 130, 't': 80}
im1 = ip.crop(im, **edges)

# Size and anchor point of the region that remains after trimming.
width = im.shape[1] - (edges['l'] + edges['r'])
height = im.shape[0] - (edges['t'] + edges['b'])
corner = (edges['l'], edges['b'])

# Compare before/after and outline the retained region in red on the
# first-column panels.
axes = plot_compare_images(im, im1)
for ax in axes[:, 0]:
    outline = patches.Rectangle(corner, width, height, fill=False, ec='red')
    ax.add_patch(outline)
plt.show()

In [None]:
# Downscale the cropped image by averaging over blocks of
# downscale_factor x downscale_factor pixels.
downscale_factor = 3
im = np.copy(im1)  # keep the input of this step for the comparison plot
im1 = skimage.transform.downscale_local_mean(
    im,
    (downscale_factor, downscale_factor),
)
print('New image shape =', im1.shape)
# Ratio of memory footprints before and after downscaling.
print(f'im.nbytes / im1.nbytes = {(im.nbytes / im1.nbytes):.3f}')

axes = plot_compare_images(im, im1)
axes.format(toplabels=['Original', f'Downscaled by factor {downscale_factor}'])

In [None]:
# Threshold applied to the downscaled image.
thresh = 1000
im = np.copy(im1)  # keep the input of this step for the comparison plot
# Threshold as a fraction of the peak of the image it is applied to.
thresh_frac_peak = thresh / np.max(im)
im1 = ip.thresh(im, thresh, mask=True)

axes = plot_compare_images(im, im1)

In [None]:
# Locate the brightest pixel (first occurrence) of the pre-threshold image.
peak_value = np.max(im)
irow, icol = np.where(im == peak_value)
i, j = irow[0], icol[0]

fig, axes = pplt.subplots(ncols=3, sharey=False)

# Left panel: the image with the peak row and column marked in red.
axes[0].pcolormesh(im)
kws = dict(color='red', alpha=0.4)
axes[0].axhline(i, **kws)
axes[0].axvline(j, **kws)

# Middle/right panels: the peak row and peak column, each normalized to its
# own maximum, with the threshold level drawn as a faint horizontal line.
kws = dict(color='black', lw=None)
slices = (
    (axes[1], f'Row {i}', im[i, :]),
    (axes[2], f'Column {j}', im[:, j]),
)
for ax, title, profile in slices:
    profile_max = np.max(profile)
    ax.set_title(title)
    ax.plot(np.arange(profile.shape[0]), profile / profile_max, **kws)
    ax.axhline(thresh / profile_max, color='black', alpha=0.1)
axes[1:].format(yscale='log')
plt.show()

Now we process all the images.

In [None]:
# Apply the crop and downscale steps chosen above to every scan point.
# The threshold is deliberately not applied here.
block_shape = (downscale_factor, downscale_factor)
images = []
for index in trange(len(data)):
    cropped = ip.crop(get_image(index), **edges)
    images.append(skimage.transform.downscale_local_mean(cropped, block_shape))

## Divide into datasets by data-type, save new .h5 file

Create a new h5 file with three data sets: scalar (0d), waveform (1d), and image (2d). First, collect the appropriate dtypes.

In [None]:
# Sort every column of the scan records into one of three groups by name:
# '*_Image*' -> image, '*Profile*' -> waveform, everything else -> scalar.
attrs = data.dtype.names
print(list(attrs))

sc_dtype, wf_dtype, im_dtype = [], [], []
wf_attrs, im_attrs = [], []
print('Moving the following columns:')
# Columns are visited last-to-first, so the collected dtype lists (and the
# printed names) come out in reverse column order.
for attr in reversed(attrs):
    field = (attr, data.dtype[attr])
    if '_Image' in attr:
        im_attrs.append(attr)
        im_dtype.append(field)
        print(attr)
    elif 'Profile' in attr:
        wf_attrs.append(attr)
        wf_dtype.append(field)
        print(attr)
    else:
        sc_dtype.append(field)

# Scalar column names keep the original column order.
moved = set(im_attrs) | set(wf_attrs)
sc_attrs = [attr for attr in attrs if attr not in moved]

sc_dtype = np.dtype(sc_dtype)
wf_dtype = np.dtype(wf_dtype)
im_dtype = np.dtype(im_dtype)

# Override the image dtype. The original images had dtype '<i4', but they
# became floats if they were downsized. Each processed image is stored
# flattened, so the field length is the pixel count of a processed image.
first_image = images[0]
im_dtype = np.dtype([(cam + '_Image', str(first_image.dtype), (first_image.size,))])

for label, group_dtype in (
    ('scalars', sc_dtype),
    ('waveforms', wf_dtype),
    ('image', im_dtype),
):
    print(f'\n{label}:')
    print(group_dtype)

In [None]:
# Write the three groups to 'preproc-<filename>.h5' in `datadir`. The context
# manager closes the file even if a write fails, so the cell can be re-run
# without a stale open handle on the output file.
with h5py.File(join(datadir, 'preproc-' + filename + '.h5'), 'w') as writer:
    # One compound-dtype row per scan point in each dataset.
    data_sc = writer.create_dataset('scalardata', (len(data),), dtype=sc_dtype)
    data_wf = writer.create_dataset('wfdata', (len(data),), dtype=wf_dtype)
    data_im = writer.create_dataset('imagedata', (len(data),), dtype=im_dtype)
    for i in trange(len(data)):
        # Scalars and waveforms are copied unchanged from the raw file.
        for attr in sc_attrs:
            data_sc[i, attr] = data[i, attr]
        for attr in wf_attrs:
            data_wf[i, attr] = data[i, attr]
        # Images are replaced by their processed, flattened versions.
        for attr in im_attrs:
            data_im[i, attr] = images[i].ravel()

In [None]:
print(images[0].shape)  # Images are saved flattened, so copy this shape to the next notebook to reshape them.