In [None]:
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Processing data

This notebook provides an example of how to process `H5Scan` and `H5Data` datasets.

In [None]:
from e11 import H5Scan, H5Data
from e11.tools import t_index
from e11.process import vrange, mean, total

## Simple processing

The functions in `e11.process` are designed to be applied to one or more datasets.

In [None]:
import os

# the example scan file sits in ./example_data under the current working directory
example_dir = os.path.join(os.getcwd(), 'example_data')
fil = os.path.join(example_dir, 'microwave_scan.h5')
scan = H5Scan(fil)

First, we load up the raw data to decide how to process it.

In [None]:
dat = scan.array('osc_0')
osc = scan.attrs('osc_0')

# plot
fig, ax = plt.subplots()

# data
# dt is scaled by 1e6 to match the microsecond axis label below
dt = 1e6 * osc['dt']
# build the time axis from integer sample indices so that it always has exactly
# one point per sample (np.arange with a float step can return one element too
# many because of rounding, which would make ax.plot() fail)
xvals = np.arange(len(dat[0])) * dt
# overlay two example traces, each shifted by the mean of its first 100 samples
for trace in (0, 80):
    yvals = dat[trace]
    ax.plot(xvals, yvals - np.mean(yvals[:100]))

# windows used by the later processing cells, in the same units as xvals
# background
t_bk = (0.06, 0.18)
# signal 0
t_sig0 = (0.465, 0.510)
# signal 1
t_sig1 = (0.77, 0.85)
# mark both edges of each window with dashed vertical lines
for window, colour in ((t_bk, 'r'), (t_sig0, 'k'), (t_sig1, 'g')):
    for edge in window:
        ax.axvline(edge, c=colour, lw=1, ls='--')

# format
# raw string: '\m' is not a valid escape sequence in a normal string literal
ax.set_xlabel(r'time ($\mu$s)')
ax.set_ylabel('signal (V)')

# output
plt.show()

For this example, we will calculate the mean value of the scope data in the first window using `mean()`.  This provides the background offset values.

In [None]:
# background: mean of every trace inside the t_bk window
bk_window = slice(*t_index(t_bk, dt=dt))
background = mean(scan.array('osc_0'), subset=[slice(None), bk_window])
background.columns = ['offset']

We subtract the background from each trace and then calculate the total value in the second and third windows using `total()`.  The ratio between these two values is the final result.

In [None]:
# signal
# remove the per-trace offset, then sum each trace inside the two signal windows
bksub = np.subtract(scan.array('osc_0'), background.values)
all_traces = slice(None)
res0 = total(bksub, subset=[all_traces, slice(*t_index(t_sig0, dt=dt))])
res0.columns = ['total_0']
res1 = total(bksub, subset=[all_traces, slice(*t_index(t_sig1, dt=dt))])
res1.columns = ['total_1']

# map result to microwave frequency
df = scan.df('analysis').join(res0).join(res1)
# combine the two sums and rescale the ratio to span 0 to 1
ratio = df['total_1'] / df['total_0']
df['f2'] = (ratio - ratio.min()) / (ratio.max() - ratio.min())
# rescale f in the same way
f_col = df['f']
df['f1'] = (f_col - f_col.min()) / (f_col.max() - f_col.min())
df.head()

In [None]:
# plot both normalised results against the scanned variable
fig, ax = plt.subplots()
for column in ('f1', 'f2'):
    ax.plot(df['var'], df[column], label=column)
ax.set(xlabel="microwave frequency (GHz)", ylabel="signal (arb. units)")
ax.legend()
plt.show()

## Advanced processing

Using the `e11.process` functions as demonstrated above with `H5Scan` would work for `H5Data` datasets too; however, it is usually better to use them with `H5Data.apply()`.  This applies the function to each group, and the results are indexed by `squid` to keep track of where each came from.

### Vrange

Here, we are applying the `vrange` function to measure the vertical range of array data.

In [None]:
# load data from ./example_data under the current working directory
example_dir = os.path.join(os.getcwd(), 'example_data')
fil = os.path.join(example_dir, 'array_data.h5')
data = H5Data(fil)

In [None]:
# apply vrange to 'OSC_0' for every squid; info=True also returns processing info
rng, info = data.apply(
    vrange,
    data.squids,
    ['OSC_0'],
    info=True,
    name='vr',
)
rng.head()

In [None]:
# summary statistics (describe()) of the vrange results
rng.describe()

In [None]:
# information about the processing
# (second value returned by data.apply() because info=True was passed)
info

In [None]:
# plot: move the index into ordinary columns, then draw one panel per column
rng_flat = rng.reset_index()
rng_flat.plot(subplots=True)

# output
plt.show()

### _apply()_ with multiple datasets

In [None]:
from e11.process import stats, mean

In [None]:
# apply mean to two datasets at once; one key is supplied per dataset.
# Keep the full result in avg (as the stats cell does with sta) and truncate
# only for display, so avg does not silently hold just the first five rows.
avg = data.apply(mean, data.squids, ['OSC_0', 'OSC_0'], keys=['av_A', 'av_B'])
avg.head()

In [None]:
# apply stats to two datasets at once; one key is supplied per dataset
sta = data.apply(
    stats,
    data.squids,
    ['OSC_0', 'OSC_0'],
    keys=['A', 'B'],
)
sta.head()

### _apply()_ with lambda functions

The _apply()_ method can accept lambda functions that return either a _pd.Series()_ or a _pd.DataFrame()_.

In [None]:
# maximum along axis 1 of each 'OSC_0' array, returned as a Series named "max"
mx = data.apply(
    lambda arr: pd.Series(np.max(arr, axis=1), name="max"),
    data.squids,
    'OSC_0',
)
mx.head()

The function must accept keyword arguments (e.g. `**kwargs`) if you pass any to _apply()_.  If you call _apply()_ with multiple datasets, they are passed to the function as separate positional arguments (an unpacked list).

In [None]:
# two-column frame: max of the first dataset and min of the second (both along
# axis 1); the column names come from the "columns" keyword passed to apply()
func = lambda *arr, **kwargs: pd.DataFrame(
    np.array([
        np.max(arr[0], axis=1),
        np.min(arr[1], axis=1),
    ]).T,
    columns=kwargs["columns"],
)
mxs = data.apply(
    func,
    data.squids,
    ['OSC_0', 'OSC_0'],
    columns=["max_0", "min_1"],
)
mxs.head()