In [1]:
from collections import OrderedDict
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from functools import partial
import h5py
import multiprocessing as mp
import os
import re
import numpy as np
import time
import xarray as xr
from itertools import groupby


from calibration import parse_ids
from karabo_data import DataCollection, by_index, RunDirectory, stack_detector_data
from karabo_data.geometry2 import LPD_1MGeometry

import matplotlib.pyplot as plt
%matplotlib notebook

In [90]:
run = RunDirectory("/gpfs/exfel/exp/FXE/201931/p900089/raw/r0055").select([("*/DET/*CH0:xtdf", "image.data")])

Series([], dtype: uint64)

In [None]:
tid, data = run.train_from_index(20)

In [None]:
data["FXE_RR_DAQ/ADC/1:network"].keys()

In [None]:
dig = data["FXE_RR_DAQ/ADC/1:network"]["digitizers.channel_3_B.raw.samples"]
dig.shape

In [None]:
%matplotlib notebook
plt.plot(np.arange(5000), dig[10000:15000])

In [None]:
digitizer_data = run.get_array("FXE_RR_DAQ/ADC/1:network", "digitizers.channel_3_B.raw.samples")

In [None]:
digitizer_data.shape

In [None]:
%matplotlib notebook

plt.plot(range(digitizer_data.shape[1]), np.abs(digitizer_data.values[0, :]))


# Evaluate dark offset

In [2]:
def dark_offset(module_number, path, *, pulse_ids=None):
    """ Process Dark data

    Parameters
    ----------
    module_number: int
        Channel number between 0, 15
    path: str
        Path to Run folder
    pulse_ids: str
        For eg. ":" to select all pulses in a train
                "start:stop:step" to select indices with certain step size
                "1,2,3" comma separated pulse index to select specific pulses
                "1,2,3, 5:10" mix of above two
        Default: all pulses ":"
        
    Return
    ------
    out: ndarray
        Shape: (n_pulses, ..., slow_scan, fast_scan)
    """

    if not path or module_number not in range(16):
        return

    pattern = f"(.+)LPD{module_number:02d}(.+)"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]

    if not files:
        return

    run = DataCollection.from_paths(files)

    module = [key for key in run.instrument_sources
              if re.match(r"(.+)/DET/(.+):(.+)", key)]

    if len(module) != 1:
        return
    
    run = run.select([(module[0], "image.data")])
    
    pulse_ids = ":" if pulse_ids is None else pulse_ids
    pulses = parse_ids(pulse_ids)
    
    mean_image = 0
    counts = 0
    for tid, data in run.trains():
        image = np.squeeze(data[module[0]]["image.data"], axis=1) # (pulses, 1, ss, fs)
        
        if image.shape[0] == 0:
            continue
        
        if pulses != [-1]:
            image = image[pulses, ...].astype(np.float32)
        else:
            image = image.astype(np.float32)
              
        mean_image += image
        counts += 1
    
    if counts != 0:
        return mean_image / counts


In [3]:
modules = "0:16"
pulse_ids = "6, 7, 8, 9, 10"

highgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0049"
mediumgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0050"
lowgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0051"

module_numbers = parse_ids(modules)

dark_data = {"high":{}, "medium":{}, "low":{}}

print(module_numbers)
print(parse_ids(pulse_ids))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
[6, 7, 8, 9, 10]


### High Gain dark evaluation

In [4]:
_dark_eval = partial(dark_offset, 
                     path=highgain_dark_folder, 
                     pulse_ids=pulse_ids)

t0 = time.perf_counter()

with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(module_numbers, executor.map(_dark_eval, module_numbers)):
        dark_data["high"][modno] = ret

print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")     

Time to evaluate roi intensities: 70.75002278015018


In [12]:
dark_data["high"][14].shape

(5, 256, 256)

### Medium Gain dark evaluation

In [5]:
_dark_eval = partial(dark_offset, 
                     path=mediumgain_dark_folder, 
                     pulse_ids=pulse_ids)

t0 = time.perf_counter()
with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(module_numbers, executor.map(_dark_eval, module_numbers)):
        dark_data["medium"][modno] = ret

print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")

Time to evaluate roi intensities: 70.08810187131166


### Low Gain dark evaluation

In [7]:
_dark_eval = partial(dark_offset, 
                     path=lowgain_dark_folder, 
                     pulse_ids=pulse_ids)

t0 = time.perf_counter()

with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(module_numbers, executor.map(_dark_eval, module_numbers)):
        dark_data["low"][modno] = ret

print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")

Time to evaluate roi intensities: 70.8599690310657


# Write dark offset to file

In [None]:
dark_data_file = "/home/kamile/data/dark_data.h5"

with h5py.File(dark_data_file, "w") as f:
    for gain in dark_data.keys():
        g = f.create_group(f"entry_1/instrument/gain_{gain}")
        for modno, data in dark_data[gain].items():
            if data is not None:
                h = g.create_group(f"module_{modno}")
                h.create_dataset('data', data=data)


In [8]:
%matplotlib notebook

plt.imshow(dark_data["high"][15][2])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x2b54fa3be550>

# Delay scan

In [9]:
def module_roi_intensity(module_number, path, *, pulse_ids=None, rois=None):
    
    if not path or module_number not in range(16):
        return

    pattern = f"(.+)LPD{module_number:02d}(.+)"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]

    if not files:
        return

    run = DataCollection.from_paths(files)

    module = [key for key in run.instrument_sources
              if re.match(r"(.+)/DET/(.+):(.+)", key)]

    if len(module) != 1:
        return
    
    run = run.select([(module[0], "image.data")])
    
    pulse_ids = ":" if pulse_ids is None else pulse_ids
    pulses = parse_ids(pulse_ids)
    
    intensities = []
    train_ids = []
    
    for tid, data in run.trains():
        image = np.squeeze(data[module[0]]["image.data"], axis=1) # (pulses, 1, ss, fs)
        
        if image.shape[0] == 0:
            continue
        
        if rois is not None:
            x0, x1, y0, y1 = rois
            image = image[..., x0:x1, y0:y1]
        
        if pulses != [-1] and image.shape[0] != 0:
            image = image[pulses, ...].astype(np.float32)
        else:
            image = image.astype(np.float32)
        
        intensities.append(np.mean(image, axis=(-1,-2)))
        train_ids.append(tid)
    
    if not intensities or not train_ids:
        return
    
    coords = {'trainId':np.array(train_ids)}
    dims = ['trainId', 'dim_0']
    data = xr.DataArray(np.stack(intensities), dims=dims, coords=coords)

    return data

In [131]:
modules = "0:16"
pulse_ids = "6, 7, 8, 9, 10"

rois = [32, 64, 0, 128]

highgain_run_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0056"
mediumgain_run_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0057"
lowgain_run_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0058"

module_numbers = parse_ids(modules)

roi_intensities = {"high":{}, "medium":{}, "low":{}}


### High Gain ROI intensities

In [132]:
_roi_intensity_eval = partial(module_roi_intensity, 
                              path=highgain_run_folder, 
                              pulse_ids=pulse_ids,
                              rois=rois)
t0 = time.perf_counter()

with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(
        module_numbers, executor.map(_roi_intensity_eval, module_numbers)):
        roi_intensities["high"][modno] = ret
        
print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")

Time to evaluate roi intensities: 657.9133541956544


### Medium Gain ROI intensities

In [133]:
_roi_intensity_eval = partial(module_roi_intensity, 
                              path=mediumgain_run_folder, 
                              pulse_ids=pulse_ids,
                              rois=rois)
t0 = time.perf_counter()

with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(
        module_numbers, executor.map(_roi_intensity_eval, module_numbers)):
        roi_intensities["medium"][modno] = ret
        
print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")

Time to evaluate roi intensities: 744.2954788058996


### Low Gain ROI intensities

In [134]:
_roi_intensity_eval = partial(module_roi_intensity, 
                              path=lowgain_run_folder, 
                              pulse_ids=pulse_ids,
                              rois=rois)
t0 = time.perf_counter()

with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    for modno, ret in zip(
        module_numbers, executor.map(_roi_intensity_eval, module_numbers)):
        roi_intensities["low"][modno] = ret
        
print(f"Time to evaluate roi intensities: {time.perf_counter() - t0}")
       

Time to evaluate roi intensities: 672.3823870234191


### Write RoI intensities to file

In [None]:
roi_intensities_file = "/home/kamile/data/roi_intensities.h5"


with h5py.File(roi_intensities_file, "w") as f:
    for gain in roi_intensities.keys():
        g = f.create_group(f"entry_1/instrument/gain_{gain}")
        for modno, data in roi_intensities[gain].items():
            if data is not None:
                h = g.create_group(f"module_{modno}")
                h.create_dataset('trainId', data=data['trainId'])
                h.create_dataset('data', data=data)


# Get Delay arrays

In [135]:
delay_src = "FXE_RR_SYS/TSYS/UTC-1-S3"
delay_prop = "backTrg3.delay.value"

delay_data = {}


# High Gain delay
run = RunDirectory(highgain_run_folder)
# get delay data: xarray
delay_data['high'] = run.get_array(delay_src, delay_prop)
delay_data['high'] = delay_data['high'].expand_dims('dim_1', axis=1)


# Medium Gain delay
run = RunDirectory(mediumgain_run_folder)
# get delay data: xarray
delay_data['medium'] = run.get_array(delay_src, delay_prop)
delay_data['medium'] = delay_data['medium'].expand_dims('dim_1', axis=1)

# Low Gain delay
run = RunDirectory(lowgain_run_folder)
# get delay data: xarray
delay_data['low'] = run.get_array(delay_src, delay_prop)
delay_data['low'] = delay_data['low'].expand_dims('dim_1', axis=1)


print(delay_data['high'])

<xarray.DataArray (trainId: 4720, dim_1: 1)>
array([[6381521],
       [6381521],
       [6381521],
       ...,
       [6381521],
       [6381521],
       [6381521]], dtype=int32)
Coordinates:
  * trainId  (trainId) uint64 589035425 589035426 ... 589040143 589040144
Dimensions without coordinates: dim_1


In [None]:
delay_data.values[:].shape
%matplotlib notebook
plt.plot(range(len(delay_data.values[:, 0])), delay_data.values[:,0], '-o'  )

## Align delay data with ROI Intesities along TrainIds and plot


In [136]:
import  matplotlib.pyplot as plt
%matplotlib notebook

pulse = 2

for gain in roi_intensities.keys():
    fig, ax = plt.subplots(1, 1)
    for modno in module_numbers:
        if roi_intensities[gain][modno] is not None:
            roi_int, delay = xr.align(roi_intensities[gain][modno], delay_data[gain])

            s = list(zip(delay[:, 0].values, roi_int[:, pulse].values))
            roi_int_avg = []
            roi_int_std = []
            delay = []
            for key, group in groupby(sorted(s), lambda x: x[0]):
                x, y = zip(*group)
                avg, std = np.mean(np.array(y)), np.std(np.array(y))
                roi_int_avg.append(avg)
                roi_int_std.append(std)
                delay.append(x[0])        
            
            #ax.errorbar(delay, roi_int_avg, yerr=roi_int_std, uplims=True, lolims=True)
            ax.plot(delay, roi_int_avg, '-o', label=f"Mod: {modno}")
            ax.legend()
            ax.set_title(f"Gain: {gain}")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# LPD ROI intensity with Offset and Relative gain correction

In [250]:
def lpd_corrections(sequence, path, *, pulse_ids=None, rois=None, dark_data=None):

    if not path:
        return

    pattern = f"(.+)LPD(.+)-S{sequence}"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]
    if not files:
        return
    
    devices = [("*/DET/*CH0:xtdf", "image.data")]
    run = DataCollection.from_paths(files).select(devices)
    
    pulse_ids = ":" if pulse_ids is None else pulse_ids
    pulses = parse_ids(pulse_ids)
    
    out_array = None
    quad_positions = [[11.4, 299],
                      [-11.5, 8],
                      [254.5, -16],
                      [278.5, 275]]
    filename = os.path.join(
        os.getcwd(),'calibration/geometries/lpd_mar_18_axesfixed.h5')

    geom = LPD_1MGeometry.from_h5_file_and_quad_positions(
        filename, quad_positions)
    
    mean = 0
    count = 0
    intensities = []
    train_ids = []
    
    for tid, data in run.trains():
        
        def _corrections(source):
            pattern = "(.+)/DET/(.+)CH0:xtdf"
            modno = int((re.match(pattern, source)).group(2).strip())
            try:
                image = np.squeeze(data[source]["image.data"], axis=1)
            except KeyError as e:
                return

            if pulses != [-1] and image.shape[0] != 0:
                image = image[pulses, ...].astype(np.float32)
            else:
                image = image.astype(np.float32)
            
            if dark_data is not None and image.shape[0] != 0:
                high = dark_data["high"][modno]
                medium = dark_data["medium"][modno]
                low = dark_data["low"][modno]
                if all([high is not None, medium is not None, low is not None]):
                    currim = np.zeros_like(image)
                    
                    # High gain
                    corrim = image - high
                    currim[image <= 4096] = corrim[image <= 4096]
                    
                    # Medium gain
                    corrim = (image - medium) * 9.85
                    currim[(image <= 8192) & (image > 4096)] = \
                        corrim[(image <= 8192) & (image >4096)]
                    
                    # Low gain
                    corrim = (image - low) * 9.85 * 7.44
                    currim[image > 8192] = corrim[image > 8192]
                    
                    image = currim
            
            data[source]["image.data"] = image
        
        with ThreadPoolExecutor(max_workers=len(data.keys())) as executor:
            for source in data.keys():
                executor.submit(_corrections, source)
        
        # assemble image        
        try:
            stacked_data = stack_detector_data(data, "image.data")  
        except (ValueError, IndexError, KeyError) as e:
            continue

        n_images = (stacked_data.shape[0], )
        if stacked_data.shape[0] == 0:
            continue
        
        image_dtype = stacked_data.dtype
        
        if out_array is None:
            out_array = geom.output_array_for_position_fast(
                extra_shape=n_images, dtype=image_dtype)
        
        assembled, centre = geom.position_all_modules(
                stacked_data, out=out_array)
        
        assembled[np.isnan(assembled)] = 0
        mean += assembled
        count += 1
        
        if rois is not None:
            x0, x1, y0, y1 = rois
            assembled = assembled[..., x0:x1, y0:y1]
            
        intensities.append(np.nanmean(assembled, axis=(-1,-2)))
        train_ids.append(tid)
    
    if not intensities or not train_ids:
        return
    coords = {'trainId':np.array(train_ids)}
    dims = ['trainId', 'dim_0']
    data = xr.DataArray(np.stack(intensities), dims=dims, coords=coords)
        
    return mean / count, data


### To evaluate mean_lpd image and roi intensities vs trainids

In [251]:
lpd_image_correlations_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0090"

rois = []



pattern = f"(.+)LPD(.+)-S(.+).h5"
sequences = {re.match(pattern, f).group(3) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)}
_lpd_corrections = partial(lpd_corrections, 
                           path=path,
                           pulse_ids="6:11",
                           dark_data=dark_data,
                          )
image = []
intensity = []
with ProcessPoolExecutor(max_workers=len(sequences)) as executor:
    for mean, data in executor.map(_lpd_corrections, list(sequences)):
         if mean is not None and data is not None:
                image.append(mean)
                intensity.append(data)

mean_image = np.nanmean(np.stack(image), axis=0)
lpd_intensities = xr.concat(intensity, dim='trainId')



In [252]:
lpd_intensities

<xarray.DataArray (trainId: 3725, dim_0: 5)>
array([[439.13312, 440.0693 , 433.73245, 438.47867, 475.93268],
       [428.9036 , 418.84238, 423.2371 , 440.62302, 458.4445 ],
       [423.62448, 422.60703, 424.29855, 442.37015, 449.45065],
       ...,
       [456.6185 , 445.8643 , 486.7384 , 479.55118, 492.7083 ],
       [455.82074, 450.44446, 478.12512, 460.72464, 510.3427 ],
       [468.66003, 480.69946, 470.48178, 469.98798, 514.62366]],
      dtype=float32)
Coordinates:
  * trainId  (trainId) int64 589871004 589871005 ... 589871770 589871771
Dimensions without coordinates: dim_0

In [263]:
mean_image.shape

(5, 1203, 1105)

In [275]:
%matplotlib notebook
plt.imshow(mean_image[0], vmin=-100, vmax=100)

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x2b557a6679e8>

# Digitizer data

In [187]:
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter

In [266]:
run = RunDirectory(lpd_image_correlations_folder)

digitizer_source = "FXE_RR_DAQ/ADC/1:network"
digitizer_channel = "digitizers.channel_4_D.raw.samples"

digitizer_data = np.abs(run.get_array(digitizer_source, digitizer_channel))   

digitizer_data


<xarray.DataArray (trainId: 3725, dim_0: 600000)>
array([[14, 15, 14, ..., 14, 12, 15],
       [18, 18, 18, ..., 16, 13, 15],
       [14, 14, 15, ..., 12, 15, 13],
       ...,
       [15, 14, 15, ..., 15, 15, 14],
       [17, 17, 20, ..., 13, 12, 12],
       [19, 18, 19, ..., 12, 12, 11]], dtype=int16)
Coordinates:
  * trainId  (trainId) uint64 589869724 589869725 ... 589873447 589873448
Dimensions without coordinates: dim_0

### Find optimum height and distance to find peaks which will be used to evaluate normalizer

In [267]:
filtered = gaussian_filter(np.mean(digitizer_data.values[:, :], axis=0) , 1.5)
peaks, _ = find_peaks(filtered, height=100, distance=2000)
len(peaks)

16

In [268]:
%matplotlib notebook

fig, ax = plt.subplots(1, 1)
ax.plot(digitizer_data.dim_0, np.mean(digitizer_data.values[:, :], axis=0)  )
ax.plot(digitizer_data.dim_0[peaks], np.mean(digitizer_data.values[:, :], axis=0)[peaks], 'o')
ax.plot(digitizer_data.dim_0, filtered  )

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2b557a563320>]

### Evaluate area under curve around peaks in digitizer data

In [269]:
def digitizer_normalizer(digitizer_data, height=100, distance=100):
    num_train = digitizer_data['trainId'].shape[0]
    
    data = np.mean(digitizer_data.values[:, :], axis=0)
    filtered = gaussian_filter(data, 1.5)
    peaks, _ = find_peaks(data, height=height, distance=distance)
    for train in range(num_train):
        normalizer_train = []
        for peak in peaks:
            integral = np.sum(digitizer_data.values[train, :][peak-50:peak+50])
            normalizer_train.append(integral)

        normalizer.append(np.stack(normalizer_train) )
        
    coords = {'trainId':digitizer_data['trainId'].values}
    dims = ['trainId', 'dim_1']
    return xr.DataArray(np.stack(normalizer), dims=dims, coords=coords)
        

auc = digitizer_normalizer(digitizer_data, height=100, distance=2000)
        

In [270]:
auc

<xarray.DataArray (trainId: 3725, dim_1: 16)>
array([[38704, 40686, 36099, ..., 20365, 20125, 19002],
       [35858, 34791, 35554, ..., 17171, 17465, 14955],
       [32208, 33249, 34030, ..., 16297, 21028, 15427],
       ...,
       [35394, 32579, 36311, ..., 10307, 11471, 10644],
       [36171, 31239, 33306, ..., 20946, 22491, 20647],
       [39996, 34952, 33916, ..., 19453, 19635, 16431]])
Coordinates:
  * trainId  (trainId) uint64 589869724 589869725 ... 589873447 589873448
Dimensions without coordinates: dim_1

### Align LPD intensity xarray and AUC (area under curve from digitizer data) and plot

In [274]:
pulse = 1


fig, ax = plt.subplots(1, 1)

lpd_intensities, auc = xr.align(lpd_intensities, auc)

ax.plot(lpd_intensities[:, pulse], auc[:, pulse], 'o', label=f"Pulse index: {pulse}")
ax.set_xlabel("AUC digitizer")
ax.set_ylabel("LPD ROI intensity")
ax.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x2b557c804fd0>