In [42]:
from collections import OrderedDict
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from functools import partial
import h5py
import multiprocessing as mp
import os
import re
import numpy as np
import time
import xarray as xr
from itertools import groupby


from calibration import parse_ids
from karabo_data import DataCollection, by_index, RunDirectory, stack_detector_data
from karabo_data.geometry2 import LPD_1MGeometry

# Evaluate dark offset

In [43]:
def dark_offset(module_number, path, *, pulse_ids=None):
    """Average the raw dark images of one LPD module over all trains of a run.

    Parameters
    ----------
    module_number: int
        Module (channel) index, 0 to 15 inclusive.
    path: str
        Path to the run folder holding the ``*LPD<NN>*.h5`` files.
    pulse_ids: str
        Pulse selection, parsed by ``parse_ids``. For eg.
            ":" to select all pulses in a train
            "start:stop:step" to select indices with certain step size
            "1,2,3" comma separated pulse index to select specific pulses
            "1,2,3, 5:10" mix of above two
        Default: all pulses ":"

    Return
    ------
    out: ndarray or None
        Mean over trains of the selected pulses, one image per pulse, i.e.
        (n_pulses, slow_scan, fast_scan) when the raw "image.data" is
        (pulses, 1, slow_scan, fast_scan).
        None when `path` is empty, `module_number` is outside 0..15, no
        matching file or not exactly one detector source is found, or no
        train contains data.
    """
    if not path or module_number not in range(16):
        return None

    pattern = f"(.+)LPD{module_number:02d}(.+)"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]
    if not files:
        return None

    run = DataCollection.from_paths(files)

    module = [key for key in run.instrument_sources
              if re.match(r"(.+)/DET/(.+):(.+)", key)]
    # The files of one module are expected to expose exactly one detector
    # source; anything else is ambiguous, so give up.
    if len(module) != 1:
        return None
    source = module[0]

    run = run.select([(source, "image.data")])

    pulses = parse_ids(":" if pulse_ids is None else pulse_ids)

    mean_image = 0
    counts = 0
    for tid, data in run.trains():
        try:
            raw = data[source]["image.data"]
        except KeyError:
            # This train carries no image entry for the module: skip it
            # instead of aborting the whole evaluation.
            continue

        image = np.squeeze(raw, axis=1)  # (pulses, 1, ss, fs) -> (pulses, ss, fs)

        if image.shape[0] == 0:
            continue

        # [-1] is treated as "take every pulse"; anything else is a list of
        # pulse indices (presumably what parse_ids returns for ":" - confirm).
        if pulses != [-1]:
            image = image[pulses, ...]

        mean_image += image.astype(np.float32)
        counts += 1

    if counts == 0:
        return None
    return mean_image / counts


In [44]:
# Run folders holding the dark data of each gain stage.
# NOTE(review): all three currently point at the same run (r0004) - confirm
# that separate high/medium/low gain dark runs are not intended here.
highgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004"
mediumgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004"
lowgain_dark_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004"

# Module and pulse selections, written in the string syntax parse_ids accepts.
modules = "0:16"
pulse_ids = "0:32"

module_numbers = parse_ids(modules)

# One {module number: dark image} mapping per gain stage.
dark_data = {gain: {} for gain in ("high", "medium", "low")}

print(module_numbers)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]


### High Gain dark evaluation

In [20]:
# Fix the run folder and pulse selection; the pool supplies the module number.
_dark_eval = partial(dark_offset, path=highgain_dark_folder, pulse_ids=pulse_ids)

# One worker process per module. executor.map returns results in the order of
# module_numbers, so zipping pairs every result with its module.
with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    high_gain_results = executor.map(_dark_eval, module_numbers)
    dark_data["high"].update(zip(module_numbers, high_gain_results))
       

### Medium Gain dark evaluation

In [22]:
# Fix the run folder and pulse selection; the pool supplies the module number.
_dark_eval = partial(dark_offset, path=mediumgain_dark_folder, pulse_ids=pulse_ids)

# One worker process per module. executor.map returns results in the order of
# module_numbers, so zipping pairs every result with its module.
with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    medium_gain_results = executor.map(_dark_eval, module_numbers)
    dark_data["medium"].update(zip(module_numbers, medium_gain_results))

### Low Gain dark evaluation

In [23]:
# Fix the run folder and pulse selection; the pool supplies the module number.
_dark_eval = partial(dark_offset, path=lowgain_dark_folder, pulse_ids=pulse_ids)

# One worker process per module. executor.map returns results in the order of
# module_numbers, so zipping pairs every result with its module.
with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    low_gain_results = executor.map(_dark_eval, module_numbers)
    dark_data["low"].update(zip(module_numbers, low_gain_results))

# Write dark offset to file

In [27]:
dark_data_file = "test.h5"

# File layout: entry_1/instrument/gain_<gain>/module_<modno>/data
with h5py.File(dark_data_file, "w") as f:
    for gain, per_module in dark_data.items():
        g = f.create_group(f"entry_1/instrument/gain_{gain}")
        for modno, data in per_module.items():
            if data is None:
                # No dark image was produced for this module: write nothing.
                continue
            h = g.create_group(f"module_{modno}")
            h.create_dataset('data', data=data)


# Delay scan

In [None]:
def module_roi_intensity(module_number, path, *, pulse_ids=None, rois=None):
    """Mean ROI intensity of one LPD module for every train and selected pulse.

    Parameters
    ----------
    module_number: int
        Module (channel) index, 0 to 15 inclusive.
    path: str
        Path to the run folder holding the ``*LPD<NN>*.h5`` files.
    pulse_ids: str
        Pulse selection, parsed by ``parse_ids``. Default: all pulses ":"
    rois: sequence of 4 int, optional
        (x0, x1, y0, y1). ``x0:x1`` slices the second-to-last image axis and
        ``y0:y1`` the last one. Default: the full module image.

    Return
    ------
    out: xarray.DataArray or None
        Dimensions ('trainId', 'dim_0'): one row per train that contained
        data, one column per selected pulse, values are the float32 mean over
        the ROI pixels.
        None when `path` is empty, `module_number` is outside 0..15, no
        matching file or not exactly one detector source is found, or no
        train contains data.
    """
    if not path or module_number not in range(16):
        return None

    pattern = f"(.+)LPD{module_number:02d}(.+)"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]
    if not files:
        return None

    run = DataCollection.from_paths(files)

    module = [key for key in run.instrument_sources
              if re.match(r"(.+)/DET/(.+):(.+)", key)]
    # The files of one module are expected to expose exactly one detector
    # source; anything else is ambiguous, so give up.
    if len(module) != 1:
        return None
    source = module[0]

    run = run.select([(source, "image.data")])

    pulses = parse_ids(":" if pulse_ids is None else pulse_ids)

    intensities = []
    train_ids = []
    for tid, data in run.trains():
        try:
            raw = data[source]["image.data"]
        except KeyError:
            # This train carries no image entry for the module: skip it
            # instead of aborting the whole evaluation.
            continue

        image = np.squeeze(raw, axis=1)  # (pulses, 1, ss, fs) -> (pulses, ss, fs)

        if image.shape[0] == 0:
            continue

        if rois is not None:
            x0, x1, y0, y1 = rois
            image = image[..., x0:x1, y0:y1]

        # [-1] is treated as "take every pulse"; anything else is a list of
        # pulse indices (presumably what parse_ids returns for ":" - confirm).
        if pulses != [-1]:
            image = image[pulses, ...]
        image = image.astype(np.float32)

        intensities.append(np.mean(image, axis=(-1, -2)))
        train_ids.append(tid)

    # np.stack rejects an empty list, so report "no data" the same way as the
    # other early exits.
    if not intensities:
        return None

    coords = {'trainId': np.array(train_ids)}
    dims = ['trainId', 'dim_0']
    return xr.DataArray(np.stack(intensities), dims=dims, coords=coords)

In [None]:
# Run to analyse.
run_folder = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004"

# Module and pulse selections, written in the string syntax parse_ids accepts.
modules = "14, 15"
pulse_ids = "1:10:2"

# Region of interest as (x0, x1, y0, y1) pixel bounds within a module image.
rois = [0, 256, 0, 256]

module_numbers = parse_ids(modules)
print(module_numbers)

In [None]:
# Fix everything except the module number, which the pool supplies.
_roi_intensity_eval = partial(
    module_roi_intensity, path=run_folder, pulse_ids=pulse_ids, rois=rois)

# {module number: result of module_roi_intensity}
roi_intensities = {}
with ProcessPoolExecutor(max_workers=len(module_numbers)) as executor:
    # executor.map returns results in the order of module_numbers.
    module_results = executor.map(_roi_intensity_eval, module_numbers)
    roi_intensities.update(zip(module_numbers, module_results))
       

In [None]:
# Display the ROI intensities computed for module 15.
roi_intensities[15]

# Get Delay arrays

In [None]:
# Source and property used as the scan ("delay") axis.
# NOTE(review): this reads the XGM beam x-position - confirm it is the
# intended delay quantity.
delay_src = "SA1_XTD2_XGM/DOOCS/MAIN"
delay_prop = "beamPosition.ixPos.value"

run = RunDirectory(run_folder)

# get delay data: xarray, with an extra 'dim_1' axis inserted at position 1
delay_data = run.get_array(delay_src, delay_prop).expand_dims('dim_1', axis=1)
print(delay_data)

## Align delay data with ROI intensities along train IDs and plot


In [None]:
import  matplotlib.pyplot as plt
%matplotlib notebook

pulse = 15

for modno in module_numbers:
    roi_int, delay = xr.align(roi_intensities[modno], delay_data)
    
    s = list(zip(delay[:, 0].values, roi_int[:, pulse].values))
    roi_int_avg = []
    roi_int_std = []
    delay = []
    for key, group in groupby(sorted(s), lambda x: x[0]):
        x, y = zip(*group)
        avg, std = np.mean(np.array(y)), np.std(np.array(y))
        roi_int_avg.append(avg)
        roi_int_std.append(std)
        delay.append(x[0])        
    
    fig, ax = plt.subplots(1, 1)
    ax.errorbar(delay, roi_int_avg, yerr=roi_int_std, uplims=True, lolims=True)
    


# LPD ROI intensity with Offset and Relative gain correction

In [40]:
def lpd_corrections(sequence, path, *, pulse_ids=None, rois=None, dark_data=None):
    """Correct, assemble and average the LPD images of one file sequence.

    For every train the 'image.data' of each module is optionally reduced to
    the selected pulses and offset / relative-gain corrected, then all modules
    are assembled with the LPD-1M geometry and the ROI is extracted.

    Parameters
    ----------
    sequence: str
        Sequence tag following "-S" in the file names, e.g. "00000".
    path: str
        Path to the run folder.
    pulse_ids: str
        Pulse selection, parsed by ``parse_ids``. Default: all pulses ":"
    rois: sequence of 4 int, optional
        (x0, x1, y0, y1). ``x0:x1`` slices the second-to-last axis of the
        assembled image and ``y0:y1`` the last one.
    dark_data: dict, optional
        ``{"high": {modno: dark}, "medium": {...}, "low": {...}}``. Module
        keys may be int or str. The correction is applied to a module only
        when all three darks are available; each dark has to broadcast
        against the pulse-selected image. Default: no correction.

    Return
    ------
    out: tuple or None
        ``(mean_image, roi_intensity)``: the mean over trains of the
        assembled (ROI) images, and an xarray.DataArray with dimensions
        ('trainId', 'dim_0') holding the nan-mean ROI intensity per train and
        pulse.
        None when `path` is empty, no file matches the sequence, or no train
        could be assembled.

    Raises
    ------
    Any exception raised while correcting a module (e.g. a pulse index
    beyond the pulses present in a train) is re-raised instead of being
    dropped inside the worker thread.
    """
    if not path:
        return None

    pattern = f"(.+)LPD(.+)-S{sequence}"

    files = [os.path.join(path, f) for f in os.listdir(path)
             if f.endswith('.h5') and re.match(pattern, f)]
    if not files:
        return None

    devices = [("*/DET/*CH0:xtdf", "image.data")]
    run = DataCollection.from_paths(files).select(devices)

    pulses = parse_ids(":" if pulse_ids is None else pulse_ids)

    quad_positions = [[11.4, 299],
                      [-11.5, 8],
                      [254.5, -16],
                      [278.5, 275]]
    filename = os.path.join(
        os.getcwd(), 'calibration/geometries/lpd_mar_18_axesfixed.h5')
    geom = LPD_1MGeometry.from_h5_file_and_quad_positions(
        filename, quad_positions)

    source_pattern = re.compile("(.+)/DET/(.+)CH0:xtdf")

    # NOTE(review): the raw-value thresholds and the factors below come from
    # the original code; presumably they separate the gain stages and scale
    # medium / low gain to high-gain units - confirm against the detector
    # documentation.
    high_max = 4096
    medium_max = 8192
    medium_factor = 9.85
    low_factor = 7.44

    def _module_dark(gain, modno):
        # The module number parsed from a source name is a string while the
        # dark dictionaries may be keyed by int: try the int key first.
        per_module = dark_data[gain]
        try:
            dark = per_module.get(int(modno))
        except ValueError:
            dark = None
        return per_module.get(modno) if dark is None else dark

    def _corrections(data, source):
        # Replace data[source]["image.data"] by the corrected float32 image.
        matched = source_pattern.match(source)
        if matched is None:
            return
        modno = matched.group(2)

        try:
            image = np.squeeze(data[source]["image.data"], axis=1)
        except KeyError:
            # No image entry for this module in this train.
            return

        if pulses != [-1] and image.shape[0] != 0:
            image = image[pulses, ...]
        image = image.astype(np.float32)

        if dark_data is not None and image.shape[0] != 0:
            high = _module_dark("high", modno)
            medium = _module_dark("medium", modno)
            low = _module_dark("low", modno)

            if high is not None and medium is not None and low is not None:
                is_high = image <= high_max
                is_medium = (image <= medium_max) & (image > high_max)
                is_low = image > medium_max

                currim = np.zeros_like(image)
                currim[is_high] = (image - high)[is_high]
                currim[is_medium] = ((image - medium) * medium_factor)[is_medium]
                currim[is_low] = (
                    (image - low) * medium_factor * low_factor)[is_low]
                image = currim

        data[source]["image.data"] = image

    out_array = None
    mean = 0
    count = 0
    intensities = []
    train_ids = []

    for tid, data in run.trains():
        sources = list(data.keys())
        if not sources:
            # ThreadPoolExecutor rejects max_workers=0.
            continue

        with ThreadPoolExecutor(max_workers=len(sources)) as executor:
            futures = [executor.submit(_corrections, data, source)
                       for source in sources]
        # result() re-raises whatever a worker raised; without it a failed
        # correction would go unnoticed and raw data would be averaged.
        for future in futures:
            future.result()

        # assemble image
        try:
            stacked_data = stack_detector_data(data, "image.data")
        except (ValueError, IndexError, KeyError):
            continue

        if stacked_data.shape[0] == 0:
            continue
        n_images = (stacked_data.shape[0], )

        if out_array is None:
            out_array = geom.output_array_for_position_fast(
                extra_shape=n_images, dtype=stacked_data.dtype)

        assembled, centre = geom.position_all_modules(
            stacked_data, out=out_array)

        if rois is not None:
            x0, x1, y0, y1 = rois
            assembled = assembled[..., x0:x1, y0:y1]

        mean += assembled
        count += 1

        intensities.append(np.nanmean(assembled, axis=(-1, -2)))
        train_ids.append(tid)

    # Nothing assembled: avoid dividing by zero and stacking an empty list.
    if count == 0:
        return None

    coords = {'trainId': np.array(train_ids)}
    dims = ['trainId', 'dim_0']
    roi_intensity = xr.DataArray(np.stack(intensities), dims=dims, coords=coords)

    return mean / count, roi_intensity


In [41]:
path = "/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004"

# Sequence tags (the text between "-S" and ".h5") of the LPD files in the run.
pattern = r"(.+)LPD(.+)-S(.+)\.h5"
_matches = (re.match(pattern, f) for f in os.listdir(path) if f.endswith('.h5'))
sequences = {m.group(3) for m in _matches if m}

_lpd_corrections = partial(lpd_corrections,
                           path=path,
                           pulse_ids="1:10:2"
                          )

# Process a single sequence as a check. A set has no defined order, so take
# the smallest tag to make the choice reproducible; None when the run holds
# no LPD file.
# Note: lpd_corrections returns a (mean image, ROI intensities) pair.
mean = _lpd_corrections(min(sequences)) if sequences else None

# with ProcessPoolExecutor(max_workers=len(sequences)) as executor:
#     ret = executor.map(_lpd_corrections, list(sequences) )


FXE_DET_LPD1M-1/DET/4CH0:xtdf 4
FXE_DET_LPD1M-1/DET/12CH0:xtdf 12
FXE_DET_LPD1M-1/DET/7CH0:xtdf 7
FXE_DET_LPD1M-1/DET/14CH0:xtdf 14
FXE_DET_LPD1M-1/DET/13CH0:xtdf 13
FXE_DET_LPD1M-1/DET/6CH0:xtdf 6
FXE_DET_LPD1M-1/DET/1CH0:xtdf 1
FXE_DET_LPD1M-1/DET/5CH0:xtdf 5
FXE_DET_LPD1M-1/DET/15CH0:xtdfFXE_DET_LPD1M-1/DET/10CH0:xtdf 10
FXE_DET_LPD1M-1/DET/2CH0:xtdf 2
 FXE_DET_LPD1M-1/DET/3CH0:xtdf 3
15
FXE_DET_LPD1M-1/DET/11CH0:xtdf 11FXE_DET_LPD1M-1/DET/8CH0:xtdf 8

FXE_DET_LPD1M-1/DET/9CH0:xtdf 9
FXE_DET_LPD1M-1/DET/4CH0:xtdf FXE_DET_LPD1M-1/DET/12CH0:xtdf 12
FXE_DET_LPD1M-1/DET/7CH0:xtdf 7
FXE_DET_LPD1M-1/DET/14CH0:xtdf 14
4
FXE_DET_LPD1M-1/DET/13CH0:xtdf 13
FXE_DET_LPD1M-1/DET/6CH0:xtdf 6
FXE_DET_LPD1M-1/DET/1CH0:xtdf 1
FXE_DET_LPD1M-1/DET/5CH0:xtdf 5
FXE_DET_LPD1M-1/DET/15CH0:xtdf 15
FXE_DET_LPD1M-1/DET/10CH0:xtdf 10
FXE_DET_LPD1M-1/DET/2CH0:xtdf 2
FXE_DET_LPD1M-1/DET/3CH0:xtdf FXE_DET_LPD1M-1/DET/11CH0:xtdf 11
3
FXE_DET_LPD1M-1/DET/8CH0:xtdf 8
FXE_DET_LPD1M-1/DET/9CH0:xtdf 9
FXE_DET_LPD1M-1/

KeyboardInterrupt: 

In [None]:
# Open the complete run directory (all sources) for interactive inspection.
r = RunDirectory("/gpfs/exfel/exp/FXE/201931/p900089/raw/r0004")

In [None]:
# Show karabo_data's overview of the opened run.
r.info()

In [None]:
# List the keys available for the XGM source.
r.keys_for_source("SA1_XTD2_XGM/DOOCS/MAIN")
# Scratch checks kept for reference: photon flux array, and the trains for
# which module 12 has a non-zero data count.
#r.get_array("SA1_XTD2_XGM/DOOCS/MAIN", 'pulseEnergy.photonFlux.value')
# dc = r.get_data_counts("FXE_DET_LPD1M-1/DET/12CH0:xtdf", "image.data")
# dc[dc != 0]
