# 2024C1-02 SMI WAXS TReXS processing notebook 

## Imports

In [None]:
import pathlib
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
from PIL import Image
from tqdm.auto import tqdm 

import fabio
from smi_analysis import SMI_beamline

## Define paths & functions

In [None]:
# Scratch arithmetic (evaluates to -8999).
# NOTE(review): what the two operands represent is not recorded here — annotate or delete this cell.
-33955 - (-24956)

In [None]:
# Define a sample id to sample name dictionary
# sn = {
#     14: 'PM6_CB',
#     17: 'PM6_1CN-CB',
#     18: 'PM6_5CN-CB',
#     21: 'PM6_p5CN-CB',
#     22: 'PM6-Y6_CB',
#     23: 'PM6-Y6BO_CB',
#     26: 'PM6_CF',
#     29: 'PM6_1CN-CF',
#     30: 'PM6_5CN-CF',
#     33: 'PM6_p5CN-CF',
#     34: 'PM6-Y6_CF',
#     35: 'PM6-Y6BO_CF',
#     1: 'BareSiN_01',
#     3: 'BareSiN_03'
# }

In [None]:
# Map each integer sample id (1-16) to its sample name
sample_ids = list(range(1, 17))
sample_names = [
    'PM6_10CN-CB_Si',
    'PM6_10CN-CF_Si',
    'PM6_CB_Si',
    'PM6_CF_Si',
    'PM6_p5CN-CB_Si',
    'PM6_p5CN-CF_Si',
    'PM6_p5CN-2CF-3CB_Si',
    'PM6-Y6BO_CB_Si',
    'PM6-Y6BO_p5CN-CB_Si',
    'PM6-Y6_CB_Si',
    'PM6-Y6_p5CN-CB_Si',
    'Y6BO_CF_Si',
    'Y6BO_p5CN-CF_Si',
    'Y6_CF_Si',
    'Y6_p5CN-CF_Si',
    'PM6_5CN-CB_Si',
]

# ids and names are matched by position
sn = dict(zip(sample_ids, sample_names))

sn

In [None]:
# Proposal root directory, and the '900KW' folder beneath it that the raw tiffs are globbed from
propPath = pathlib.Path('/nsls2/data/smi/proposals/2024-1/pass-313064')
rawPath = propPath / '900KW'

# analysisPath = pathlib.Path('/nsls2/users/alevin/rsoxs_suite/sst1_notebooks/SMI_tender_scattering/analysis_02')
# reducedPath = analysisPath.joinpath('reduced_waxs')

In [None]:
# Sanity check: True when the raw-data directory exists on this filesystem
rawPath.exists()

### SMI function

In [None]:
def giwaxs_SMI_numpy_loading_wa0wa20wa40(path, filename_sublists, ai=0.80):
    """
    Stitch and cake raw GIWAXS tiffs with smi_analysis, one group of files at a time.

    Adapted from Guillaume's SMI notebooks. Each sublist of ``filename_sublists``
    holds the files that are opened together and stitched into a single image
    (going by its name, this variant is meant for wa0/wa20/wa40 triples).

    Filenames must contain an underscore-delimited ``<number>eV`` energy token
    (e.g. ``_2470.00eV_``) and a ``wa<degrees>_bpm`` detector-angle tag. The
    energy is read from the first file of each sublist.

    Parameters
    ----------
    path : directory containing the tiffs (passed to ``SMI_geometry.open_data``)
    filename_sublists : list of lists of tiff filenames
    ai : incident angle in degrees (default 0.80)

    Returns
    -------
    filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents
        Five lists with one entry per sublist: the first filename of the
        sublist, the stitched image (``img_st``), ``[qp[0], qp[-1], qz[0], qz[-1]]``,
        the caked image (``cake``) and
        ``[q_cake[0], q_cake[-1], chi_cake[0], chi_cake[-1]]``.

    Raises
    ------
    ValueError
        If a filename lacks the energy or detector-angle tag, or the tagged
        value is not a number.
    """

    # Geometries, etc.
    geometry = 'Reflection'
    bs_kind = None
    detector_waxs = 'Pilatus900kw'
    sdd_waxs = 277
    center_waxs = [95, 1256]
    bs_pos_waxs = [[97, 1070], [0, 0], [0, 0]]

    # The incident angle is the same for every group, so convert it once
    alphai = np.deg2rad(ai)

    filename_wa0_list = []
    recip_list = []
    recip_extents = []
    caked_list = []
    caked_extents = []
    for dat in tqdm(filename_sublists, desc='Processing tiffs'):
        # Take the whole '_'-delimited token in front of 'eV' rather than a
        # fixed-width slice, so energies with any number of digits parse correctly
        ev_end = dat[0].find('eV')
        if ev_end == -1:
            raise ValueError(f"No 'eV' energy tag in filename: {dat[0]!r}")
        energy = 0.001 * float(dat[0][:ev_end].split('_')[-1])  # eV -> keV
        wav = 1E-10 * (12.398 / energy)  # hc = 12.398 keV*Angstrom; 1E-10 converts Angstrom to m

        waxs_angle = []
        for da in dat:
            # Use the last 'wa' before '_bpm' so a 'wa' inside the sample name is ignored
            bpm_start = da.find('_bpm')
            wa_start = da.rfind('wa', 0, bpm_start)
            if bpm_start == -1 or wa_start == -1:
                raise ValueError(f"No 'wa<angle>_bpm' detector tag in filename: {da!r}")
            waxs_angle.append(np.deg2rad(float(da[wa_start + 2:bpm_start])))

        # This part is to stitch the data
        SMI_waxs = SMI_beamline.SMI_geometry(geometry=geometry,
                                             detector=detector_waxs,
                                             sdd=sdd_waxs,
                                             wav=wav,
                                             alphai=alphai,
                                             center=center_waxs,
                                             bs_pos=bs_pos_waxs,
                                             det_angles=waxs_angle,
                                             bs_kind=bs_kind)

        SMI_waxs.open_data(path, dat)
        SMI_waxs.stitching_data(interp_factor=3)
        SMI_waxs.caking()

        filename_wa0_list.append(dat[0])
        recip_list.append(SMI_waxs.img_st)
        recip_extents.append([SMI_waxs.qp[0], SMI_waxs.qp[-1], SMI_waxs.qz[0], SMI_waxs.qz[-1]])

        caked_list.append(SMI_waxs.cake)
        caked_extents.append([SMI_waxs.q_cake[0], SMI_waxs.q_cake[-1], SMI_waxs.chi_cake[0], SMI_waxs.chi_cake[-1]])

    return filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents

In [None]:
def giwaxs_SMI_numpy_loading_wa0wa20(path, filename_sublists, ai=0.8):
    """
    Stitch and cake raw GIWAXS tiffs with smi_analysis, one group of files at a time.

    Adapted from Guillaume's SMI notebooks. Each sublist of ``filename_sublists``
    holds the files that are opened together and stitched into a single image
    (going by its name, this variant is meant for wa0/wa20 pairs).

    Filenames must contain an underscore-delimited ``<number>eV`` energy token
    (e.g. ``_2470.00eV_``) and a ``wa<degrees>_bpm`` detector-angle tag. The
    energy is read from the first file of each sublist.

    Parameters
    ----------
    path : directory containing the tiffs (passed to ``SMI_geometry.open_data``)
    filename_sublists : list of lists of tiff filenames
    ai : incident angle in degrees (default 0.8)

    Returns
    -------
    filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents
        Five lists with one entry per sublist: the first filename of the
        sublist, the stitched image (``img_st``), ``[qp[0], qp[-1], qz[0], qz[-1]]``,
        the caked image (``cake``) and
        ``[q_cake[0], q_cake[-1], chi_cake[0], chi_cake[-1]]``.

    Raises
    ------
    ValueError
        If a filename lacks the energy or detector-angle tag, or the tagged
        value is not a number.
    """

    # Geometries, etc.
    geometry = 'Reflection'
    bs_kind = None
    detector_waxs = 'Pilatus900kw'
    sdd_waxs = 277
    center_waxs = [95, 1256]
    bs_pos_waxs = [[97, 1070], [0, 0], [0, 0]]

    # The incident angle is the same for every group, so convert it once
    alphai = np.deg2rad(ai)

    filename_wa0_list = []
    recip_list = []
    recip_extents = []
    caked_list = []
    caked_extents = []
    for dat in tqdm(filename_sublists, desc='Processing tiffs'):
        # Take the whole '_'-delimited token in front of 'eV' rather than a
        # fixed-width slice, so energies with any number of digits parse correctly
        ev_end = dat[0].find('eV')
        if ev_end == -1:
            raise ValueError(f"No 'eV' energy tag in filename: {dat[0]!r}")
        energy = 0.001 * float(dat[0][:ev_end].split('_')[-1])  # eV -> keV
        wav = 1E-10 * (12.398 / energy)  # hc = 12.398 keV*Angstrom; 1E-10 converts Angstrom to m

        waxs_angle = []
        for da in dat:
            # Use the last 'wa' before '_bpm' so a 'wa' inside the sample name is ignored
            bpm_start = da.find('_bpm')
            wa_start = da.rfind('wa', 0, bpm_start)
            if bpm_start == -1 or wa_start == -1:
                raise ValueError(f"No 'wa<angle>_bpm' detector tag in filename: {da!r}")
            waxs_angle.append(np.deg2rad(float(da[wa_start + 2:bpm_start])))

        # This part is to stitch the data
        SMI_waxs = SMI_beamline.SMI_geometry(geometry=geometry,
                                             detector=detector_waxs,
                                             sdd=sdd_waxs,
                                             wav=wav,
                                             alphai=alphai,
                                             center=center_waxs,
                                             bs_pos=bs_pos_waxs,
                                             det_angles=waxs_angle,
                                             bs_kind=bs_kind)

        SMI_waxs.open_data(path, dat)
        SMI_waxs.stitching_data(interp_factor=3)
        SMI_waxs.caking()

        filename_wa0_list.append(dat[0])
        recip_list.append(SMI_waxs.img_st)
        recip_extents.append([SMI_waxs.qp[0], SMI_waxs.qp[-1], SMI_waxs.qz[0], SMI_waxs.qz[-1]])

        caked_list.append(SMI_waxs.cake)
        caked_extents.append([SMI_waxs.q_cake[0], SMI_waxs.q_cake[-1], SMI_waxs.chi_cake[0], SMI_waxs.chi_cake[-1]])

    return filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents

In [None]:
def giwaxs_SMI_numpy_loading_wa0(path, filename_list, ai=0.8):
    """
    Process and cake raw GIWAXS tiffs with smi_analysis, one file at a time.

    Adapted from Guillaume's SMI notebooks. Unlike the multi-angle variants,
    every file in ``filename_list`` is opened on its own (a single detector
    angle per image).

    Filenames must contain an underscore-delimited ``<number>eV`` energy token
    (e.g. ``_2470.00eV_``) and a ``wa<degrees>_bpm`` detector-angle tag.

    Parameters
    ----------
    path : directory containing the tiffs (passed to ``SMI_geometry.open_data``)
    filename_list : flat list of tiff filenames
    ai : incident angle in degrees (default 0.8)

    Returns
    -------
    filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents
        Five lists with one entry per file: the filename, the processed image
        (``img_st``), ``[qp[0], qp[-1], qz[0], qz[-1]]``, the caked image
        (``cake``) and ``[q_cake[0], q_cake[-1], chi_cake[0], chi_cake[-1]]``.

    Raises
    ------
    ValueError
        If a filename lacks the energy or detector-angle tag, or the tagged
        value is not a number.
    """

    # Geometries, etc.
    geometry = 'Reflection'
    bs_kind = None
    detector_waxs = 'Pilatus900kw'
    sdd_waxs = 277
    center_waxs = [95, 1256]
    bs_pos_waxs = [[97, 1070], [0, 0], [0, 0]]

    # The incident angle is the same for every file, so convert it once
    alphai = np.deg2rad(ai)

    filename_wa0_list = []
    recip_list = []
    recip_extents = []
    caked_list = []
    caked_extents = []
    for da in tqdm(filename_list, desc='Processing tiffs'):
        # Take the whole '_'-delimited token in front of 'eV' rather than a
        # fixed-width slice, so energies with any number of digits parse correctly
        ev_end = da.find('eV')
        if ev_end == -1:
            raise ValueError(f"No 'eV' energy tag in filename: {da!r}")
        energy = 0.001 * float(da[:ev_end].split('_')[-1])  # eV -> keV
        wav = 1E-10 * (12.398 / energy)  # hc = 12.398 keV*Angstrom; 1E-10 converts Angstrom to m

        # Use the last 'wa' before '_bpm' so a 'wa' inside the sample name is ignored
        bpm_start = da.find('_bpm')
        wa_start = da.rfind('wa', 0, bpm_start)
        if bpm_start == -1 or wa_start == -1:
            raise ValueError(f"No 'wa<angle>_bpm' detector tag in filename: {da!r}")
        waxs_angle = [np.deg2rad(float(da[wa_start + 2:bpm_start]))]

        # Same smi_analysis pipeline as the multi-angle loaders, with a single image
        SMI_waxs = SMI_beamline.SMI_geometry(geometry=geometry,
                                             detector=detector_waxs,
                                             sdd=sdd_waxs,
                                             wav=wav,
                                             alphai=alphai,
                                             center=center_waxs,
                                             bs_pos=bs_pos_waxs,
                                             det_angles=waxs_angle,
                                             bs_kind=bs_kind)

        SMI_waxs.open_data(path, [da])
        SMI_waxs.stitching_data(interp_factor=3)
        SMI_waxs.caking()

        filename_wa0_list.append(da)
        recip_list.append(SMI_waxs.img_st)
        recip_extents.append([SMI_waxs.qp[0], SMI_waxs.qp[-1], SMI_waxs.qz[0], SMI_waxs.qz[-1]])

        caked_list.append(SMI_waxs.cake)
        caked_extents.append([SMI_waxs.q_cake[0], SMI_waxs.q_cake[-1], SMI_waxs.chi_cake[0], SMI_waxs.chi_cake[-1]])

    return filename_wa0_list, recip_list, recip_extents, caked_list, caked_extents

## Load data & save zarrs

In [None]:
# first of Andrew's Si-# pos1 wa0 up to id598243 is only 1s exposure

In [None]:
# [float(f.name.split('_')[7][2:])>598243 for f in sorted(rawPath.glob('CD_Si*.tif'))]

In [None]:
# List the name of every entry in rawPath matching 'CD_Si*', in sorted path order
[f.name for f in sorted(rawPath.glob('CD_Si*'))]

In [None]:
# Collect candidate raw files as sets of paths so they can be combined with set operations.
# The commented-out globs below are alternative selections kept for reference.
all_giwaxs = set(rawPath.glob('CD_Si-16_*pos1*.tif'))
# all_giwaxs = set(rawPath.glob('*50-tbapf6*pos1*.tif'))
# all_giwaxs = set(rawPath.glob('*180-tbapf6*pos1*ai8.50*.tif'))
# all_giwaxs = set(rawPath.glob('*180-blank-bd_*pos1*.tif'))
# all_giwaxs = set(rawPath.glob('*180-blank-bd-at-real_*pos1*.tif'))
# all_giwaxs = set(rawPath.glob('*180-tbapf6*pos1*.tif'))
#redo_giwaxs = set(rawPath.glob('*50-teacl-redo*pos1*.tif'))
# test_giwaxs = set(rawPath.glob('test*'))
# calib_giwaxs = set(rawPath.glob('calib*'))
# Every 'pos1' file with 'wa40' later in its name, regardless of sample
wa40_giwaxs = set(rawPath.glob('*pos1*wa40*'))

In [None]:
# Preview the selected filenames from the highest to the lowest id number.
# The id is parsed inline (same expression as get_id) because get_id is only
# defined in a later cell, so calling it here raises NameError on a fresh kernel.
list(reversed([f.name for f in sorted(all_giwaxs, key=lambda x: float(x.name[x.name.find('_id')+1:].split('_')[0][2:]))]))

In [None]:
def get_id(f):
    """Return, as a float, the number that follows the first '_id' tag in filename ``f``."""
    tag_start = f.find('_id') + 1          # index of the 'i' in 'id...'
    id_token = f[tag_start:].split('_')[0]  # e.g. 'id598243'
    return float(id_token[2:])

In [None]:
# [f.name for f in sorted(all_giwaxs, key=lambda x: x.stem.split('_')[7])]
# len([f.name for f in sorted(all_giwaxs) if float(f.stem.split('_')[7][2:])>598242])  # Si-1 first night good scans
# len([f.name for f in sorted(all_giwaxs) if float(f.stem.split('_')[7][2:])>=601490])  # Si-1 second night good scans

In [None]:
# Batch-process every sample in `sn`: pair the wa0/wa20 tiffs of each energy,
# stitch + cake them, wrap the results in xarray, plot a quick check and save zarrs.

# Loop-invariant setup, hoisted out of the per-sample loop
wa40_giwaxs = set(rawPath.glob('*pos1*wa40*'))
sampleZarrsPath = propPath.joinpath('processed_data/andrew/trgiwaxs_zarrs')

# Define naming scheme: labels applied, in order, to the '_'-separated filename tokens
waxs_naming_scheme = ['project', 'sample_id', 'pos_energy_dir', 'energy', 'incident_angle',
                      'detector_wa', 'bpm', 'id', 'misc', 'detector']
md_naming_scheme = waxs_naming_scheme.copy()

for sample_id in tqdm(list(sn.keys())[:]):  # narrow the [:] slice to process a subset of samples
    all_giwaxs = set(rawPath.glob(f'CD_Si-{sample_id}_*pos1*.tif'))
    all_paths = all_giwaxs.difference(wa40_giwaxs)

    # Highest id number first, so the first file seen for an energy is the one with the highest id
    filename_list = list(reversed([f.name for f in sorted(all_paths, key=lambda x: get_id(x.name))]))

    # Keep one file per energy tag for each detector angle (first occurrence wins).
    # Keying by energy guarantees that every stitched pair really is the wa0 and
    # wa20 image of the same energy, even when one detector position is missing a file.
    wa0_by_energy = {}
    wa20_by_energy = {}
    for fname in filename_list:
        energy_tag = fname[:fname.find('eV')].split('_')[-1]
        if 'wa0' in fname:
            wa0_by_energy.setdefault(energy_tag, fname)
        if 'wa20' in fname:
            wa20_by_energy.setdefault(energy_tag, fname)

    paired_energies = sorted(set(wa0_by_energy) & set(wa20_by_energy))
    unpaired_energies = sorted(set(wa0_by_energy) ^ set(wa20_by_energy))
    if unpaired_energies:
        print(f'Sample {sample_id}: skipping energies without both wa0 and wa20 files: {unpaired_energies}')
    if not paired_energies:
        # Nothing to stitch; move on instead of failing later on an empty concat
        print(f'Sample {sample_id}: no complete wa0/wa20 pairs found, skipping sample')
        continue

    # Make sublists to stitch two waxs positions together: [wa0 file, wa20 file] per energy
    filename_sublists = [[wa0_by_energy[tag], wa20_by_energy[tag]] for tag in paired_energies]

    names_list, recip_list, recip_extents, caked_list, caked_extents = giwaxs_SMI_numpy_loading_wa0wa20(rawPath, filename_sublists)

    # Construct xarrays with full values along detector dimensions and the energy dimension
    # They contain sample name and theta value as well, as single values to be concatenated in later steps
    recip_DA_rows = []
    caked_DA_rows = []
    zipped_lists = zip(names_list, recip_list, recip_extents, caked_list, caked_extents)
    for filename, recip_arr, recip_extent, caked_arr, caked_extent in zipped_lists:

        # Filename tokens map positionally onto the naming scheme
        attr_dict = dict(zip(md_naming_scheme, filename.split('_')))

        recip_DA = xr.DataArray(data = recip_arr,
                                dims = ['pix_y', 'pix_x'],
                                attrs = attr_dict)
        # q_y runs from recip_extent[3] (qz[-1]) down to recip_extent[2] (qz[0])
        recip_DA = recip_DA.assign_coords({
            'pix_x': recip_DA.pix_x.data,
            'pix_y': recip_DA.pix_y.data,
            'q_x': ('pix_x', np.linspace(recip_extent[0], recip_extent[1], len(recip_DA.pix_x.data))),
            'q_y': ('pix_y', np.linspace(recip_extent[3], recip_extent[2], len(recip_DA.pix_y.data)))
        })
        # The energy attribute looks like '2470.00eV'; strip the unit for the numeric coordinate
        recip_DA = recip_DA.expand_dims({
            'energy': [float(recip_DA.energy[:-2])],
            'sample_id': [recip_DA.sample_id]
        })
        recip_DA_rows.append(recip_DA)

        caked_DA = xr.DataArray(data = caked_arr,
                                dims = ['index_y', 'index_x'],
                                attrs = attr_dict)
        caked_DA = caked_DA.assign_coords({
            'index_x': caked_DA.index_x.data,
            'index_y': caked_DA.index_y.data,
            'q_r': ('index_x', np.linspace(caked_extent[0], caked_extent[1], len(caked_DA.index_x.data))),
            'chi': ('index_y', np.linspace(caked_extent[3], caked_extent[2], len(caked_DA.index_y.data)))
        })
        caked_DA = caked_DA.expand_dims({
            'energy': [float(caked_DA.energy[:-2])],
            'sample_id': [caked_DA.sample_id]
        })
        caked_DA_rows.append(caked_DA)

    recip_DA = xr.concat(recip_DA_rows, 'energy').sortby('energy')
    caked_DA = xr.concat(caked_DA_rows, 'energy').sortby('energy')

    # Quick plot check
    cmap = plt.cm.turbo.copy()
    sliced_DA = recip_DA.sel(energy=2470, method='nearest').squeeze()
    cmin = sliced_DA.quantile(0.01)
    cmax = sliced_DA.quantile(0.99)
    ax = sliced_DA.plot.imshow(norm=plt.Normalize(cmin,cmax), cmap=cmap, x='q_x', y='q_y')
    ax.axes.set(aspect='equal')
    plt.show()
    plt.close('all')

    # Save sample zarr, load later to concatenate full zarr
    recip_samp_zarr_name = 'recip_'+recip_DA.sample_id.values[0]+'.zarr'
    recip_DS = recip_DA.to_dataset(name='raw_intensity')
    recip_DS.to_zarr(sampleZarrsPath.joinpath(recip_samp_zarr_name), mode='w')

    caked_samp_zarr_name = 'caked_'+caked_DA.sample_id.values[0]+'.zarr'
    caked_DS = caked_DA.to_dataset(name='raw_intensity')
    caked_DS.to_zarr(sampleZarrsPath.joinpath(caked_samp_zarr_name), mode='w')

In [None]:
# # filename_list = [f.name for f in sorted(all_giwaxs) if float(f.name.split('_')[7][2:])>598243]
# # filename_list = [f.name for f in sorted(all_giwaxs)]
# # filename_list = [f.name for f in sorted(all_giwaxs) if float(f.stem.split('_')[7][2:])>598242]
# filename_list = [f.name for f in sorted(all_giwaxs) if float(f.stem.split('_')[7][2:])>=601490]

# # Make sublists (of sample filenames) IF YOU HAVE wa0 AND wa20
# group_size = 2
# filename_sublists = [filename_list[i:i + group_size] for i in range(0, len(filename_list), group_size)]

In [None]:
# filename_sublists

In [None]:
def remove_duplicate_energies(filename_list):
    """
    Keep only the first filename seen for each energy tag.

    The energy tag is the text between the last underscore before the first
    'eV' and that 'eV' (e.g. '2470.00' in '..._2470.00eV_...'). Tags are
    compared as strings, and the order of the kept filenames follows the input.

    Parameters
    ----------
    filename_list : iterable of filename strings

    Returns
    -------
    list of str
        The filenames retained, one per distinct energy tag.
    """
    # dicts preserve insertion order, and setdefault ignores later duplicates,
    # so this is a single pass with constant-time membership checks
    first_file_by_energy = {}
    for filename in filename_list:
        file_energy = filename[:filename.find('eV')].split('_')[-1]
        first_file_by_energy.setdefault(file_energy, filename)

    return list(first_file_by_energy.values())

In [None]:
# Run SMI loading code (this produces some fabio and divide by zero errors)
# NOTE: filename_sublists is not built in this cell; the only live assignment is inside the
# per-sample loop cell, so this call reuses whatever that loop's last iteration left behind.
# names_list, recip_list, recip_extents, caked_list, caked_extents = giwaxs_SMI_numpy_loading_wa0wa20wa40(rawPath, filename_sublists)
names_list, recip_list, recip_extents, caked_list, caked_extents = giwaxs_SMI_numpy_loading_wa0wa20(rawPath, filename_sublists)
# names_list, recip_list, recip_extents, caked_list, caked_extents = giwaxs_SMI_numpy_loading_wa0(rawPath, filename_list[:])

In [None]:
# Peek at one filename to check it against the naming scheme.
# all_paths is a set, so which file comes first is arbitrary.
[f.name for f in all_paths][0]

In [None]:
# Define naming scheme: labels applied, in order, to the '_'-separated filename tokens
# waxs_naming_scheme = ['project0', 'project', 'sample_info', 'detector_height_pos', 'energy', 'incident_angle', 
#                       'detector_wa', 'bpm', 'id', 'misc', 'detector']
waxs_naming_scheme = [
    'project', 'sample_id', 'pos_energy_dir', 'energy', 'incident_angle',
    'detector_wa', 'bpm', 'id', 'misc', 'detector',
]
md_naming_scheme = list(waxs_naming_scheme)


# Build one DataArray per loaded image (reciprocal-space and caked), each carrying
# length-1 'energy' and 'sample_id' dimensions, then join them along 'energy'
recip_DA_rows, caked_DA_rows = [], []
zipped_lists = zip(names_list, recip_list, recip_extents, caked_list, caked_extents)
for filename, recip_arr, recip_extent, caked_arr, caked_extent in zipped_lists:

    # Metadata parsed from the filename, one entry per naming-scheme label
    md_list = filename.split('_')
    attr_dict = {md_item: md_list[i] for i, md_item in enumerate(md_naming_scheme)}

    # Reciprocal-space image: pixel indices plus q axes spanning the stored extents
    recip_DA = xr.DataArray(data=recip_arr, dims=['pix_y', 'pix_x'], attrs=attr_dict)
    q_x_axis = np.linspace(recip_extent[0], recip_extent[1], len(recip_DA.pix_x.data))
    q_y_axis = np.linspace(recip_extent[3], recip_extent[2], len(recip_DA.pix_y.data))
    recip_DA = recip_DA.assign_coords(
        pix_x=recip_DA.pix_x.data,
        pix_y=recip_DA.pix_y.data,
        q_x=('pix_x', q_x_axis),
        q_y=('pix_y', q_y_axis),
    )
    # The energy attribute looks like '2470.00eV'; drop the unit for the numeric coordinate
    recip_DA = recip_DA.expand_dims({
        'energy': [float(recip_DA.attrs['energy'][:-2])],
        'sample_id': [recip_DA.attrs['sample_id']],
    })
    recip_DA_rows.append(recip_DA)

    # Caked image: same treatment with q_r / chi axes
    caked_DA = xr.DataArray(data=caked_arr, dims=['index_y', 'index_x'], attrs=attr_dict)
    q_r_axis = np.linspace(caked_extent[0], caked_extent[1], len(caked_DA.index_x.data))
    chi_axis = np.linspace(caked_extent[3], caked_extent[2], len(caked_DA.index_y.data))
    caked_DA = caked_DA.assign_coords(
        index_x=caked_DA.index_x.data,
        index_y=caked_DA.index_y.data,
        q_r=('index_x', q_r_axis),
        chi=('index_y', chi_axis),
    )
    caked_DA = caked_DA.expand_dims({
        'energy': [float(caked_DA.attrs['energy'][:-2])],
        'sample_id': [caked_DA.attrs['sample_id']],
    })
    caked_DA_rows.append(caked_DA)

recip_DA = xr.concat(recip_DA_rows, 'energy').sortby('energy')
caked_DA = xr.concat(caked_DA_rows, 'energy').sortby('energy')

In [None]:
# Display the concatenated reciprocal-space DataArray
recip_DA

In [None]:
# Quick-look image of the reciprocal-space map at the energy nearest 2470,
# colour-scaled between the 1st and 99th intensity percentiles
cmap = plt.cm.turbo.copy()
sliced_DA = recip_DA.sel(energy=2470, method='nearest').squeeze()
cmin, cmax = (sliced_DA.quantile(q) for q in (0.01, 0.99))
ax = sliced_DA.plot.imshow(x='q_x', y='q_y', cmap=cmap, norm=plt.Normalize(cmin, cmax))
ax.axes.set(aspect='equal')
plt.show()
plt.close('all')

In [None]:
# cmap = plt.cm.turbo.copy()
# sliced_DA = caked_DA.sel(energy=2450, method='nearest').squeeze()
# cmin = sliced_DA.quantile(0.0001)
# cmax = sliced_DA.quantile(0.99)
# ax = sliced_DA.plot.imshow(norm=plt.Normalize(cmin,cmax), cmap=cmap, x='q_r', y='chi')
# # ax.axes.set(aspect='equal')
# plt.show()

In [None]:
# SULFUR

# Write this sample's reciprocal-space and caked data to per-sample zarr stores
# (overwriting existing ones); they are loaded later to concatenate the full zarr
sampleZarrsPath = propPath / 'processed_data/andrew/trgiwaxs_zarrs'

recip_DS = recip_DA.to_dataset(name='raw_intensity')
recip_samp_zarr_name = 'recip_' + recip_DA.sample_id.values[0] + '.zarr'
recip_DS.to_zarr(sampleZarrsPath / recip_samp_zarr_name, mode='w')

caked_DS = caked_DA.to_dataset(name='raw_intensity')
caked_samp_zarr_name = 'caked_' + caked_DA.sample_id.values[0] + '.zarr'
caked_DS.to_zarr(sampleZarrsPath / caked_samp_zarr_name, mode='w')

In [None]:
# # CHLORINE

# # Save sample zarr, load later to concatenate full zarr
# sampleZarrsPath = propPath.joinpath('processed_data/casey/zarrs')

# recip_samp_zarr_name = 'recip_Cl-'+recip_DA.sample_info.values[0]+'.zarr'
# recip_DS = recip_DA.to_dataset(name='raw_intensity')
# recip_DS.to_zarr(sampleZarrsPath.joinpath(recip_samp_zarr_name), mode='w')

# caked_samp_zarr_name = 'caked_Cl-'+caked_DA.sample_info.values[0]+'.zarr'
# caked_DS = caked_DA.to_dataset(name='raw_intensity')
# caked_DS.to_zarr(sampleZarrsPath.joinpath(caked_samp_zarr_name), mode='w')

In [None]:
# # PHOSPHORUS

# # Save sample zarr, load later to concatenate full zarr
# sampleZarrsPath = propPath.joinpath('processed_data/casey/zarrs')

# recip_samp_zarr_name = 'recip_P-'+recip_DA.sample_info.values[0]+'.zarr'
# recip_DS = recip_DA.to_dataset(name='raw_intensity')
# recip_DS.to_zarr(sampleZarrsPath.joinpath(recip_samp_zarr_name), mode='w')

# caked_samp_zarr_name = 'caked_P-'+caked_DA.sample_info.values[0]+'.zarr'
# caked_DS = caked_DA.to_dataset(name='raw_intensity')
# caked_DS.to_zarr(sampleZarrsPath.joinpath(caked_samp_zarr_name), mode='w')

In [None]:
# # PHOSPHORUS ai 8.5

# # Save sample zarr, load later to concatenate full zarr
# sampleZarrsPath = propPath.joinpath('processed_data/casey/zarrs')

# recip_samp_zarr_name = 'recip_P-ai8.5-'+recip_DA.sample_info.values[0]+'.zarr'
# recip_DS = recip_DA.to_dataset(name='raw_intensity')
# recip_DS.to_zarr(sampleZarrsPath.joinpath(recip_samp_zarr_name), mode='w')

# caked_samp_zarr_name = 'caked_P-ai8.5-'+caked_DA.sample_info.values[0]+'.zarr'
# caked_DS = caked_DA.to_dataset(name='raw_intensity')
# caked_DS.to_zarr(sampleZarrsPath.joinpath(caked_samp_zarr_name), mode='w')