In [1]:
import pyref as ref
import polars as pl
from pathlib import Path
import pandas as pd

# Data cleaning and processing for reflectivity data

In [2]:
# Root folder of the processed XRR data this notebook works on.
# NOTE(review): this is an absolute, machine-specific path; anyone else running
# the notebook has to edit it.
beamtime = Path(
    "C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Apr/XRR/Processed"
)

# Recursively collect every directory below the root whose name starts with "CCD".
ccd_dirs = list(filter(Path.is_dir, beamtime.glob("**/CCD*")))
print(ccd_dirs)

[WindowsPath('C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Apr/XRR/Processed/ZnPcOLD_/CCD Scan 83703'), WindowsPath('C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Apr/XRR/Processed/ZnPcOLD_/CCD Scan 83704'), WindowsPath('C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Apr/XRR/Processed/ZnPcOLD_/CCD Scan 83705'), WindowsPath('C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Apr/XRR/Processed/ZnPcOLD_/CCD Scan 83706'), WindowsPath('C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics

In [3]:
def process_all(ccd_dirs: list[Path]) -> pl.DataFrame | None:
    """Load every reflectivity dataset found under the given scan directories.

    Each path matched by ``<ccd_dir>/*/*`` is passed to ``ref.Refl`` and its
    ``refl`` table is read. Three string columns derived from the path are
    added to the table before it is converted to polars:

    * ``scan_id``      -- stem of the scan directory
    * ``energy``       -- stem of the entry's parent folder
    * ``polarization`` -- stem of the entry itself

    Loading is best-effort: an entry that raises is reported on stdout and
    skipped, and empty tables are dropped, so one bad scan does not abort the
    whole batch.

    Args:
        ccd_dirs: Scan directories to walk.

    Returns:
        The concatenation of all successfully loaded tables, or ``None`` when
        nothing could be loaded (including when ``ccd_dirs`` is empty).
    """
    import warnings

    frames = []
    # Suppress warnings only for the duration of the load. catch_warnings()
    # restores the previous filters on exit, so calling this function does not
    # silently disable warnings for the rest of the kernel session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for ccd_dir in ccd_dirs:
            # NOTE(review): `.stem` drops everything after the last dot, so a
            # folder named e.g. "283.7" would be labelled "283". Confirm the
            # folder names contain no dots, otherwise `.name` is the safer choice.
            scan_id = ccd_dir.stem
            for data_path in list(ccd_dir.glob("*/*")):
                energy = data_path.parent.stem
                pol = data_path.stem
                try:
                    refl = ref.Refl(data_path).refl
                except Exception as e:
                    # Deliberately broad: report the failing entry and move on.
                    print(f"Error: {e}")
                    print(f"In scan {energy} & {pol} of scan {scan_id}")
                    continue
                if refl.empty:
                    continue
                # assign() returns a new frame, leaving the table held by the
                # Refl object untouched.
                tagged = refl.assign(scan_id=scan_id, energy=energy, polarization=pol)
                frames.append(pl.from_pandas(tagged))
    return pl.concat(frames) if frames else None


df = process_all(ccd_dirs)
# process_all returns None when no scan could be loaded; stop here with a clear
# message instead of an AttributeError on the preview below.
if df is None:
    raise ValueError("No reflectivity data could be loaded from ccd_dirs")
df.head()

Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83703
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83704
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83705
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83706
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83707
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83708
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83709
Error: Unable to coerce to DataFrame, shape must be (1, 15): given (4, 15)
In scan 283 & 100 of scan CCD Scan 83711
Error: operands could not be broadcast together with shapes (3,) (4,) 
In scan 283 & 100 of scan CCD Scan 83712
Error: list index out of range
In scan 250 & 100 of scan CCD Scan 83714
Error: list index out of range
In scan 250 & 190 of scan CCD Scan 83781
Error: list index out of range
In scan 250 & 190 of scan CCD Scan 83782
Error: list index out of range
In scan 250 & 190 of 

Energy,Theta,Current,HOS,POL,Exposure,Intensity,Background,RawRefl,Refl,Err,Q,k,lam,lamErr,scan_id,energy,polarization
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str
250.0062,0.616,500.3415,8.9,100.0,0.001,274839.0,58611.0,216228.0,0.863552,0.002024,0.0027,586.849521,500.445471,0.09094,"""CCD Scan 83710…","""250""","""100"""
250.0024,1.23,500.1958,8.9,100.0,0.001,227302.0,58257.0,169045.0,0.675314,0.001583,0.0054,586.849521,500.445471,0.09094,"""CCD Scan 83710…","""250""","""100"""
250.0005,1.846,500.0272,8.9,100.0,0.001,216157.0,58903.0,157254.0,0.628422,0.001474,0.0082,586.849521,500.445471,0.09094,"""CCD Scan 83710…","""250""","""100"""
249.9986,2.462,500.2766,8.9,100.0,0.001,255292.0,59154.0,196138.0,0.78342,0.001837,0.0109,586.849521,500.445471,0.09094,"""CCD Scan 83710…","""250""","""100"""
249.9986,3.076,500.1187,8.9,100.0,0.001,226899.0,58839.0,168060.0,0.671482,0.001575,0.0136,586.849521,500.445471,0.09094,"""CCD Scan 83710…","""250""","""100"""


In [8]:
# Scatter plot of reflectivity (Refl) against Q with a log-scaled y axis.
# Points are split by scan and polarization via `by`, and by energy via `groupby`.
# NOTE(review): `groupby` appears to drive the selector widget placed by
# `widget_location` -- confirm against the plotting backend in use.
df.plot(
    kind="scatter",
    title="April Beam Time",
    # what goes on the axes
    x="Q",
    y="Refl",
    logy=True,
    # how the points are split up
    by=["scan_id", "polarization"],
    groupby="energy",
    widget_location="top",
    # marker appearance
    size=1,
)


BokehModel(combine_events=True, render_bundle={'docs_json': {'5ee408ee-f6b8-4391-ae4e-32fd11386166': {'version…