# Loading In From The Igor Pro Interface

Because Igor Pro version 9 stores its experiments as a well-organized HDF5 file
(a directory-like tree of groups), loading the clustered data is easier than it
once was.


In [1]:
import xarray as xr
import pandas as pd
from pathlib import Path

# Column labels for the 16 columns of the `pw2DFit` wave, in stored order.
# A 17th label ("Final Cluster transition was placed in") is intentionally
# left out: the wave has shape (19, 16), so that column is not present.
CLUSTER_COLUMNS = [
    "Transition Energy (eV)",
    "Transition Amplitude (a.u.)",
    "Transition Width (eV)",
    "TDM_x",
    "TDM_y",
    "TDM_z",
    "TDM Theta (degrees)",
    "Transition Symmetry",
    "TDM_xx",
    "TDM_yy",
    "TDM_zz",
    "TDM_xy",
    "TDM_xz",
    "TDM_yz",
    "Originating atom",
    "Originating MO",
]

# Rigid shift added to every transition energy (same units as the energy column).
ENERGY_SHIFT_EV = 0.44

# Open the Igor Pro packed experiment and keep only the group holding the fit waves.
# `dt` is reused by later cells to read `pwFinal` and `paramNames`.
dataset_path = Path.cwd() / "output" / "initial_filter.h5xp"
dt = xr.open_datatree(dataset_path, engine="h5netcdf", phony_dims="sort")[
    "/Packed Data/clustering/AmplitudeFitting/Alpha_62/TensorMisc"
].to_dataset()

# The wave carries no meaningful row labels, so discard the phony-dimension
# index instead of materialising it as a throw-away column.
clusters = dt["pw2DFit"].to_pandas().reset_index(drop=True)
# Raises ValueError if the wave does not have exactly len(CLUSTER_COLUMNS) columns.
clusters.columns = CLUSTER_COLUMNS

# Apply the energy shift as a plain reassignment rather than an in-place `+=`.
clusters["Transition Energy (eV)"] = (
    clusters["Transition Energy (eV)"] + ENERGY_SHIFT_EV
)

# The 0..n-1 row position is the cluster identifier used by later cells.
clusters = clusters.rename_axis("cluster_idx")
print(clusters.to_markdown(index=False, floatfmt=".3f", tablefmt="pipe"))

|   Transition Energy (eV) |   Transition Amplitude (a.u.) |   Transition Width (eV) |   TDM_x |   TDM_y |   TDM_z |   TDM Theta (degrees) |   Transition Symmetry |   TDM_xx |   TDM_yy |   TDM_zz |   TDM_xy |   TDM_xz |   TDM_yz |   Originating atom |   Originating MO |
|-------------------------:|------------------------------:|------------------------:|--------:|--------:|--------:|----------------------:|----------------------:|---------:|---------:|---------:|---------:|---------:|---------:|-------------------:|-----------------:|
|                  284.282 |                         0.013 |                   0.232 |   0.006 |   0.006 |   5.000 |                 0.000 |                 1.000 |    0.000 |    0.000 |    0.008 |    0.000 |    0.000 |    0.000 |              2.000 |          117.000 |
|                  285.082 |                         0.034 |                   0.228 |   0.003 |   0.003 |   2.000 |                 0.000 |                 1.000 |    0.000 |    0.000 | 

In [2]:
# Read the fitted 1D parameter wave (`pwFinal`) and the matching array of
# parameter labels (`paramNames`) from the xarray Dataset opened earlier.
param_names_array = dt["paramNames"].to_numpy()
pwfinal_series = dt["pwFinal"].to_pandas()  # Series; its name becomes the value column

# One row per fitted parameter: the value (column 'pwFinal') next to its label.
source_df = pd.DataFrame(pwfinal_series).assign(Label=param_names_array)

# Skip the first 8 rows, then split each remaining label such as
# "peakEnergy_0" into its trailing integer (cluster_idx) and its base
# name (param_type).
transitions_df = (
    source_df.iloc[8:]
    .reset_index(drop=True)
    .assign(
        cluster_idx=lambda frame: (
            frame["Label"].str.extract(r"(\d+)$", expand=False).astype(int)
        ),
        param_type=lambda frame: (
            frame["Label"].str.replace(r"_\d+$", "", regex=True)
        ),
    )
)

# Reshape long -> wide: one row per cluster_idx, one column per parameter type,
# filled with the 'pwFinal' values.
pivot_df = transitions_df.pivot(
    index="cluster_idx", columns="param_type", values="pwFinal"
)

# Fit-parameter names -> the column labels used by the `clusters` table.
column_rename_map = {
    "peakEnergy": "Transition Energy (eV)",
    "maxAmplitude": "Transition Amplitude (a.u.)",
    "peakWidth": "Transition Width (eV)",
    "OSxx": "TDM_xx",
    "OSyy": "TDM_yy",
    "OSzz": "TDM_zz",
    "OSxy": "TDM_xy",
    "OSxz": "TDM_xz",
    "OSyz": "TDM_yz",
}

# Keep only the mapped parameters that actually exist in the pivot, in map order.
columns_to_reconstruct = [
    name for name in column_rename_map if name in pivot_df.columns
]
reconstructed_clusters_df = pivot_df.loc[:, columns_to_reconstruct].rename(
    columns=column_rename_map
)

# Alias of the raw 1D parameter wave.
refined_1d = pwfinal_series

print(
    reconstructed_clusters_df.to_markdown(index=False, floatfmt=".3f", tablefmt="pipe")
)

|   Transition Energy (eV) |   Transition Amplitude (a.u.) |   Transition Width (eV) |   TDM_xx |   TDM_yy |   TDM_zz |   TDM_xy |   TDM_xz |   TDM_yz |
|-------------------------:|------------------------------:|------------------------:|---------:|---------:|---------:|---------:|---------:|---------:|
|                  284.222 |                         0.014 |                   0.292 |    0.000 |    0.000 |    0.008 |    0.000 |    0.000 |    0.000 |
|                  285.062 |                         0.023 |                   0.270 |    0.000 |    0.000 |    0.020 |    0.000 |    0.000 |    0.000 |
|                  285.509 |                         0.007 |                   0.335 |    0.000 |    0.000 |    0.013 |    0.000 |    0.000 |    0.000 |
|                  286.102 |                         0.004 |                   0.310 |    0.000 |    0.000 |    0.001 |    0.000 |    0.000 |    0.000 |
|                  286.744 |                         0.005 |                   0.2

In [3]:
import warnings

# NOTE(review): this silences *every* warning for the rest of the kernel
# session. The code in this cell does not need it (frames are built with
# `.assign`, so no SettingWithCopyWarning is raised); it is kept only because
# later cells may rely on it. TODO: narrow or remove once those are checked.
warnings.filterwarnings("ignore")

# Fit parameters compared between the initial guess and the refined result.
parameters_of_interest = [
    "Transition Energy (eV)",
    "Transition Amplitude (a.u.)",
    "Transition Width (eV)",
    "TDM_xx",
    "TDM_yy",
    "TDM_zz",
]

# Columns of `clusters` that identify where each transition comes from.
identifier_columns = ["Originating atom", "Originating MO"]

# Initial values: identifiers + parameters from `clusters`, tagged with the
# cluster index and kind="initial". `.assign` returns a new frame, so neither
# `clusters` nor a view of it is mutated.
initial_data_cols = identifier_columns + parameters_of_interest
initial_data = clusters[initial_data_cols].assign(
    cluster=lambda frame: frame.index,
    kind="initial",
)

# The identifier columns are copied onto the refined rows by position, so the
# two tables must have the same number of rows.
if len(reconstructed_clusters_df) != len(clusters):
    raise ValueError(
        "refined and initial cluster tables differ in length: "
        f"{len(reconstructed_clusters_df)} vs {len(clusters)}"
    )

# Refined values: parameters from `reconstructed_clusters_df`, tagged with the
# cluster index and kind="refined"; identifiers are taken from `clusters`.
refined_data = reconstructed_clusters_df[parameters_of_interest].assign(
    **{
        "cluster": lambda frame: frame.index,
        "kind": "refined",
        "Originating atom": clusters["Originating atom"].to_numpy(),
        "Originating MO": clusters["Originating MO"].to_numpy(),
    }
)

# Stack initial rows on top of refined rows with a fresh 0..n-1 index.
final_df = pd.concat([initial_data, refined_data], ignore_index=True)

# Save to csv (the positional index is written as the first, unnamed column).
output_path = Path.cwd() / "output" / "clusters.csv"
final_df.to_csv(output_path)
final_df

Unnamed: 0,Originating atom,Originating MO,Transition Energy (eV),Transition Amplitude (a.u.),Transition Width (eV),TDM_xx,TDM_yy,TDM_zz,cluster,kind
0,2.0,117.0,284.281921,0.013375,0.231543,1.363145e-08,1.363145e-08,0.007647,0,initial
1,3.0,119.0,285.082336,0.034317,0.228318,3.2114e-08,3.2114e-08,0.019748,1,initial
2,1.0,117.0,285.642609,0.023115,0.221112,2.923272e-08,2.923272e-08,0.013206,2,initial
3,4.0,122.0,286.322937,0.003105,0.236072,2.226856e-09,2.226856e-09,0.001434,3,initial
4,4.0,124.0,286.723145,0.010494,0.212838,1.073769e-08,1.073769e-08,0.005706,4,initial
5,2.0,124.0,287.163361,0.01319,0.226125,0.0005929034,0.0005929034,0.006489,5,initial
6,1.0,120.0,287.563568,0.006921,0.226147,0.0004888347,0.0004888347,0.002638,6,initial
7,3.0,133.0,288.12384,0.007747,0.251038,0.0005274433,0.0005274433,0.003644,7,initial
8,3.0,138.0,288.524048,0.000545,0.233686,0.000148189,0.000148189,0.000171,8,initial
9,1.0,124.0,288.844208,0.016666,0.216757,0.0002435459,0.0002435459,0.007749,9,initial


In [None]:
# Calculate the mass absorption coefficient from the transition amplitude
def calculate_mass_absorption_coefficient(amplitude, energy):
