# Init

In [None]:
import re
import h5py
import pandas as pd

import gvar

from nucleon_elastic_ff.data.h5io import get_dsets

# Read & parse the file

In [None]:
# Path of the HDF5 input file; it is opened read-only with h5py further down.
file = "a09m310_e_avg.h5"

In [None]:
# Regular expression searched for in every dataset path of the input file.
# Raw string literals are used so the backslashes reach the regex engine
# unchanged without relying on Python's handling of unknown string escapes
# (which emits a warning and is scheduled to become an error).
pattern = (
    # "proton" or "proton_np"; split into nucleon name and parity sign later.
    r"(?P<parity>proton|proton\_np)"
    # Isospin channel label.
    r"_(?P<isospin>DD|UU)"
    # Spin channel label.
    r"_(?P<spin>dn_dn|up_up)"
    # Any leading minus signs are skipped; only the digits are captured.
    r"_tsep_[\-]*(?P<tsep>[0-9]+)"
    # Current label, and the path must also contain "cfgs_srcs".
    r".*(?P<current>A3|V4).*cfgs\_srcs"
)

In [None]:
# Column layout of the combined table; the same list also serves as the
# sort-key order when the table is assembled.
columns = [
    "nucleon",
    "current",
    "tsep",
    "cfg",
    "t",
    "isospin",
    "parity",
    "spin",
    "corr",
]

In [None]:
# Build one long-format frame (one row per cfg/t pair) for every dataset whose
# path matches `pattern`, then merge all of them into a single table.
data_frames = []

with h5py.File(file, "r") as h5f:
    dsets = get_dsets(h5f)

    for key, dset in dsets.items():
        match = re.search(pattern, key)
        if not match:
            # Path does not describe one of the requested correlators.
            continue

        info = match.groupdict()

        # "proton" -> parity +1, "proton_np" -> parity -1.
        nucleon_parity = info.pop("parity").split("_")
        info["nucleon"] = nucleon_parity[0]
        info["parity"] = -1 if len(nucleon_parity) == 2 else 1

        # Numeric sign for the isospin channel: "UU" -> +1, anything else -> -1.
        info["isospin"] = 1 if info.pop("isospin") == "UU" else -1

        # The correlator values are read from the sibling dataset whose path
        # has "local_curr" in place of "cfgs_srcs".
        current_key = key.replace("cfgs_srcs", "local_curr")
        curr_dset = h5f[current_key]

        # Column 0 of the matched dataset labels the rows
        # (presumably the configuration number -- confirm against the file).
        cfgs = dset[:, 0]
        # Real part for V4, imaginary part for every other current.
        corr = (
            curr_dset[()].real if info["current"] in ["V4"] else curr_dset[()].imag
        )
        ts = range(corr.shape[-1])

        # Name both axes up front so the long-format columns come out as
        # "t", "cfg", "corr" without depending on auto-generated level names.
        tmp_df = (
            pd.DataFrame(
                corr,
                index=pd.Index(cfgs, name="cfg"),
                columns=pd.Index(ts, name="t"),
            )
            .unstack()
            .reset_index(name="corr")
        )
        # Broadcast the labels parsed from the path onto every row. Distinct
        # loop names are used so the outer `key` is not overwritten.
        for column, value in info.items():
            tmp_df[column] = value
        data_frames.append(tmp_df.astype({"tsep": int}))

df = (
    pd.concat(data_frames, ignore_index=True)
    .reindex(columns, axis=1)
    .sort_values(columns)
    .reset_index(drop=True)
)
df.head()

# Spin average

In [None]:
# Mean of "corr" over the rows that agree in every label column except "spin".
spin_avg_keys = ["nucleon", "current", "tsep", "cfg", "t", "isospin", "parity"]
spin_avg_df = df.groupby(spin_avg_keys, as_index=False)["corr"].mean()

spin_avg_df.head()

# Parity average (minus sign)

In [None]:
# Multiply each correlator by its parity sign (+1 / -1), then take the mean
# over the rows that agree in all remaining label columns.
tmp = spin_avg_df.copy()
tmp["corr"] = tmp["corr"] * tmp["parity"]

parity_avg_keys = ["nucleon", "current", "tsep", "cfg", "t", "isospin"]
spin_parity_avg_df = tmp.groupby(parity_avg_keys, as_index=False)[["corr"]].mean()

spin_parity_avg_df.head()

# Isospin sum (minus sign)

In [None]:
# Multiply each correlator by its isospin sign (+1 / -1), then add up the rows
# that agree in all remaining label columns.
tmp = spin_parity_avg_df.copy()
tmp["corr"] = tmp["corr"] * tmp["isospin"]

isospin_sum_keys = ["nucleon", "current", "tsep", "cfg", "t"]
isospin_spin_parity_avg_df = tmp.groupby(isospin_sum_keys, as_index=False)["corr"].sum()
isospin_spin_parity_avg_df.head()

# Statistical average

In [None]:
def avg_data(arg):
    """Reduce one group of correlator rows with ``gvar.dataset.avg_data``.

    ``arg`` is a long-format frame with "cfg", "t" and "corr" columns. It is
    pivoted into a table with one row per ``cfg`` and one column per ``t``;
    the underlying array is passed to ``gvar.dataset.avg_data`` and the result
    is returned wrapped in a ``pandas.Series`` with a default integer index.
    """
    samples = arg.pivot(index="cfg", columns="t", values="corr").to_numpy()
    return pd.Series(gvar.dataset.avg_data(samples))


# Apply `avg_data` to every (nucleon, current, tsep) group and collect the
# results in one frame indexed by (nucleon, current, tsep, t).
#
# The per-group Series are concatenated explicitly instead of going through
# `groupby.apply`: `apply` returns a long Series only when the groups have
# different indexes and switches to a wide DataFrame when they all share the
# same one, which would make the follow-up reshaping produce the wrong layout.
group = isospin_spin_parity_avg_df.groupby(["nucleon", "current", "tsep"])
corr_df = (
    pd.concat({labels: avg_data(frame) for labels, frame in group})
    # Three levels come from the group labels; the innermost level is the
    # integer index of the Series returned by `avg_data`.
    .rename_axis(["nucleon", "current", "tsep", "t"])
    .to_frame("corr")
)

corr_df.head()