In [2]:
import numpy as np
import gvar as gv
import re 
import pandas as pd 
import sys
import copy
import tables as h5
import h5py
import os 
import time
import re
sys.path.insert(0, '/home/gbradley/nucleon_elastic_FF')
from nucleon_elastic_ff.data.h5io import get_dsets 


In [3]:

# import fitter.corr_functions as cf
# import fitter.fit_twopt

directory = '/home/gbradley/c51_corr_analysis/tests/data/C13/'
N_cnf = len([name for name in os.listdir(directory) if os.path.isfile(name)])

dirs = os.listdir( directory )

cnf_abbr = [files.split(".ama.h5",0) for files in dirs]

# data_file_list = os.path.realpath(dirs)
data_file_list = list()
for dirpath,_,filenames in os.walk(directory):
    for f in filenames:
        data_file_list.append(os.path.abspath(os.path.join(dirpath, f)))
file = data_file_list[0]


NUCL: nucleon
U: quark bilinear operator inserted on up-quark; D will be used for down-quark
MIXED: "mixed" type of spin projection is used
NONREL: non-relativistic proton is used
l0:  when inserting the quark bilinear oprator, the separation of the quarks of the bilinear operator is zero (local operator); you might see some l1 (quark bilinear operator separated by 1 lattice unit) data as well
g13: the gamma matrix of the quark bilinear operator is "13" in Chroma convention. Page 6 and 7 of the attached pdf shows the Chroma gamma matrix convention and its indexing; their indexing is summarized below:
 
0: scalar; I
15: pseudoscalar; g_5
1: vector;  g_x
2: vector;  g_y
4: vector;  g_z
8: vector;  g_t
14: axial;   g_x g_5
13: axial;  -g_y g_5
11: axial;   g_z g_5
7: axial;  -g_t g_5
9: tensor;  g_x g_t
10: tensor;  g_y g_t
12: tensor;  g_z g_t
3: tensor;  g_x g_y
6: tensor;  g_y g_z
5: tensor;  g_x g_z

In [16]:


string = (
    "3pt_tsep12/NUCL_D_MIXED_NONREL_l0_g0/src5.0_snk5.0/qz+0_qy+0_qx+2/C13.b_5682/AMA"
)

patterns = [
    "3pt",
    "_tsep(?P<tsep>[0-9]|[0-9]+)",  # must match `_tsep` and stores the following numbers (any length)
    "/NUCL_(?P<quark>U|D)",  # Store U or D in quark
    "_MIXED_NONREL",  # Not sure if this changes. Not stored for now
    "_l(?P<l>[0-9]+)",  # action parameters?
    "_g(?P<g>[0-15]+)",
    "/src(?P<src>[0-9\.]+)",  # Stores numbers + . to store decimals. Must escape .
    "_snk(?P<snk>[0-9\.]+)",  # Stores numbers + . to store decimals. Must escape .
    "/qz(?P<qz>[\+\-0-9]+)", 
    "_qy(?P<qy>[\+\-0-9]+)", 
    "_qx(?P<qx>[\+\-0-9]+)", 
    
]

for n in range(len(patterns)):
    pattern = "".join(patterns[:n+1])
    match = re.match(pattern, string)
    if not match:
        print(pattern)
        break

if match:
    print(match.groupdict())
# 3pt_tsep8/NUCL_U_MIXED_NONREL_l0_g9/src5.0_snk5.0/qz-3_qy+0_qx+1/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A1_A1/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A1_A1_px1_py0_pz0/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A3_P_px1_py0_pz0/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A4_A4_px3_py1_pz0/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A4_P/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_axial_A4_P_px1_py0_pz0/C13.b_5682/AMA
# 2pt/ext_current/src5.0_snk5.0/ext_vector_T12_T12_px1_py0_pz0/C13.b_5682/AMA
# 2pt/ext_current_SP/src5.0_snk5.0/ext_vector_V2_V2_px1_py1_pz1/C13.b_5682/AMA
# 2pt/pion/src5.0_snk5.0/pion_px1_py0_pz0/C13.b_5682/AMA
# 2pt/pion_SP/src5.0_snk5.0/pion_px1_py0_pz0/C13.b_5682/AMA
# 2pt/proton/src5.0_snk5.0/proton_px1_py0_pz0/C13.b_5682/AMA
# 2pt/proton_SP/src5.0_snk5.0/proton_px1_py0_pz0/C13.b_5682/AMA

{'tsep': '12', 'quark': 'D', 'l': '0', 'g': '0', 'src': '5.0', 'snk': '5.0', 'qz': '+0', 'qy': '+0', 'qx': '+2'}


In [17]:
columns = ["tsep", "quark", "l", "g", "src", "snk","qz","qy","qx"]


In [18]:
data_frames = []

with h5py.File(file, "r") as h5f:
    dsets = get_dsets(h5f)
    for key, dset in dsets.items():
        match = re.search(pattern, key)
        if match:
            info = match.groupdict()
            # print(info)
#             corr = info.pop("tsep")

            quark = info.pop("quark")
            # print(quark)
            info["quark"] = quark[0]
# #             # info["parity"] = -1 if len(nucleon_parity) == 2 else 1
            
            gamma = info.pop("g")
            if gamma in ["g1","g2","g4","g8"]:
                info["gamma"] = "vector"
            elif gamma in ["g0"]:
                info["gamma"] = "scalar"
            elif gamma in ["g5"]:
                info["gamma"] = "pseudoscalar"
            elif gamma in ["g14","g13","g11","g7"]:
                info["gamma"] = "axial"
            elif gamma in ["g14","g13","g11","g7"]:
                info["gamma"] = "axial"
            elif gamma in ["g9","g10","g12","g3","g6","g5"]:
                info["gamma"] = "tensor"

            # current_key = key.replace("g", "")
            curr_dset = h5f[key]

            cfgs = dset[:]
            # print(cfgs)
            corr = (
                curr_dset[()].real 
                # if info["current"] in ["V4"] else curr_dset[()].imag
            )
            # print(corr.shape[-1])
            ts = range(corr.shape[-1])
            # print(ts)
            tmp_df = (
                pd.DataFrame(index=cfgs, columns=ts, data=corr)
                .unstack()
                .reset_index()
                .rename(columns={"level_0": "t", "level_1": "quark", 
                        "level_2" : "gamma" , "level_3": "momentum", 0: "corr"})
            )
            for key, val in info.items():
                tmp_df[key] = val
            data_frames.append(tmp_df.astype({"tsep": int}))
            # print(data_frames)



df = pd.concat(
    data_frames, 
    ignore_index=True, 
).reindex(columns, axis=1).sort_values(columns).reset_index(drop=True)
df.head()

[2022-07-22 12:37:47,325|nucleon_elastic_ff@INFO] Locating all dsets of h5 file `/home/gbradley/c51_corr_analysis/tests/data/C13/C13-b_5682.ama.h5`


Unnamed: 0,tsep,quark,l,g,src,snk,qz,qy,qx
0,8,D,0,,5.0,5.0,0,0,0
1,8,D,0,,5.0,5.0,0,0,0
2,8,D,0,,5.0,5.0,0,0,0
3,8,D,0,,5.0,5.0,0,0,0
4,8,D,0,,5.0,5.0,0,0,0


## statistical average ##


In [None]:
def avg_data(arg):
    corr_avg = gvar.dataset.avg_data(
        arg.pivot(index="cfg", columns="t", values="corr").values
    )
    return pd.Series(corr_avg)


group = isospin_spin_parity_avg_df.groupby(["nucleon", "current", "tsep"])
corr_df = (
    group.apply(avg_data)
    .reset_index(level=-1)
    .rename(columns={"level_3": "t", 0: "corr"})
    .reset_index()
    .set_index(["nucleon", "current", "tsep", "t"])
)

corr_df.head()

## momentum average ##

In [None]:
def mom_avg(h5_data,state,mom_lst,weights=False):
    '''
    perform a momentum average of a state from an open h5 file
    data file is assumed to be of shape [Nt,Nz,Ny,Nx,[re,im]]
    data_mom = h5_data[state][:,pz,py,px]
    '''
    d_lst = []
    w = []
    for mom in mom_lst:
        px,py,pz = mom['momentum']
        w.append(mom['weight'])
        #print(state)
        d_lst.append(h5_data[state][:,pz,py,px])
    d_lst = np.array(d_lst)
    w = np.array(w)
    if weights:
        for wi,we in enumerate(w):
            d_lst[wi] = we*d_lst[wi]
        d_avg = np.sum(d_lst,axis=0) / np.sum(w)
    else:
        d_avg = np.mean(d_lst,axis=0)
    return d_avg
# mom_avg('/home/gbradley/c51_corr_analysis/tests/data/C13/C13-b_5178.ama.h5', state, mom_lst)

mom_lst = []
for px in range(-2,3):
    for py in range(-2,3):
        for pz in range(-2,3):
            if px**2 + py**2 + pz**2 <= 5:
                mom_lst.append('pz'+str(pz)+'_py'+str(py)+'_px'+str(px))