In [None]:
import os
import re
import pandas as pd
import numpy as np

In [None]:
# Directory that holds the files to analyse. The location can be overridden
# with the NUC_DENS_DATA environment variable so the notebook also runs on
# other machines; the default is the original hard-coded path.
DATA = os.environ.get("NUC_DENS_DATA", "/Users/ckoerber/data/nuc/dens")
# os.listdir returns entries in arbitrary order, so sort them to make
# ``files[0]`` reproducible between runs. Hidden entries (e.g. ".DS_Store")
# are not data files and are skipped.
files = sorted(name for name in os.listdir(DATA) if not name.startswith("."))

In [None]:
# Display the first directory entry as an example of the file-name layout.
files[0]

In [None]:
# Regex fragments for the dash-separated fields of a file name, in order.
# Every named group becomes a key of ``groupdict()``; fragments wrapped in
# ``(?: ... )?`` are optional and yield ``None`` when absent.
patterns = (
    # ``A-Za-z`` instead of ``A-z``: the range ``A-z`` also covers the ASCII
    # punctuation between "Z" and "a" ([ \ ] ^ _ `), which is not intended.
    r"compton-dens-(?P<nuc>[0-9A-Za-z]+)",
    r"(?P<potential>[a-z0-9]+)",
    r"(?:(?P<empot>(?:empot)))?",
    r"(?:(?P<tnf>(?:[a-z]+)))?",
    r"(?:Lamnum=(?P<lambda>(?:[0-9\.e\+]+)))?",
    r"(?:tnfcut=(?P<tnfcut>(?:[0-9]+)))?",
    r"om=(?P<omega>(?:[0-9\.]+E[\+\-][0-9]+))",
    r"th=(?P<theta>(?:[0-9\.E\+]+))",
    r"nx=(?P<nx>(?:[0-9]+))",
    r"nphi=(?P<nphi>(?:[0-9]+))",
    r"np12\=np34\=(?P<np12_np34>(?:[0-9\+]+))",
    r"np3\=(?P<np3>(?:[0-9\+]+))",
    r"nq4\=nq=(?P<nq4_nq>(?:[0-9\+]+))",
    r"j12max=(?P<j12max>(?:[0-9]+))",
    r"lmax=(?P<lmax>(?:[0-9]+))",
    r"lsummax=(?P<lsummax>(?:[0-9]+))",
    r"tau4max=(?P<tau4max>(?:[0-9]+))",
    r"rho1b\.dat",
)
# NOTE(review): the fragments are joined with a mandatory "-", so a file name
# that omits an optional field must still contain that field's separator
# (i.e. "--") to match -- confirm this agrees with the naming scheme.
pattern = re.compile("-".join(patterns))
# Show the fields extracted from the first file name as a sanity check.
pattern.search(files[0]).groupdict()

In [None]:
# Target dtype -> names of the columns (regex groups) that are cast to it.
dtypes = {}
dtypes[int] = ["tnfcut", "nx", "nphi", "j12max", "lmax", "lsummax", "tau4max"]
dtypes[float] = ["lambda", "omega", "theta"]

In [None]:
# Match every file name once and keep name and match together, so that the
# rows and the "file" column stay aligned when non-matching entries are dropped.
searched = [(f, pattern.search(f)) for f in files]
skipped = [f for f, m in searched if m is None]
if skipped:
    # Previously a single non-matching entry crashed the cell with an
    # AttributeError on ``None``; report and skip such entries instead.
    print(f"Skipping {len(skipped)} file(s) that do not match the name pattern: {skipped}")
matched = [(f, m) for f, m in searched if m is not None]

data = [m.groupdict() for _, m in matched]
# Passing the group names as columns keeps the frame well-formed (all columns
# present) even when no file matched.
df = pd.DataFrame(data, columns=list(pattern.groupindex))
for dtype, cols in dtypes.items():
    for col in cols:
        if dtype is int and df[col].isna().any():
            # Optional groups are ``None`` when absent and a plain ``int``
            # cast fails on missing values; use pandas' nullable integer dtype.
            df[col] = pd.to_numeric(df[col]).astype("Int64")
        else:
            df[col] = df[col].astype(dtype)

df["file"] = [f for f, _ in matched]
df.head()

The only quantities that vary between the files are `omega` and `theta`.

In [None]:
# Regular expression for the file contents, assembled from its consecutive
# pieces. It is meant to be used with re.DOTALL so that ".*" can span lines.
pp = "".join(
    (
        # integer following "MAXRHO1BINDEX ="
        r"MAXRHO1BINDEX\s+\=\s+(?P<max_rho_index>[0-9]+)",
        r".*",
        # comma-separated integer list (may contain "N*V" tokens) after "RHO1BINDX ="
        r"RHO1BINDX\s+\=(?P<rho_index>[0-9\*\,\-\s]+)",
        r".*",
        # one line of numbers following a "/"
        r"\/\s+(?P<om_theta>[0-9\.\-\+E ]+\n)",
        # the block of numbers after that line
        r"\s+(?P<rho>[0-9\.\-\+E\s]+\n)",
    )
)


def parse_fortran_funny(string):
    """Parse a comma-separated integer list that may use ``N*V`` repeat tokens.

    A token such as ``3*0`` stands for the value ``0`` repeated three times
    and is expanded before conversion. Empty entries (for example after a
    trailing comma) are ignored.

    Parameters
    ----------
    string : str
        Comma-separated integers, optionally containing ``N*V`` tokens and
        arbitrary whitespace around the entries.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(values) // 8, 8)``.

    Raises
    ------
    ValueError
        If an entry is not an integer or the number of values is not a
        multiple of 8.
    """

    def expand(match):
        count, value = match.group(1), match.group(2)
        return ", ".join([value] * int(count))

    # Expand each repeat token where it occurs. The regex consumes the whole
    # count and value, so a token such as "2*0" can never be rewritten inside
    # a longer one such as "12*0" -- which plain substring replacement did,
    # depending on the order in which the patterns were processed.
    expanded = re.sub(r"([0-9]+)\*(-?[0-9]+)", expand, string)

    values = [int(token) for token in expanded.split(",") if token.strip()]
    arr = np.array(values, dtype=int)
    nd = len(arr) // 8
    return arr.reshape([nd, 8])


def _parse_floats(text):
    """Convert the whitespace-separated numbers in ``text`` to a 1-D float array."""
    # ``str.split()`` without an argument splits on any run of whitespace and
    # never yields empty tokens. Splitting on a single space left tokens such
    # as "\n" (which ``float`` rejects) and glued numbers across line breaks.
    return np.array([float(token) for token in text.split()])


# Converter applied to the raw string captured for each named group of ``pp``.
parse = {
    "max_rho_index": int,
    "om_theta": _parse_floats,
    "rho": _parse_floats,
    "rho_index": parse_fortran_funny,
}


def parse_1bd(address):
    """Read the file at ``address`` and extract the fields described by ``pp``.

    The file content is searched with the module-level regex ``pp``; every
    captured group that has an entry in the module-level ``parse`` mapping is
    converted with the corresponding callable.

    Parameters
    ----------
    address : str
        Path of the file to read.

    Returns
    -------
    dict
        Maps the group names of ``pp`` to their converted values.

    Raises
    ------
    ValueError
        If the file content does not match ``pp`` (in addition to whatever
        the individual converters raise on malformed values).
    """
    with open(address, "r") as inp:
        text = inp.read()

    # DOTALL lets the ".*" parts of the pattern span multiple lines.
    match = re.search(pp, text, re.MULTILINE | re.DOTALL)
    if match is None:
        # Fail with a message naming the file instead of an AttributeError
        # raised from calling ``groupdict`` on ``None``.
        raise ValueError(f"File {address!r} does not match the expected layout")

    fields = match.groupdict()
    for key, convert in parse.items():
        fields[key] = convert(fields[key])
    return fields

In [None]:
# Try the parser on the first listed file and display the resulting dict.
parse_1bd(os.path.join(DATA, files[0]))