In [None]:
from pathlib import Path

import zarr
import xarray as xr

In [None]:
from IPython.display import Markdown, display

In [None]:
import rpy2
import rpy2.robjects as ro
import rpy2.robjects.packages as rpackages
from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri
# Enable rpy2's numpy and pandas conversion layers for the rest of the
# kernel session, so R objects handed back to Python are converted.
# NOTE(review): newer rpy2 releases deprecate the global activate() calls in
# favour of local converters -- confirm against the rpy2 version in use.
numpy2ri.activate()
pandas2ri.activate()

In [None]:
utils = rpackages.importr('utils')
# select a mirror for R packages
utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

# Per-user R library, used only as a fallback when 'anthro' is not already
# available on R's default library paths.
user_r_lib = Path.home() / "R"

# lib_loc=None makes rpy2 search R's default library paths.
anthro_lib_loc = None
if not rpackages.isinstalled('anthro'):
    # Fall back to the per-user library. The directory has to exist before R
    # can install into it, and the same location must be handed to importr()
    # below; otherwise a package installed there is never found and would be
    # reinstalled on every run.
    user_r_lib.mkdir(parents=True, exist_ok=True)
    anthro_lib_loc = str(user_r_lib)
    if not rpackages.isinstalled('anthro', lib_loc=anthro_lib_loc):
        utils.install_packages('anthro', lib=anthro_lib_loc)
anthro = rpackages.importr('anthro', lib_loc=anthro_lib_loc)

In [None]:
# Collect every `growthstandards_*` object exposed by the anthro package,
# converted to its Python counterpart and keyed by the short indicator name
# (the "growthstandards_" prefix and the "anthro" suffix are stripped).
growthstandards = dict(
    (
        r_name.removeprefix("growthstandards_").removesuffix("anthro"),
        ro.conversion.rpy2py(getattr(anthro, r_name)),
    )
    for r_name in dir(anthro)
    if r_name.startswith("growthstandards_")
)
growthstandards

In [None]:
# Pull out the BMI table for inspection.
# NOTE: `df` is the very same object that is stored in growthstandards["bmi"]
# (no copy is made), so the column assignment below also changes the frame
# that later cells read from `growthstandards`.
df = growthstandards["bmi"]
# Re-type the `loh` column as a pandas categorical.
df["loh"] = df["loh"].astype("category")
df

In [None]:
# Attributes attached to coordinates, keyed by coordinate name.
coord_attr_map = {
    "age": {"long_name": "Age", "units": "days"},
    "length": {"long_name": "Recumbent Length", "units": "cm"},
    "height": {"long_name": "Standing Height", "units": "cm"},
    # lorh=dict(long_name="Parameterized by Recumbent Length or Standing Height"),
}

In [None]:
# Per-indicator metadata, keyed by the short indicator name used in
# `growthstandards`. "name" is the preferred output name; the remaining
# entries are applied as dataset attributes.
var_attr_map = {
    "ac": {"name": "arm_c", "long_name": "Arm Circumference", "units": "cm"},
    "hc": {"name": "head_c", "long_name": "Head Circumference", "units": "cm"},
    "bmi": {"name": "bmi", "long_name": "Body Mass Index", "units": "kg/m^2"},
    # No long_name here: this table is split into separate length/height
    # datasets, each of which is given its own long_name.
    "len": {"name": "len_hi", "units": "cm"},
    "ss": {"name": "ss", "long_name": "Subscapular Skinfold", "units": "mm"},
    "ts": {"name": "ts", "long_name": "Triceps Skinfold", "units": "mm"},
    "wei": {"name": "weight", "long_name": "Weight", "units": "kg"},
    "wfl": {"name": "wfl", "long_name": "Weight for Length", "units": "kg"},
    "wfh": {"name": "wfh", "long_name": "Weight for Height", "units": "kg"},
}

In [None]:
# Labels for the numeric sex codes found in the growth-standard tables.
_SEX_LABELS = {1: "Male", 2: "Female"}

# Tables that are split in two on their `lorh` flag.
# Maps indicator key -> ((output name, lorh code, long_name), ...).
_LORH_SPLITS = {
    "len": (
        ("length", "L", "Recumbent Length"),
        ("height", "H", "Standing Height"),
    ),
    "bmi": (
        ("bmi_length", "L", "Body Mass Index (Recumbent Length)"),
        ("bmi_height", "H", "Body Mass Index (Standing Height)"),
    ),
}


def _select_lorh(gds, code, long_name):
    """Return the part of ``gds`` whose ``lorh`` flag equals ``code``.

    Entries with a different flag are masked and all-masked labels are
    dropped; the ``lorh`` variable itself is then removed and ``long_name``
    is set on the resulting dataset.
    """
    return (
        gds.where(lambda ds: ds.lorh == code, drop=True)
        .drop_vars("lorh")
        .assign_attrs(long_name=long_name)
    )


def _fixup_gen():
    """Yield ``(name, dataset)`` pairs built from the ``growthstandards`` tables.

    Each table is indexed by ``sex`` and its second column and converted to an
    xarray dataset, then decorated with attributes from ``var_attr_map`` and
    ``coord_attr_map``. The numeric sex codes are kept as ``sex_enum`` and the
    ``sex`` coordinate is replaced by text labels. Tables carrying a
    ``loh``/``lorh`` flag either have it dropped ("wfl", "wfh") or are split
    into one dataset per flag value ("len", "bmi").

    Raises:
        KeyError: if a table key has no entry in ``var_attr_map``.
        NotImplementedError: if a table has a ``lorh`` flag but no rule for
            handling it.
    """
    for k, gdf in growthstandards.items():
        # Assumes the second column is the table's independent variable
        # (e.g. age, length or height) -- TODO confirm for new tables.
        gds = gdf.set_index(["sex", gdf.columns[1]]).to_xarray()

        # Copy so that popping "name" does not alter var_attr_map.
        attr_map = var_attr_map[k].copy()
        name = attr_map.pop("name", k)
        gds = gds.assign_attrs(**attr_map)

        for c in gds.coords:
            if c in coord_attr_map:
                gds.coords[c].attrs.update(coord_attr_map[c])

        if "sex" in gds.coords:
            # Keep the numeric codes as `sex_enum`; index by readable labels.
            gds = (
                gds.reset_index("sex")
                .rename_vars({"sex": "sex_enum"})
                .assign_coords(
                    sex=lambda ds: [_SEX_LABELS[s.item()] for s in ds.sex_enum]
                )
            )

        # Normalise the flag name: some tables call it `loh`, others `lorh`.
        if "loh" in gds:
            gds = gds.rename_vars({"loh": "lorh"})

        if "lorh" not in gds:
            yield name, gds
            continue

        gds = gds.set_coords("lorh")
        if k in ("wfl", "wfh"):
            # drop_vars replaces the deprecated Dataset.drop.
            yield name, gds.drop_vars("lorh")
        elif k in _LORH_SPLITS:
            for out_name, code, long_name in _LORH_SPLITS[k]:
                yield out_name, _select_lorh(gds, code, long_name)
        else:
            raise NotImplementedError(k, gds)

# Materialise the generator into a name -> dataset mapping, then show each
# dataset under a heading made of its name and long_name attribute.
growthstandards_dss = {ds_name: ds for ds_name, ds in _fixup_gen()}
for name, gds in growthstandards_dss.items():
    _long_name = gds.attrs.get("long_name", "")
    display(Markdown('#### {}  ["{}"]'.format(name, _long_name)), gds)

In [None]:
# Write every dataset as its own group of a single zarr store.
ZARR_STORE = "growthstandards.zarr"
for name, gds in growthstandards_dss.items():
    # mode="w" overwrites an existing group instead of failing, so this cell
    # can be re-run (e.g. Restart & Run All) after the store has been written.
    gds.to_zarr(ZARR_STORE, group=name, mode="w")