In [None]:
import json
from pathlib import Path
from flatten_dict import flatten, unflatten
from mpcontribs.client import ureg, Client

In [None]:
# Project to operate on. Defined once and passed to the client so the two
# cannot drift apart when the notebook is pointed at a different project.
name = "sandbox"
client = Client(project=name)

In [None]:
# NOTE(review): absolute, user-specific location -- the notebook only runs
# where this exact file exists; consider a configurable data directory.
mofs_file = Path("/Users/patrick/Downloads/1000mofs.json")

# Parse the JSON export; later cells iterate over `data` and read the
# "identifier", "formula" and "data" keys of each element.
with mofs_file.open() as handle:
    data = json.load(handle)

**get full list of columns and their (clean) units**

In [None]:
# Merge the dot-flattened "data" section of every contribution into a single
# dict. When a key occurs in several contributions the value seen last wins;
# that value is then used as the representative sample for deducing the unit.
reduced = {}
for contribution in data:
    reduced.update(flatten(contribution["data"], reducer="dot"))

In [None]:
def get_value_unit(v):
    """Split a raw str/float/int entry into a clean ``(value, unit)`` pair.

    Parameters
    ----------
    v : str, float or int
        Raw value as found in a contribution's data. Strings are stripped of
        surrounding whitespace before being inspected.

    Returns
    -------
    tuple
        ``(value, unit)`` where ``unit`` is

        * ``""`` for plain numbers (floats, ints and strings accepted by
          ``float()``); numeric strings are returned as (stripped) strings,
        * the last whitespace-separated token (with ``^3`` rewritten to
          ``³``) when that token is found in ``ureg``; ``value`` is then the
          first token,
        * ``None`` for a single non-numeric token, or when the ``ureg``
          lookup itself raises (the whole stripped string is returned as
          ``value`` in that case).

    Raises
    ------
    ValueError
        If ``v`` is an empty/whitespace-only string, is not a str/float/int,
        or consists of several tokens whose last token is not in ``ureg``.
    """
    if isinstance(v, (float, int)):
        return v, ""

    if not (isinstance(v, str) and v.strip()):
        # The message reports only the offending value: the key it belongs to
        # is not available inside this function.
        raise ValueError(f"{v!r} is not a known value type")

    v = v.strip()
    try:
        float(v)
    except ValueError:
        pass  # not a bare number -> look for a "<value> <unit>" pattern below
    else:
        return v, ""

    vs = v.split()
    if len(vs) == 1:
        # A single non-numeric token is kept verbatim, without a unit.
        return vs[0], None

    value, unit = None, None
    try:
        raw = vs[-1].replace("^3", "³")
        if raw in ureg:
            value, unit = vs[0], raw
    except Exception:
        # Best effort: if the registry lookup fails, keep the text unchanged.
        value, unit = v, None

    if value is None:
        # Several tokens, but the trailing one is not a unit ureg recognises.
        raise ValueError(f"failed parsing {v}")

    return value, unit

In [None]:
# Column name -> unit, deduced from the representative sample of each column.
# Only the unit half of the (value, unit) pair is kept.
columns = {
    column: get_value_unit(sample)[1]
    for column, sample in reduced.items()
}

In [None]:
# Two-step initialisation, order matters: the call with an empty mapping is
# there to force a reset (per the inline note), after which the freshly
# derived column -> unit mapping is passed in.
client.init_columns({}) # force reset columns
client.init_columns(columns)

**parse contributions and ensure clean units**

In [None]:
contributions = []

for entry in data:
    # Work on a flat, dot-keyed dict first: top-level fields are copied as-is
    # and every data field is added below under a "data.<key>" name.
    flat = {"identifier": entry["identifier"], "formula": entry["formula"], "data": {}}

    for k, v in flatten(entry["data"], reducer="dot").items():
        value, unit = get_value_unit(v)
        if unit:
            # Value with a recognised unit -> "<value> <unit>" string.
            flat[f"data.{k}"] = f"{value} {unit}"
        else:
            # Unit is "" or None -> store the bare value.
            flat[f"data.{k}"] = value

    # Restore the nested structure from the dotted keys.
    contributions.append(unflatten(flat, splitter="dot"))

In [None]:
# Hand the parsed contributions (built in the previous cell) to the client
# for submission.
client.submit_contributions(contributions)