In [None]:
from mpcontribs.client import Client
from monty.serialization import loadfn
from json import loads
from pymatgen.core.structure import Molecule, Structure
from pathlib import Path
from time import time

In [None]:
# Name of the MPContribs project that every cell below operates on.
# NOTE(review): the trailing comment presumably records the production project name — confirm.
name = "sandbox"  # open_catalyst_project
# Client built with default arguments.
# NOTE(review): presumably picks up API credentials/host from the environment — confirm.
client = Client()

In [None]:
# Look up the project by name; as the cell's last expression the result is displayed.
client.get_project(name)

In [None]:
# Author attribution string; in this notebook it is only referenced by the
# disabled project-update call below.
authors = "Facebook AI Research (FAIR) and Carnegie Mellon University’s (CMU) Department of Chemical Engineering"
# Disabled call, kept for reference: it would set the project's `authors` and
# switch `unique_identifiers` off.
# client.projects.update_entry(pk=name, project={
#     "authors": authors, "unique_identifiers": False # use data.id as unique ID in this project
# }).result()

In [None]:
# Declare the project's data columns together with their units. The keys match
# the `search_data` keys assembled in get_contribution.
# NOTE(review): in the MPContribs client a unit of None presumably marks a
# non-numeric (text) column and "" a dimensionless number — confirm in the docs.
client.init_columns(name, {
    "id": None, # filled from data["id"]
    "energy": "meV", # filled from data["adsorption_energy"]
    "smiles": None, # filled from data["adsorbate_smiles"]
    "formulas.IUPAC": None,
    "formulas.bulk": None,
    "formulas.trajectory": None,
    "surface.miller": None,
    "surface.top": None,
    "surface.shift": ""
})

In [None]:
# Directory holding the gzipped JSON sample files.
# NOTE(review): this is a machine-specific absolute path; adjust it when running elsewhere.
p = Path("/Users/patrick/GoogleDriveLBNL/MaterialsProject/gitrepos/mpcontribs-data/ocp-sample")
# Path.glob yields matches in no guaranteed order; sort so that later slices
# such as jsons[:50] pick the same files on every run.
jsons = sorted(p.glob("*.json.gz"))

In [None]:
def get_miller(indices):
    """Render the first three entries of `indices` as a bracketed string.

    The entries are concatenated without separators, e.g. ``[1, 1, 1]``
    becomes ``"[111]"``. Any entries beyond the third are ignored; fewer than
    three raises IndexError.
    """
    first, second, third = indices[0], indices[1], indices[2]
    return "[{}{}{}]".format(first, second, third)

def get_contribution(path, max_size_kb=150):
    """Build one MPContribs contribution dict from a serialized sample file.

    Args:
        path (pathlib.Path): file to load with `loadfn`; it is also attached
            to the contribution as-is.
        max_size_kb (float): size cutoff in KiB. Only files strictly smaller
            than this are processed. Defaults to 150, the previously
            hard-coded limit.

    Returns:
        dict | None: a dict with the keys ``project``, ``identifier``,
        ``data``, ``structures`` and ``attachments``; or None when the file
        is skipped because it is not smaller than `max_size_kb`. Callers must
        filter out the None results before submitting.

    Note:
        Reads the module-level `name` for the target project.
    """
    # Guard clause: skip files at or above the size cutoff.
    if path.stat().st_size / 1024 >= max_size_kb:
        return None

    data = loadfn(path)

    # Use the last frame of the trajectory and label every site with its
    # integer tag so the sites can be split below.
    struct = data['trajectory'][-1]
    struct.add_site_property('tags', [int(t) for t in data['tags']])

    # Sites tagged 2 form the molecule; all remaining sites form the "bulk"
    # structure. NOTE(review): tag 2 presumably marks adsorbate atoms — confirm.
    adsorbate_sites = [site for site in struct if site.properties['tags'] == 2]
    remaining_sites = [site for site in struct if site.properties['tags'] != 2]
    iupac_formula = Molecule.from_sites(adsorbate_sites).composition.iupac_formula
    bulk_formula = Structure.from_sites(remaining_sites).composition.reduced_formula

    search_data = {
        "id": data['id'],
        # NOTE(review): the value is labelled meV without any conversion —
        # confirm the source data really is in meV.
        "energy": f'{data["adsorption_energy"]} meV',
        "smiles": data["adsorbate_smiles"],
        "formulas.IUPAC": iupac_formula,
        "formulas.bulk": bulk_formula,
        "formulas.trajectory": struct.composition.reduced_formula,
        "surface.miller": get_miller(data["surface_miller_indices"]),
        "surface.top": str(data["surface_top"]),
        "surface.shift": data["surface_shift"]
    }

    return {
        "project": name,
        "identifier": data["bulk_id"],
        "data": search_data,
        "structures": [struct],
        "attachments": [path]
    }

In [None]:
# Build contributions from the first 50 sample files. get_contribution returns
# None for files it skips (size cutoff), so drop those entries; otherwise they
# would inflate the count and be passed on to submit_contributions.
contributions = [
    contribution
    for contribution in map(get_contribution, jsons[:50])
    if contribution is not None
]
len(contributions)

In [None]:
# Submit only the first five prepared contributions.
client.submit_contributions(contributions[:5])

In [None]:
# Query the IDs stored for this project, including those of the linked
# structures and attachments; the entry for this project is then picked out
# of the result with .get(name).
# NOTE(review): data_id_fields presumably tells the client to use the "id"
# data field as the per-contribution identifier for this project — confirm.
all_ids = client.get_all_ids(
    {"project": name},
    include=["structures", "attachments"],
    data_id_fields={name: "id"}
).get(name)

In [None]:
# Materialize the ID collections as lists so they can be indexed (e.g. cids[0]).
cids = list(all_ids["ids"])  # contribution IDs
sids = list(all_ids["structures"]["ids"])  # structure IDs
aids = list(all_ids["attachments"]["ids"])  # attachment IDs

In [None]:
# Fetch the first contribution by its ID to inspect what was stored.
client.get_contribution(cids[0])

In [None]:
# WARNING: destructive cleanup. The call is scoped only by the project name.
# NOTE(review): presumably removes every contribution in the project — confirm
# before running this cell (it also runs under "Run All").
client.delete_contributions(name)