In [30]:
import os
from pathlib import Path

import ase
import numpy as np
from ase import Atoms

from colabfit.tools.configuration import AtomicConfiguration
from colabfit.tools.database import MongoDatabase, load_data
from colabfit.tools.property_definitions import potential_energy_pd, atomic_forces_pd, cauchy_stress_pd
from colabfit.tools.property_settings import PropertySettings


In [9]:

# Folder holding the Zeo-1 ``.npz`` archives. Set the ZEO1_DATA_DIR
# environment variable to run this notebook on another machine; the default
# is the original author's local checkout.
DATASET_FP = Path(
    os.environ.get("ZEO1_DATA_DIR", "/Users/piper/Code/colabfit/data/zeo-1/npz/")
)
# Fail fast: a missing folder would otherwise only surface later, as a
# confusing unpacking error once zero configurations have been loaded.
if not DATASET_FP.is_dir():
    raise FileNotFoundError(f"Zeo-1 data directory not found: {DATASET_FP}")

# NOTE(review): drop_database=True presumably wipes the existing 'test'
# database on every run of this cell -- confirm before pointing the client
# at a database that holds data worth keeping.
client = MongoDatabase('test', drop_database=True)



In [16]:

def reader(file):
    """Read one ``.npz`` archive into a list of ASE ``Atoms`` objects.

    The archive must contain a ``numbers`` array, shared by every frame, and
    the per-frame arrays ``xyz``, ``lattice``, ``energy``, ``stress``,
    ``gradients`` and ``charges``. The per-frame arrays are iterated in
    lockstep along their first axis; ``zip`` stops at the shortest one.

    Args:
        file: ``pathlib.Path`` of the archive. Its stem becomes the ``name``
            entry in each configuration's ``info`` dict.

    Returns:
        list: one periodic (``pbc=True``) ``Atoms`` per frame. Energy, stress,
        gradients and charges are stored in ``info`` under the keys
        ``potential_energy``, ``cauchy_stress``, ``nuclear_gradients`` and
        ``partial_charges``.
    """
    name = file.stem
    # np.load returns a lazy NpzFile that keeps the archive open; the context
    # manager closes it so file handles do not pile up across many archives.
    with np.load(file) as npz:
        # Read once: NpzFile loads a member from the archive on every lookup,
        # so indexing inside the loop would re-read it for each frame.
        numbers = npz["numbers"]
        frames = zip(
            npz["xyz"], npz["lattice"], npz["energy"],
            npz["stress"], npz["gradients"], npz["charges"],
        )
        atoms = [
            Atoms(
                numbers=numbers,
                positions=xyz,
                cell=lattice,
                pbc=True,
                info={
                    "name": name,
                    "potential_energy": energy,
                    "cauchy_stress": stress,
                    "nuclear_gradients": gradients,
                    "partial_charges": charges,
                },
            )
            for xyz, lattice, energy, stress, gradients, charges in frames
        ]
    return atoms


In [28]:
# Run `reader` over every *.npz archive under DATASET_FP and collect the
# results eagerly (generator=False) rather than lazily.
configurations = load_data(
    file_path=DATASET_FP,
    file_format='folder',
    glob_string='*.npz',
    reader=reader,
    # `reader` stores the archive stem under info["name"].
    name_field='name',
    elements=[
        'O', 'Si', 'Ge', 'Li', 'H', 'Al', 'K', 'Ca',
        'C', 'N', 'Na', 'F', 'Ba', 'Cs', 'Be',
    ],
    generator=False,
)
    

226it [00:12, 18.37it/s]


In [31]:
# Register the property definitions with the database, in the same order as
# before: energy, forces, stress.
for definition in (potential_energy_pd, atomic_forces_pd, cauchy_stress_pd):
    client.insert_property_definition(definition)



In [32]:
# Calculation provenance attached to every mapped property below.
metadata = {
    "software": {"value": "Amsterdam Modeling Suite"},
    "method": {"value": "revPBE"},
}

# Maps property names to the `info` fields filled in by `reader`.
# Both entries reference the same `metadata` dict object.
# NOTE(review): no 'atomic-forces' entry is mapped here although `reader`
# stores "nuclear_gradients" -- confirm whether forces should be loaded too.
# NOTE(review): confirm that 'a.u.' is a unit string the database accepts
# for both energy and stress.
property_map = {
    "potential-energy": [
        {
            "energy": {"field": "potential_energy", "units": "a.u."},
            "per-atom": {"value": False, "units": None},
            "_metadata": metadata,
        }
    ],
    "cauchy-stress": [
        {
            "stress": {"field": "cauchy_stress", "units": "a.u."},
            "_metadata": metadata,
        }
    ],
}

In [33]:
# Insert every configuration together with the properties described by
# `property_map`, materialising the returned ids into a list.
ids = list(
    client.insert_data(
        configurations,
        property_map=property_map,
        generator=False,
        verbose=True,
    )
)

# Transpose the list of id tuples into two parallel tuples.
# presumably each element is a (configuration id, data-object id) pair -- verify
all_co_ids, all_do_ids = zip(*ids)

Preparing to add configurations to Database: 100%|██████████| 12930/12930 [00:40<00:00, 317.73it/s]


In [36]:
# Fetch the stored hashes of the configurations that were just inserted.
co_ids = client.get_data(
    'configurations',
    fields='hash',
    query={'hash': {'$in': all_co_ids}},
    ravel=True,
).tolist()

# A single configuration set covers the whole dataset.
desc = "All configurations from Zeo-1 dataset"
cs_id = client.insert_configuration_set(
    co_ids,
    description=desc,
    name='Zeo-1',
)
cs_ids = [cs_id]

In [37]:
# Create the dataset record that ties the configuration set(s) and the
# inserted data objects together.
ds_id = client.insert_dataset(
    cs_ids,
    all_do_ids,
    name='Zeo-1_sd_2022',
    # One list element per author; previously both names were packed into a
    # single comma-separated string, i.e. recorded as one author.
    # NOTE(review): confirm these names against the publication linked
    # below -- the attribution may have been carried over from another dataset.
    authors=[
        'A. Christensen',
        'O. A. von Lilienfeld',
    ],
    links=[
        'https://archive.materialscloud.org/record/2021.171',
        'https://www.nature.com/articles/s41597-022-01160-5'
    ],
    # NOTE(review): the insert step above reported 12930 configurations;
    # confirm the "130,000" figure in this description.
    description = '130,000 configurations of zeolite from the '
    'Database of Zeolite Structures. Calculations performed using '
    'Amsterdam Modeling Suite software.',
    verbose=True,
)

Aggregating configuration info: 100%|██████████| 12930/12930 [00:05<00:00, 2356.30it/s]
Aggregating data_object info: 100%|██████████| 12930/12930 [00:00<00:00, 59162.16it/s]
Updating CA->DS relationships: 100%|██████████| 12930/12930 [00:00<00:00, 34158.88it/s]
