In [30]:
from colabfit.tools.database import MongoDatabase, load_data
from colabfit.tools.property_settings import PropertySettings
from colabfit.tools.configuration import AtomicConfiguration
from colabfit.tools.property_definitions import potential_energy_pd, atomic_forces_pd
import ase
from pathlib import Path


In [22]:
# Open the 'test' database used for the rest of this notebook.
# NOTE(review): drop_database=True presumably wipes any existing 'test'
# database on every run of this cell -- confirm before pointing at real data.
client = MongoDatabase(
    'test',
    drop_database=True,
)

In [41]:
def reader(file_path):
    """Read every frame of one structure file and tag it with the file's stem.

    Args:
        file_path: Location of the file to read, as a ``pathlib.Path`` or a
            plain string.

    Returns:
        The object returned by ``ase.io.read(path, index=':')``, after
        ``info['name']`` has been set on each item to the file name without
        its extension.
    """
    # Imported explicitly: a bare `import ase` does not guarantee that the
    # `ase.io` submodule has been loaded.
    import ase.io

    # Accept str as well as Path so `.stem` is always available.
    path = Path(file_path)
    frames = ase.io.read(path, index=':')
    for frame in frames:
        frame.info['name'] = path.stem
    return frames

In [45]:
# Folder holding the *.xyz files of the liquid/solid water dataset.
# The data is published at
# https://archive.materialscloud.org/record/2018.0020/v1
# NOTE(review): this is a machine-specific absolute path; change it to the
# location of your own copy of the data before running.
DATA_DIR = '/Users/piper/Code/colabfit/data/liquid_solid_water/'

# Every file under DATA_DIR matching `glob_string` is parsed with `reader`;
# `name_field` is the info key that `reader` fills in for each frame.
configurations = load_data(
    file_path=DATA_DIR,
    file_format='folder',
    name_field='name',
    elements=['H', 'O'],
    reader=reader,
    glob_string='*.xyz',
    generator=False,
)

1it [00:03,  3.14s/it]


In [46]:
# Register the property definitions that ship with colabfit, in the same
# order as before: potential energy first, then atomic forces.
for definition in (potential_energy_pd, atomic_forces_pd):
    client.insert_property_definition(definition)

In [47]:
# Metadata attached to every property instance below; the same dict object
# is shared by both property entries.
metadata = {
    'software': {'value': ['LAMMPS', 'i-PI']},
    'method': {'value': ['revPBE0-D3', 'DFT']},
}

# Where the potential energy lives in each loaded configuration.
energy_settings = {
    'energy': {'field': 'TotEnergy', 'units': 'eV'},
    'per-atom': {'value': False, 'units': None},
    '_metadata': metadata,
}

# Where the atomic forces live in each loaded configuration.
forces_settings = {
    'forces': {'field': 'force', 'units': 'eV/Ang'},
    '_metadata': metadata,
}

# Property name -> list of field mappings for that property.
property_map = {
    'potential-energy': [energy_settings],
    'atomic-forces': [forces_settings],
}

In [50]:
# Insert the loaded configurations together with the properties described
# by `property_map`, and materialise the returned id pairs.
inserted_pairs = client.insert_data(
    configurations,
    property_map=property_map,
    generator=False,
    verbose=True,
)
ids = list(inserted_pairs)

# Transpose the pairs into one tuple of first elements and one tuple of
# second elements (named here as configuration ids and data-object ids).
all_co_ids, all_do_ids = zip(*ids)

Preparing to add configurations to Database: 100%|██████████| 1593/1593 [00:10<00:00, 157.29it/s]


In [51]:
# Name given to the configuration set created from this data.
name = 'HO_pnas_2019'

# Fetch the 'hash' field from the 'configurations' collection.
hashes = client.get_data(
    'configurations',
    fields=['hash'],
)

In [53]:
# Look up the stored configurations whose hash is in `hashes`.
hash_query = {'hash': {'$in': hashes}}
co_ids = client.get_data(
    'configurations',
    fields='hash',
    query=hash_query,
    ravel=True,
).tolist()

# One aligned summary line: set name, then number of configurations.
set_label = f'({name}):'.rjust(22)
set_size = f'{len(co_ids)}'.rjust(7)
print('Configuration set ', set_label, set_size)

# Group all of those configurations into a single configuration set.
cs_id = client.insert_configuration_set(
    co_ids,
    description="Liquid and solid H2O/water thermodynamics",
    name=name,
)

cs_ids = [cs_id]

Configuration set         (HO_pnas_2019):    1588


In [54]:
# Create the dataset from the configuration set(s) and the data objects
# inserted earlier in the notebook.
ds_id = client.insert_dataset(
    cs_ids,
    all_do_ids,
    name='HO_pnas_2019',
    # One list entry per author, rather than a single comma-joined string
    # that would be recorded as one author.
    authors=[
        'B. Cheng',
        'E. Engel',
        'J. Behler',
        'C. Dellago',
        'M. Ceriotti',
    ],
    links=[
        'https://archive.materialscloud.org/record/2018.0020/v1',
        'https://www.pnas.org/doi/full/10.1073/pnas.1815117116'
    ],
    # NOTE(review): the text says 1590 configurations, but the notebook output
    # shows 1593 loaded and 1588 stored -- confirm which number is intended.
    description = "1590 configurations of H2O/water "
    "with potential energy and forces calculated using "
    "a hybrid approach, DFT and revPBE0-D3 ",
    verbose=True,
)

Aggregating configuration info: 100%|██████████| 1588/1588 [00:00<00:00, 1593.47it/s]
Aggregating data_object info: 100%|██████████| 1588/1588 [00:00<00:00, 37829.45it/s]
Updating CA->DS relationships: 100%|██████████| 1588/1588 [00:00<00:00, 5014.07it/s]
