In [1]:
import os
from collections import defaultdict

import numpy as np

from colabfit.tools.configuration import AtomicConfiguration
from colabfit.tools.database import MongoDatabase, load_data
from colabfit.tools.property_definitions import atomic_forces_pd

import property_definitions_additional as pda

In [2]:
# Folder that holds the downloaded alpha-brass ``*.npz`` archives.
# Set the COLABFIT_BRASS_DATA_DIR environment variable to point the notebook
# at a different location; the fallback is the original author's local path.
DATASET_FP = os.environ.get(
    "COLABFIT_BRASS_DATA_DIR",
    "/Users/piper/Code/colabfit/data/brass_data/",
)

In [3]:
# Database handle used by every later cell.
# NOTE(review): drop_database=True presumably wipes any existing 'test'
# database on connect -- confirm before pointing this at a shared server.
client = MongoDatabase(
    "test",
    drop_database=True,
)

In [4]:
def read_npz(filepath):
    """Load every array stored in an ``.npz`` archive.

    Args:
        filepath: Path of the ``.npz`` file, handed straight to ``np.load``.

    Returns:
        A ``defaultdict(list)`` mapping each name in the archive to its
        array. Because it is a defaultdict, indexing a name that is not in
        the archive yields (and stores) an empty list instead of raising.
    """
    arrays = defaultdict(list)
    # NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
    # arbitrary code; only use this on archives from a trusted source.
    with np.load(filepath, allow_pickle=True) as archive:
        arrays.update((entry, archive[entry]) for entry in archive.files)
    return arrays

def reader(filepath):
    """Build the atomic configurations stored in one alpha-brass ``.npz`` file.

    The archive is loaded with ``read_npz``, its short array names are renamed
    to descriptive ones, and one ``AtomicConfiguration`` is created per entry
    of the ``coords`` array.

    Args:
        filepath: Path of the ``.npz`` archive to read.

    Returns:
        A list of ``AtomicConfiguration`` objects (periodic in all directions)
        whose ``info`` dict carries ``'forces'``, ``'total_energy'`` and
        ``'name'``.

    Raises:
        KeyError: If one of the archive names that gets renamed is missing.
    """
    name = "alpha-brass-nanoparticles"
    data = read_npz(filepath)

    # Explicit old -> new mapping. Pairing two separately maintained tuples
    # with zip() is fragile: if their lengths differ, the trailing names are
    # silently shifted or dropped. Names that keep their spelling
    # ('coords', 'name', 'citation') need no entry here.
    renamed_keys = {
        'latt': 'lattice',
        'z': 'atomic_num',
        'F': 'forces',
        'E': 'total_energy',
        'E_coh': 'cohesive_energy',
        'comp': 'composition_dict',
        'cmts': 'comments',
        'theory': 'vasp_pbe',
    }
    for old, new in renamed_keys.items():
        data[new] = data.pop(old)

    # Per-configuration arrays copied into each configuration's info dict.
    using_keys = ('forces', 'total_energy')

    atoms = []
    for i in range(len(data['coords'])):
        atom = AtomicConfiguration(
            names=[name],
            positions=data['coords'][i],
            cell=data['lattice'][i],
            numbers=data['atomic_num'][i],
            pbc=True,
        )
        for key in using_keys:
            atom.info[key] = data[key][i]
        atom.info['name'] = name
        atoms.append(atom)
    return atoms

In [5]:
# The raw data can be downloaded from
# https://archive.materialscloud.org/record/2021.153
#
# Every file under DATASET_FP that matches '*.npz' is parsed with `reader`.
# NOTE(review): generator=False presumably makes load_data return a fully
# materialised collection rather than a lazy generator -- confirm.
configurations = load_data(
    file_path=DATASET_FP,
    file_format="folder",
    glob_string="*.npz",
    reader=reader,
    name_field="name",
    elements=["Cu", "Zn"],
    generator=False,
)

1it [00:33, 33.97s/it]


In [9]:
# Register the property definitions referenced by the property map
# ('atomic-forces' and 'total-energy') with the database.
pds = [atomic_forces_pd, pda.total_energy_pd]
for property_definition in pds:
    client.insert_property_definition(property_definition)

In [10]:
# Calculation metadata attached to both property entries below
# (the same dict object is shared by both).
metadata = {
    "software": {"value": ["LAMMPS", "VASP"]},
    "method": {"value": ["DFT", "PBE"]},
}

# Maps each property name to the configuration `info` field that holds its
# values, together with the units declared for that field.
property_map = {
    "total-energy": [
        {
            "energy": {"field": "total_energy", "units": "meV"},
            "per-atom": {"value": True, "units": None},
            "_metadata": metadata,
        }
    ],
    "atomic-forces": [
        {
            "forces": {"field": "forces", "units": "meV Å^-1"},
            "_metadata": metadata,
        }
    ],
}

In [11]:
# Insert the loaded configurations together with the properties described by
# `property_map`, collecting the id pairs that insert_data yields.
ids = list(
    client.insert_data(
        configurations,
        property_map=property_map,
        generator=False,
        verbose=True,
    )
)

# Transpose the list of pairs into two parallel tuples.
# NOTE(review): judging by the names, the first element of each pair is a
# configuration id and the second a data-object id -- confirm.
all_co_ids, all_do_ids = zip(*ids)

Preparing to add configurations to Database: 100%|██████████| 300/300 [00:01<00:00, 264.86it/s]


In [12]:
# Hashes of the configurations currently stored in the database; they scope
# the configuration-set queries below.
hashes = client.get_data('configurations', fields=['hash'])

# Ids of every configuration set created for this dataset.
cs_ids = []

# --- Configuration set: all configurations ---
name = "alpha-brass-nanoparticles"
co_ids = client.get_data(
    'configurations',
    fields='hash',
    query={'hash': {'$in': hashes}},
    ravel=True,
).tolist()

print('Configuration set ', f'({name}):'.rjust(22), str(len(co_ids)).rjust(7))

cs_id = client.insert_configuration_set(
    co_ids,
    description=f"Set of {name}",
    name=name,
)
cs_ids.append(cs_id)

# --- Configuration set: copper-only configurations ---
# NOTE(review): the query selects single-element configurations; that these
# are copper (and never pure zinc) is an assumption about the dataset.
name = 'Cu-only-alpha-brass-nanoparticles'
cu_ids = client.get_data(
    'configurations',
    fields=['hash', 'nelements'],
    query={
        'hash': {'$in': hashes},
        'nelements': {'$eq': 1},
    },
    ravel=True,
)['hash']

print('Configuration set ', f'({name}):'.rjust(22), str(len(cu_ids)).rjust(7))

cs_id = client.insert_configuration_set(
    cu_ids,
    description="Set from alpha-brass nanoparticles dataset containing only copper",
    name=name,
)
cs_ids.append(cs_id)

Configuration set  (alpha-brass-nanoparticles):     296
Configuration set  (Cu-only-alpha-brass-nanoparticles):     273


In [13]:
# --- Configuration set: configurations containing two elements (Cu and Zn) ---
# The query result is bound to `cuzn_ids` only, so the copper-only `cu_ids`
# collected for the single-element set keeps its meaning after this cell runs.
cuzn_ids = client.get_data(
    'configurations',
    fields=['hash', 'nelements'],
    query={
        'hash': {'$in': hashes},
        'nelements': {'$eq': 2},
    },
    ravel=True,
)['hash']

cs_id = client.insert_configuration_set(
    cuzn_ids,
    description="Set from alpha-brass nanoparticles dataset containing copper and zinc (i.e., no copper-only molecules)",
    name='CuZn-only-alpha-brass-nanoparticles',
)
# Note: re-running this cell appends another id to cs_ids.
cs_ids.append(cs_id)

In [14]:
# Create the dataset from the configuration sets and the data objects
# inserted above.
ds_id = client.insert_dataset(
    cs_ids,
    all_do_ids,
    name='alpha_brass_nanoparticles',
    # One list entry per author, so each name is recorded separately rather
    # than as a single comma-joined string.
    authors=[
        'J. Weinreich',
        'A. Römer',
        'M.L. Paleico',
        'J. Behler',
    ],
    links=[
        "http://doi.org/10.1021/acs.jpcc.0c00559",
        "https://doi.org/10.24435/materialscloud:94-aq",
    ],
    description=(
        "53,841 structures of alpha-brass (less than 40% Zinc)."
        " Includes atomic forces and total energy. Calculated using VASP at "
        "the DFT level of theory."
    ),
    verbose=True,
)


Aggregating configuration info: 100%|██████████| 296/296 [00:00<00:00, 4282.53it/s]
Aggregating data_object info: 100%|██████████| 297/297 [00:00<00:00, 9666.02it/s]
Updating CA->DS relationships: 100%|██████████| 297/297 [00:00<00:00, 9715.09it/s]
