In [1]:
from colabfit.tools.database import MongoDatabase, load_data
from colabfit.tools.property_settings import PropertySettings
from colabfit.tools.configuration import AtomicConfiguration
from colabfit.tools.property_definitions import potential_energy_pd, atomic_forces_pd, free_energy_pd
import ase
from pathlib import Path

In [2]:
# Connect to the 'test2_e2e' database used by every later cell in this notebook.
# NOTE(review): drop_database=True presumably wipes any existing 'test2_e2e'
# database on connect — confirm before pointing this at data worth keeping.
client = MongoDatabase('test2_e2e',drop_database=True)

In [3]:
def reader(file_path):
    """Read one structure file and yield it, tagged with the file's stem.

    Args:
        file_path: Location of the structure file, given either as a
            ``pathlib.Path`` or as a plain string.

    Yields:
        The single object returned by ``ase.io.read(file_path)``, with
        ``info['name']`` set to the file name without its extension.
    """
    # Import the submodule explicitly: `import ase` on its own may not load
    # `ase.io`, so `ase.io.read` would only work if some other import had
    # already pulled the submodule in.
    from ase.io import read

    atoms = read(file_path)
    # Path(...) accepts both str and Path, so callers are not forced to
    # pre-wrap the argument just to get `.stem`.
    atoms.info['name'] = Path(file_path).stem
    yield atoms

In [4]:
# Collect the load_data arguments in one place so they are easy to review.
# NOTE(review): file_path is an absolute, machine-specific location; anyone
# re-running this notebook elsewhere has to adjust it.
load_options = dict(
    file_path='/Users/piper/Code/colabfit/data/edmond_scheurer_lps_2022/',
    file_format='folder',
    name_field='name',          # matches the info['name'] key set by reader()
    elements=['Li', 'S', 'P'],
    reader=reader,
    glob_string='glass.xyz',
    generator=False,
)

configurations = load_data(**load_options)

2it [00:00,  9.32it/s]


In [5]:
# Display the second loaded configuration as a quick sanity check of the load step.
configurations[1]

AtomicConfiguration(symbols='Li186P62S248', pbc=True, cell=[[19.415730802212945, 7.349643431219205e-06, -8.36694044658518e-05], [9.4120792879234e-06, 24.865789944603044, -0.00022831956945719805], [-9.075212126220114e-05, -0.00019337265211474058, 21.060587485283342]], forces=...)

## Define properties and set up property mapping(s)

In [6]:
# Register the property definitions shipped with colabfit in the database.
# TODO: work out which of these properties are actually present in this dataset.
for definition in (potential_energy_pd, atomic_forces_pd, free_energy_pd):
    client.insert_property_definition(definition)

# Leave the free-energy definition as the cell's last expression so it is displayed.
free_energy_pd

{'property-id': 'tag:staff@noreply.colabfit.org,2022-05-30:property/free-energy',
 'property-name': 'free-energy',
 'property-title': 'Free energy from a static calculation',
 'property-description': 'Free energy from a calculation of a static configuration. Energies must be specified to be per-atom or supercell. If a reference energy has been used, this must be specified as well.',
 'energy': {'type': 'float',
  'has-unit': True,
  'extent': [],
  'required': False,
  'description': 'The free energy of the system.'},
 'per-atom': {'type': 'bool',
  'has-unit': False,
  'extent': [],
  'required': True,
  'description': 'If True, "energy" is the total energy of the system, and has NOT been divided by the number of atoms in the configuration.'},
 'reference-energy': {'type': 'float',
  'has-unit': True,
  'extent': [],
  'required': False,
  'description': 'If provided, then "energy" is the energy (either of the whole system, or per-atom) LESS the energy of a reference configuration (E 

In [7]:
# Open question: will the units ever change, and where can the unit type be checked?

# Provenance metadata; the very same dict object is attached to every
# property entry below.
metadata = {
    'software': {'value': ['LAMMPS', 'QUIP']},
    'method': {'value': 'GAP'},
}

# NOTE(review): both energy entries read 'per-atom' from a field named
# 'per-atom', which reader() in this notebook does not set explicitly —
# confirm that the loaded files provide it.
potential_energy_entry = {
    'energy': {'field': 'energy', 'units': 'eV'},
    'per-atom': {'field': 'per-atom', 'units': None},
    '_metadata': metadata,
}

atomic_forces_entry = {
    'forces': {'field': 'forces', 'units': 'eV/Ang'},
    '_metadata': metadata,
}

free_energy_entry = {
    'energy': {'field': 'free_energy', 'units': 'eV'},
    'per-atom': {'field': 'per-atom', 'units': None},
    '_metadata': metadata,
}

# Map each property name to the list of field mappings used to extract it.
property_map = {
    'potential-energy': [potential_energy_entry],
    'atomic-forces': [atomic_forces_entry],
    'free-energy': [free_energy_entry],
}

## Insert configurations and properties into database

In [8]:
# Insert every loaded configuration together with the properties described by
# property_map, and materialise the returned ID pairs into a list.
ids = list(
    client.insert_data(
        configurations,
        property_map=property_map,
        generator=False,
        verbose=True,
    )
)

# Transpose the pairs into two tuples: the first elements (used below as
# configuration hashes) and the second elements (passed to insert_dataset).
all_co_ids, all_do_ids = zip(*ids)

Preparing to add configurations to Database: 100%|██████████| 2/2 [00:00<00:00, 133.68it/s]


In [9]:
# Each entry is [configuration-set name, regex applied to
# 'chemical_formula_reduced', human-readable description].
cs_regexes = [
    ['lithium_thiophosphate', '.*', 'All configurations'],
    ['tetrahedral_lithium_thiophosphate', 'Li3PS4', 'All Li3PS4 configurations'],
    # Currently disabled:
    # ['fully_bridged_lithium_thiophosphate', 'Li4P2S7', 'All Li4P2S7 configurations'],
    ['mixed_tetrahedral_bridged_lithium_thiophosphate', 'Li7P3S11', 'All Li7P3S11 configurations'],
]

cs_ids = []

for i, (name, regex, desc) in enumerate(cs_regexes):
    # Restrict the search to the configurations inserted above whose reduced
    # formula matches this set's regex.
    query = {
        'hash': {'$in': all_co_ids},
        'chemical_formula_reduced': {'$regex': regex},
    }
    matches = client.get_data('configurations', fields='hash', query=query, ravel=True)
    co_ids = matches.tolist()

    # Right-align the label to 22 columns and the count to 7.
    label = f'({name}):'
    print(f'Configuration set {i}', f'{label:>22}', f'{len(co_ids):>7}')

    cs_id = client.insert_configuration_set(co_ids, description=desc, name=name)
    cs_ids.append(cs_id)

Configuration set 0 (lithium_thiophosphate):       2
Configuration set 1 (tetrahedral_lithium_thiophosphate):       1
Configuration set 2 (mixed_tetrahedral_bridged_lithium_thiophosphate):       1


In [10]:
# Adjacent string literals are joined with nothing in between, so every
# fragment except the last carries its own trailing space; without them the
# stored description runs words together ("structuresfor", "classof", ...).
dataset_description = (
    'This dataset contains several representative glass structures '
    'for stoichiometries Li3PS4, Li4P2S7, and Li7P3S11 from the class '
    'of Li2S-P2S5 (LPS) solid-state electrolytes (SSE). Configurations '
    'were generated with a Gaussian Approximation Potential (GAP) model '
    'using QUantum mechanics and Interatomic Potentials (QUIP) and '
    'Large-scale Atomic/Molecular Massively Parallel Simulator (LAMMPS) '
    'software.'
)

# Create the dataset from the configuration sets and data objects inserted above.
ds_id = client.insert_dataset(
    cs_ids,
    all_do_ids,
    name='LPS_nanomaterials2022',
    authors=[
        'C.G. Staack', 'Tabea Huss', 'J.T. Margraf', 'K. Reuter', 'C. Scheurer'
    ],
    links=[
        'https://edmond.mpdl.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.VZHSXS'
    ],
    description=dataset_description,
    verbose=True,
)

# Display one stored dataset document to inspect the result of the insert.
client.datasets.find_one()

Aggregating configuration info: 100%|██████████| 2/2 [00:00<00:00, 211.03it/s]
Aggregating data_object info: 100%|██████████| 2/2 [00:00<00:00, 407.59it/s]
Updating CA->DS relationships: 100%|██████████| 2/2 [00:00<00:00, 3855.06it/s]


{'_id': ObjectId('63c19efd436308b0bd90ff60'),
 'hash': '5677587112426681685580343186289953865416382253707281715799645492910390739638457659157781404953777592961657652925943072084919385957126366125470789356647551',
 'aggregated_info': {'nconfigurations': 2,
  'nsites': 979,
  'nelements': 3,
  'chemical_systems': ['LiPS'],
  'elements': ['Li', 'P', 'S'],
  'individual_elements_ratios': {'Li': [0.38, 0.33],
   'P': [0.12, 0.14],
   'S': [0.5, 0.52]},
  'total_elements_ratios': {'Li': 0.35444330949948927,
   'P': 0.1338100102145046,
   'S': 0.5117466802860061},
  'chemical_formula_reduced': ['Li3PS4', 'Li7P3S11'],
  'chemical_formula_anonymous': ['A11B7C3', 'A4B3C'],
  'chemical_formula_hill': ['Li186P62S248', 'Li161P69S253'],
  'nperiodic_dimensions': [3],
  'dimension_types': [[1, 1, 1]],
  'property_types': ['potential-energy', 'atomic-forces'],
  'property_types_counts': [2, 2]},
 'authors': ['C.G. Staack',
  'Tabea Huss',
  'J.T. Margraf',
  'K. Reuter',
  'C. Scheurer'],
 'colabfit-i