# Carbon GAP-20 Example

In [1]:
import os

from ase.io import read
from colabfit.tools.database import MongoDatabase, load_data
from colabfit.tools.property_definitions import (
    atomic_forces_pd,
    potential_energy_pd,
    cauchy_stress_pd
)

### Connect to Mongo Client

In [2]:
DATABASE_NAME = "test_db"
# Host of the machine on which the mongod process is running
# ("localhost" is fine for a local server).
IP = "localhost"

# drop_database=True is passed so that the example starts from a fresh database.
client = MongoDatabase(
    DATABASE_NAME,
    uri=f"mongodb://{IP}:27017",
    drop_database=True,
)

### Load Data

In [3]:
# Data location. Set the CARBON_GAP20_XYZ environment variable to point at
# your own copy of the dataset file; when it is unset, the path used when this
# example was originally written is kept as the fallback.
DATASET_FP = os.environ.get(
    "CARBON_GAP20_XYZ",
    "/home/eric/Downloads/Carbon_GAP_20/Carbon_Data_Set_Total.xyz",
)

# A custom reader function can be supplied via the `reader=` keyword
# if necessary; it is not needed for this example.
configurations = load_data(
    file_path=DATASET_FP,
    file_format="xyz",
    name_field="config_type",  # ase.Atoms.info key to use as a name
    elements=["C"],  # Element types in data
    generator=False,
    verbose=True,
)

Loading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17525/17525 [00:03<00:00, 5593.77it/s]


### Setup Property Information

In [4]:
# Register each of the predefined property definitions with the database.
for definition in (atomic_forces_pd, potential_energy_pd, cauchy_stress_pd):
    client.insert_property_definition(definition)

# Display one definition as the cell output to show what it looks like.
atomic_forces_pd

{'property-id': 'tag:staff@noreply.colabfit.org,2022-05-30:property/atomic-forces',
 'property-name': 'atomic-forces',
 'property-title': 'Atomic forces from a static calculation',
 'property-description': 'Atomic forces from a calculation of a static configuration.',
 'forces': {'type': 'float',
  'has-unit': True,
  'extent': [':', 3],
  'required': True,
  'description': 'The [x,y,z] components of the force on each particle.'}}

In [5]:
# Metadata shared by every property instance. Each entry takes one of two forms:
#   {"value": ...} -- a static quantity
#   {"field": ...} -- the key where colabfit can find the dynamic value
prop_metadata = {
    "software": {"value": "VASP"},
    "method": {"value": "DFT optB88-vdW"},
}
prop_metadata.update(
    {
        key: {"field": key}
        for key in ("kpoints_density", "cutoff", "nneightol", "kpoints")
    }
)

# Property mappings: these inform colabfit-tools where to find the necessary
# values and provide additional information. All entries reference the same
# prop_metadata object.
property_map = {}

property_map["cauchy-stress"] = [
    {
        "stress": {"field": "virial", "units": "eV"},
        "volume-normalized": {"value": True, "units": None},
        "_metadata": prop_metadata,
    },
]

property_map["potential-energy"] = [
    {
        "energy": {"field": "energy", "units": "eV"},
        "per-atom": {"value": False, "units": None},
        "_metadata": prop_metadata,
    },
]

property_map["atomic-forces"] = [
    {
        "forces": {"field": "forces", "units": "eV/A"},
        "_metadata": prop_metadata,
    },
]

### Insert Configurations, Property Instances, Data Objects, and Metadata

In [6]:
# insert_data returns hashes (identifiers) for all inserted COs and DOs.
# A mapping for CO metadata can also be specified through the `co_md_map`
# keyword if necessary.
ids = client.insert_data(configurations, property_map=property_map, verbose=True)

# Split the returned pairs into one sequence of CO hashes and one of DO hashes.
all_cos, all_dos = zip(*ids)

Preparing to add configurations to Database: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17525/17525 [00:30<00:00, 570.98it/s]


### Insert Configuration Sets if Desired

In [7]:
# Configuration Sets are organized groupings over Configurations.
# As an example, a CS is constructed here from all graphene structures
# in the dataset.
cs_info = dict(
    name="Graphene",
    description="All graphene configurations from the total Carbon Gap-20 dataset",
)

# The query finds all COs with name == "Graphene"; the `name` and
# `description` keyword arguments are taken directly from cs_info.
cs_id = client.query_and_insert_configuration_set(
    co_hashes=all_cos,
    query={"names": cs_info["name"]},
    **cs_info,
)

Inserting configuration set            (Graphene):   16907


Aggregating configuration info: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16907/16907 [00:02<00:00, 5940.45it/s]


### Insert Dataset

In [8]:
# Create the Dataset from the inserted Data Objects and the Configuration Set.
# The call is the last expression in the cell, so its return value is shown
# as the cell output.
client.insert_dataset(
    do_hashes=all_dos,  # List of DOs to attach to Dataset
    cs_ids=cs_id,  # Any CSs to attach to dataset
    name="Carbon-Gap20",
    authors=[
        "Patrick Rowe",
        "Volker L. Deringer",
        "Piero Gasparotto",
        "Gábor Csányi",
        "Angelos Michaelides",
    ],
    links=[
        "https://doi.org/10.17863/CAM.54529",
        "https://doi.org/10.1063/5.0005084",
    ],
    # Adjacent string literals are concatenated by the parser. A backslash
    # continuation inside a single literal would instead embed the next
    # line's leading indentation in the stored description.
    description=(
        "Approximately 17,000 configurations of carbon, each containing "
        "1 to 240 atoms/cell. "
        "A variety of structure types are represented, including graphite, "
        "graphene, cubic and hexagonal diamond, fullerenes, and nanotubes, "
        "as well as some defect structures."
    ),
    verbose=True,
)

Aggregating data_object info: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17227/17227 [00:00<00:00, 113945.56it/s]
Aggregating configuration info: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16907/16907 [00:02<00:00, 6221.27it/s]
Updating DO->DS relationships: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17227/17227 [00:00<00:00, 88673.97it/s]


'DS_fc9hn9jaz8o5_0'