# Uncomment for Google Colab

# Basic Example

## Initialize the `DatabaseClient`

In [1]:
from colabfit.tools.client import HDF5Client

client = HDF5Client('basic_example.hdf5', 'w')

## Attaching a property definition

In [2]:
client.insert_property_definition({
    'property-id': 'energy-forces',
    'property-title': 'A default property for storing energies and forces',
    'property-description': 'Energies and forces computed using DFT',
    'energy': {'type': 'float', 'has-unit': True, 'extent': [], 'required': True, 'description': 'Cohesive energy'},
    'forces': {'type': 'float', 'has-unit': True, 'extent': [':',3], 'required': True, 'description': 'Atomic forces'},
})



In [3]:
client.get_property_definition('energy-forces')

{'last_modified': '2021-12-13T15:39:59Z',
 'definition': {'property-id': 'energy-forces',
  'property-title': 'A default property for storing energies and forces',
  'property-description': 'Energies and forces computed using DFT',
  'energy': {'type': 'float',
   'has-unit': True,
   'extent': [],
   'required': True,
   'description': 'Cohesive energy'},
  'forces': {'type': 'float',
   'has-unit': True,
   'extent': [':', 3],
   'required': True,
   'description': 'Atomic forces'}}}

## Attaching property settings

In [4]:
from colabfit.tools.property_settings import PropertySettings

pso = PropertySettings(
    method='VASP',
    description='A basic VASP calculation',
    files=None,
    labels=['PBE', 'GGA'],
)

pso_id = client.insert_property_settings(pso)
pso_id

'-8136543487582617353'

## Adding data

### Generating configurations

#### Manually

In [5]:
import numpy as np
from ase import Atoms

images = []
for i in range(1, 1000):
    atoms = Atoms('H'*i, positions=np.random.random((i, 3)))

    atoms.info['_name'] = 'configuration_' + str(i)
    
    atoms.info['dft_energy'] = i*i
    atoms.arrays['dft_forces'] = np.random.normal(size=(i, 3))
    

    images.append(atoms)

#### Using `load_data()`

In [6]:
from ase.io import write

outfile = '/content/example.extxyz'   # use this line for ColabFit
outfile = '/tmp/example.extxyz'   # use this line for local runs

write(outfile, images)  # use this line for local runs

In [7]:
from colabfit.tools.client import load_data

images = load_data(
    file_path=outfile,  # use this line for local runs
    file_format='xyz',
    name_field='_name',
    elements=['H'],
    default_name=None,
)

### Defining a `property_map`

In [8]:
property_map = {
    'energy-forces': {
        'energy': {'field': 'dft_energy', 'units': 'eV'},
        'forces': {'field': 'dft_forces', 'units': 'eV/Ang'},
    }
}

### `insert_data()`

In [10]:
help(client.insert_data)

Help on method insert_data in module colabfit.tools.client:

insert_data(configurations, property_map=None, property_settings=None, generator=False) method of colabfit.tools.client.HDF5Client instance
    A wrapper to Database.insert_data() which also adds important queryable
    metadata about the configurations into the Client's server.
    
    Note that when adding the data, the Mongo server will store the
    bi-directional relationships between the data. For example, a property
    will point to its configurations, but those configurations will also
    point back to any linked properties.
    
    Args:
    
        configurations (list or Configuration):
            The list of configurations to be added.
    
        property_map (dict):
            A dictionary that is used to specify how to load a defined
            property off of a configuration. Note that the top-level keys in
            the map must be the names of properties that have been
            previously defin

In [9]:
client.insert_data(
    images,
    property_map=property_map,
    property_settings={'energy-forces': pso_id},
    verbose=True
)

TypeError: insert_data() got an unexpected keyword argument 'verbose'

In [None]:
from colabfit.tools.configuration import Configuration

client.configurations = [
    Configuration.from_ase(atoms) for atoms in images
]

## Creating a `Dataset` from scratch

In [None]:
from colabfit.tools.dataset import Dataset

In [None]:
dataset = Dataset(name='example')

dataset.authors = [
    'J. E. Lennard-Jones',
]

dataset.links = [
    'https://en.wikipedia.org/wiki/John_Lennard-Jones'
]

dataset.description = "This is an example dataset"


In [None]:
_ = [print(co) for co in dataset.configurations[:5]]

## Applying labels to configurations

In [None]:
# Labels can be specified as lists or single strings (which will be wrapped in a list).
dataset.configuration_label_regexes = {
    'configuration_[1-10]': 'small',
    '.*': 'random',
}

## Building configuration sets

In [None]:
dataset.configuration_set_regexes = {
    'configuration_[1-499]':   "The first configuration set",
    'configuration_[500-999]': "The second configuration set",
}

## Synchronizing the dataset

`Dataset.resync()` must be called in order to make sure that the dataset is self-consistent by applying configuration labels, building configuration sets, etc.

In [None]:
print(dataset.configurations[0].info['_labels'])
dataset.print_configuration_sets()

In [None]:
dataset.resync()

In [None]:
print(dataset.configurations[0].info['_labels'])
dataset.print_configuration_sets()

## Parsing the data

In [None]:
dataset.property_map = {
    'default': {
        'energy': {'field': 'dft_energy', 'units': 'eV'},
        'forces': {'field': 'dft_forces', 'units': 'eV/Ang'},
    }
}

In [None]:
dataset.parse_data(convert_units=False, verbose=True)

## Visualizing the data

In [None]:
dataset.plot_histograms(['energy', 'forces'])

## Providing calculation metadata

In [None]:
from colabfit.tools.property_settings import PropertySettings

dataset.property_settings_regexes = {
    '.*':
        PropertySettings(
            method='VASP',
            description='energy/force calculations',
            # files=['/path/to/INCAR'],
            labels=['PBE', 'GGA'],
        )
}

In [None]:
dataset.resync()

In [None]:
dataset.data[0].settings