# Functions and Imports - no pyiron dependence

In [1]:
from h5io_browser import Pointer
from h5io_browser.base import _open_hdf, _read_hdf
import h5py
import posixpath
import os

In [2]:
def list_groups_and_nodes(hdf, h5_path):
    """
    Split the direct children of an HDF5 path into sub-groups and nodes

    Args:
        hdf (h5py.File): handle of an already opened HDF5 file
        h5_path (str): location inside the HDF5 file whose children are listed

    Returns:
        list, list: names of the children which are ``h5py.Group`` instances and names of all
                    remaining children (nodes). The names are relative to ``h5_path`` and come
                    out of a set, so their order is not defined.
    """
    group_names, node_names = set(), set()
    try:
        parent = hdf[h5_path]
        for child in parent.keys():
            # Everything that is not a group is treated as a node.
            bucket = group_names if isinstance(parent[child], h5py.Group) else node_names
            bucket.add(child)
    except KeyError:
        # Best effort: a failing lookup is not an error here, whatever was
        # collected up to this point (possibly nothing) is returned.
        pass
    return list(group_names), list(node_names)

In [3]:
def read_dict_from_hdf(
    file_name, h5_path, recursive=False, group_paths=None, slash="ignore"
):
    """
    Read data from HDF5 file into a dictionary - by default only the nodes are converted to dictionaries, additional
    sub groups can be specified using the group_paths parameter.

    Args:
       file_name (str): Name of the file on disk
       h5_path (str): Path to a group in the HDF5 file from where the data is read
       recursive (bool): Load all subgroups recursively
       group_paths (list): list of additional groups to be included in the dictionary, for example:
                           ["input", "output", "output/generic"]
                           These groups are defined relative to the h5_path. None (the default) and an empty
                           list both mean "no additional groups".
       slash (str): 'ignore' | 'replace' Whether to replace the string {FWDSLASH} with the value /. This does
                    not apply to the top level name (title). If 'ignore', nothing will be replaced.

    Returns:
       dict:     The loaded data. Can be of any type supported by ``write_hdf5``.

    Raises:
       ValueError: if recursive is set and group_paths is non-empty at the same time
    """

    def get_dict_from_nodes(store, h5_path, slash="ignore"):
        """
        Load all nodes from an HDF5 path into a dictionary

        Args:
            store (h5py.File): file handle of the open HDF5 file
            h5_path (str): Path to a group in the HDF5 file from where the data is read
            slash (str): 'ignore' | 'replace' Whether to replace the string {FWDSLASH} with the value /. This does
                         not apply to the top level name (title). If 'ignore', nothing will be replaced.

        Returns:
            dict:        node name -> loaded data, for the nodes located directly below h5_path
        """
        return {
            n: _read_hdf(
                hdf_filehandle=store,
                h5_path=get_h5_path(h5_path=h5_path, name=n),
                slash=slash,
            )
            for n in list_groups_and_nodes(hdf=store, h5_path=h5_path)[1]
        }

    def resolve_nested_dict(group_path, data_dict):
        """
        Wrap a dictionary in one nesting level per component of the group path, for example
        group_path="a/b/c" and data_dict={"x": 1} -> {"a": {"b": {"c": {"x": 1}}}}

        Args:
            group_path (str): path inside the HDF5 file the data_dictionary was loaded from
            data_dict (dict): dictionary with data loaded from the HDF5 file

        Returns:
            dict: hierarchical dictionary
        """
        nested_dict = data_dict
        # Build from the innermost component outwards.
        for g in reversed(group_path.split("/")):
            nested_dict = {g: nested_dict}
        return nested_dict

    def get_groups_hdf(hdf, h5_path):
        """
        Get all sub-groups of a given HDF5 path

        Args:
            hdf (h5py.File): file handle of the open HDF5 file
            h5_path (str): Path to a group in the HDF5 file from where the data is read

        Returns:
            list: names (``.name`` of the h5py objects) of all groups below h5_path, each group directly
                  followed by its own sub-groups. Empty if a lookup raises KeyError.
        """
        try:
            h = hdf[h5_path]
            group_lst = []
            for k in h.keys():
                child = h[k]  # single lookup per child
                if isinstance(child, h5py.Group):
                    group_lst.append(child.name)
                    group_lst.extend(get_groups_hdf(hdf=hdf, h5_path=child.name))
            return group_lst
        except KeyError:
            return []

    def merge_dict(main_dict, add_dict):
        """
        Merge two dictionaries recursively

        Args:
            main_dict (dict): The primary dictionary, the secondary dictionary is merged into (modified in place)
            add_dict (dict): The secondary dictionary which is merged in the primary dictionary

        Returns:
            dict: The merged dictionary with all keys
        """
        for k, v in add_dict.items():
            # Only descend when both sides are dictionaries, otherwise the
            # value from add_dict replaces the existing entry.
            if isinstance(v, dict) and isinstance(main_dict.get(k), dict):
                main_dict[k] = merge_dict(main_dict=main_dict[k], add_dict=v)
            else:
                main_dict[k] = v
        return main_dict

    # None replaces the former mutable default argument.
    if group_paths is None:
        group_paths = []

    if recursive and group_paths:
        raise ValueError(
            f"Loading subgroups can either be defined by the group paths {group_paths} "
            f"or by the recursive {recursive} parameter. Specifying both leads to this ValueError."
        )

    with _open_hdf(file_name, mode="r") as store:
        output_dict = get_dict_from_nodes(store=store, h5_path=h5_path, slash=slash)
        if recursive:
            # The group names collected below are absolute, so compare them
            # against an absolute root. relpath also copes with a trailing
            # slash in h5_path, which plain length-based slicing does not.
            root = h5_path if h5_path.startswith("/") else "/" + h5_path
            group_paths = [
                posixpath.relpath(g, root)
                for g in get_groups_hdf(hdf=store, h5_path=root)
            ]
        for group_path in group_paths:
            output_dict = merge_dict(
                main_dict=output_dict,
                add_dict=resolve_nested_dict(
                    group_path=group_path,
                    data_dict=get_dict_from_nodes(
                        store=store,
                        h5_path=get_h5_path(h5_path=h5_path, name=group_path),
                        slash=slash,
                    ),
                ),
            )
    return output_dict

In [4]:
def get_h5_path(h5_path, name):
    """
    Append a relative path to a path inside the HDF5 file

    The two parts are joined with POSIX rules (always "/" as separator), so an
    absolute ``name`` replaces ``h5_path`` instead of being appended to it.

    Args:
        h5_path (str): absolute path of the node in the hdf5 file
        name (str): relative path to be added to the absolute path

    Returns:
        str: combined path
    """
    combined_path = posixpath.join(h5_path, name)
    return combined_path

# Create LAMMPS job

In [5]:
from pyiron_atomistics import Project

In [6]:
# Project "test": the job created below ends up in .../test/lmp.h5
# (see the file_name output further down).
pr = Project("test")

In [7]:
# Start from a clean project: remove jobs left over from earlier runs.
# NOTE(review): silently=True presumably skips the confirmation prompt - confirm.
pr.remove_jobs(recursive=True, silently=True)

  0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
# Bulk aluminium in its cubic cell, the starting point for the supercell below.
structure = pr.create.structure.ase.bulk("Al", cubic=True)

In [9]:
# Repeat the cell 9 x 9 x 9; the result is not re-assigned, so the structure
# is modified in place.
# NOTE(review): the echoed source line in this cell's output looks like the
# tail of a warning - check whether set_repeat is deprecated in favour of
# `structure = structure.repeat([9, 9, 9])`.
structure.set_repeat([9,9,9])

  structure.set_repeat([9,9,9])


In [10]:
len(structure)

2916

In [11]:
# LAMMPS job named "lmp"; the name also shows up as the HDF5 file name (lmp.h5)
# and as the top-level group inside that file.
job = pr.create.job.Lammps("lmp")

In [12]:
# Attach the 9 x 9 x 9 aluminium supercell to the job.
job.structure =structure

In [13]:
# Interatomic potential, selected by name (Ni-Al EAM, see the pair_style /
# pair_coeff lines in the job.input.potential output further down).
job.potential = '2002--Mishin-Y--Ni-Al--LAMMPS--ipr1'

In [14]:
# Molecular dynamics at 500 with output every 10 steps; these values reappear
# in the generated control input (fix___ensemble ... temp 500.0 500.0,
# dumptime / thermotime equal 10).
job.calc_md(n_ionic_steps=1000, n_print=10, temperature=500.0)

In [15]:
# Execute the calculation; afterwards the job's HDF5 file holds input and output.
job.run()

The job lmp was saved and received the ID: 1


# Read job dictionary

In [16]:
# Load the complete HDF5 file of the job into one nested dictionary: start at
# the root and descend into every group. With recursive=True the explicit
# group_paths list has to stay empty, otherwise read_dict_from_hdf raises.
job_dict = read_dict_from_hdf(
    file_name=job.project_hdf5.file_name,
    h5_path="/",
    recursive=True,
    group_paths=[],
    slash='ignore',
)

# Get LAMMPS schema

In [17]:
# Path of the job's HDF5 file on disk; reused by all following cells.
file_name = job.project_hdf5.file_name
file_name

'/home/jovyan/test/lmp.h5'

In [18]:
# Flat list of the paths of all objects in the file, filled by the callback below.
key_lst = []
def collect_attrs(name, obj):
    """
    Callback for ``visititems``: remember the path of the visited object.

    Despite its name, the function records only the path (``name``); the object
    itself (``obj``) and its HDF5 attributes are not inspected.
    """
    key_lst.append(name)
    
with h5py.File(file_name, 'r') as f:
    # Calls collect_attrs for the objects below the root; groups and nodes
    # alike end up in key_lst (paths without leading slash, e.g. 'lmp/NAME').
    f.visititems(collect_attrs)

In [19]:
# Keep only the leaves: a key is dropped when it is the parent path of another
# key. Comparing against exact parent paths (instead of a substring test on
# the remaining keys) does not depend on the traversal order and cannot drop a
# node whose path merely is a prefix of a sibling, e.g. "a/energy" next to
# "a/energy_tot". It is also linear instead of quadratic in the number of keys.
parent_paths = {posixpath.dirname(k) for k in key_lst}
node_lst = [k for k in key_lst if k not in parent_paths]
node_lst

['lmp/DICT_VERSION',
 'lmp/HDF_VERSION',
 'lmp/NAME',
 'lmp/OBJECT',
 'lmp/TYPE',
 'lmp/VERSION',
 'lmp/executable',
 'lmp/input/control_inp/DICT_VERSION',
 'lmp/input/control_inp/NAME',
 'lmp/input/control_inp/OBJECT',
 'lmp/input/control_inp/TYPE',
 'lmp/input/control_inp/VERSION',
 'lmp/input/control_inp/data_dict',
 'lmp/input/generic/DICT_VERSION',
 'lmp/input/generic/NAME',
 'lmp/input/generic/OBJECT',
 'lmp/input/generic/TYPE',
 'lmp/input/generic/VERSION',
 'lmp/input/generic/data_dict',
 'lmp/input/generic_dict',
 'lmp/input/interactive',
 'lmp/input/potential_inp/DICT_VERSION',
 'lmp/input/potential_inp/NAME',
 'lmp/input/potential_inp/OBJECT',
 'lmp/input/potential_inp/TYPE',
 'lmp/input/potential_inp/VERSION',
 'lmp/input/potential_inp/data_dict',
 'lmp/input/potential_inp/potential/Citations',
 'lmp/input/potential_inp/potential/Config',
 'lmp/input/potential_inp/potential/Filename',
 'lmp/input/potential_inp/potential/Model',
 'lmp/input/potential_inp/potential/Name',
 'l

# Write LAMMPS job

In [20]:
def get_node_from_job_dict(job_dict, node):
    """
    Look up a value in a nested dictionary using a slash separated path

    Args:
        job_dict (dict): nested dictionary, one nesting level per path component
        node (str): path of the value, components separated by "/", e.g. "lmp/input/generic/NAME"

    Returns:
        object: the entry stored under the last path component
    """
    cursor = job_dict
    remaining = node
    while True:
        # Peel off one path component per iteration and descend one level.
        component, separator, remaining = remaining.partition("/")
        cursor = cursor[component]
        if not separator:
            # No further "/" found: this was the last component.
            return cursor

In [21]:
# Flatten the nested job dictionary again: one entry per leaf path of the
# original file, mapped to the value loaded for that path.
node_dict = {node: get_node_from_job_dict(job_dict=job_dict, node=node) for node in node_lst}

In [22]:
# Delete the original HDF5 file so that the next cell has to recreate it from
# node_dict alone. From here on the data exists only in memory.
os.remove(file_name)

In [23]:
# Write the flat {path: value} dictionary back to the same file name.
with Pointer(file_name=file_name) as hdf_file:
    hdf_file.write_dict(node_dict)

# Reload job

In [24]:
# Round-trip check: load the job again through pyiron, now backed by the
# rewritten HDF5 file; the following cells inspect its input and output.
job = pr.load(job.job_name)

In [25]:
job.input.control

Unnamed: 0,Parameter,Value,Comment
0,units,metal,
1,dimension,3,
2,boundary,p p p,
3,atom_style,atomic,
4,read_data,structure.inp,
5,include,potential.inp,
6,fix___ensemble,all nvt temp 500.0 500.0 0.1,
7,variable___dumptime,equal 10,
8,variable___thermotime,equal 10,
9,timestep,0.001,


In [26]:
job.input.potential

Unnamed: 0,Parameter,Value,Comment
0,pair_style,eam/alloy,
1,pair_coeff,* * NiAl02.eam.alloy Ni Al,


In [27]:
job.output.energy_tot

array([-9427.29630088, -9430.14696684, -9432.82349157, -9433.14423094,
       -9432.46197988, -9431.11779442, -9429.84887403, -9428.59786876,
       -9427.38813359, -9426.70956719, -9426.59728248, -9426.49789307,
       -9426.12272411, -9425.59921798, -9425.20912449, -9424.90209584,
       -9424.49520497, -9423.99938741, -9423.57006459, -9423.24848256,
       -9422.96494608, -9422.73886581, -9422.67498343, -9422.7595396 ,
       -9422.83422591, -9422.80728218, -9422.74305075, -9422.77967415,
       -9422.94528544, -9423.13258663, -9423.2946694 , -9423.5107131 ,
       -9423.84002756, -9424.21163482, -9424.54837222, -9424.86419284,
       -9425.20921977, -9425.60463137, -9426.04263383, -9426.51076414,
       -9427.00943396, -9427.53249825, -9428.0367887 , -9428.46758844,
       -9428.83561118, -9429.21325813, -9429.67512814, -9430.21492958,
       -9430.73337493, -9431.12233644, -9431.37229576, -9431.54923363,
       -9431.70507951, -9431.83867285, -9431.96559155, -9432.12935211,
      