In [4]:
import piplite
await piplite.install('chemiscope')
await piplite.install('ase')


In [15]:
import chemiscope
import ase

In [16]:
import warnings
from collections import Counter

import numpy as np

try:
    import ase

    HAVE_ASE = True
except ImportError:
    HAVE_ASE = False


def _ase_valid_structures(frames):
    """
    Check whether ``frames`` contains ASE structures.

    :param frames: iterable of ``ase.Atoms`` objects, or a single ``ase.Atoms``
    :return: a tuple ``(frames, use_ase)``. ``frames`` is a list of structures
        (a single ``ase.Atoms`` is wrapped in a one-element list) and
        ``use_ase`` is ``True`` when these structures are ``ase.Atoms``.
    """
    # Materialize the input once: ``frames`` may be a generator, in which case
    # handing the original object back would give the caller an exhausted
    # iterator.
    frames_list = list(frames)

    if not HAVE_ASE or len(frames_list) == 0:
        # without ASE (or without any frame) there is nothing to inspect
        return frames_list, False

    first = frames_list[0]
    if isinstance(first, ase.Atoms):
        for frame in frames_list:
            assert isinstance(frame, ase.Atoms)
        return frames_list, True
    elif isinstance(first, ase.Atom):
        # deal with the user passing a single frame: iterating over an
        # ``ase.Atoms`` yields ``ase.Atom`` instances
        return [frames], True
    else:
        return frames_list, False


def _ase_list_atom_properties(frames):
    IGNORED_ASE_ARRAYS = ["positions", "numbers", "center_atoms_mask"]
    # extract the set of common properties between all frames
    all_names = set()
    extra = set()

    for name in frames[0].arrays.keys():
        if name in IGNORED_ASE_ARRAYS:
            continue
        all_names.add(name)

    for frame in frames[1:]:
        for name in frame.arrays.keys():
            if name in IGNORED_ASE_ARRAYS:
                continue

            if name not in all_names:
                extra.add(name)

        remove = []
        for name in all_names:
            if name not in frame.arrays.keys():
                remove.append(name)

        for name in remove:
            all_names.remove(name)
            extra.add(name)

    if len(extra) != 0:
        warnings.warn(
            "the following atomic properties properties are only defined "
            f"for a subset of frames: {list(sorted(extra))}; they will be ignored"
        )

    return all_names


def _ase_list_structure_properties(frames):
    # extract the set of common properties between all frames
    all_names = set()
    extra = set()

    for name in frames[0].info.keys():
        all_names.add(name)

    for frame in frames[1:]:
        for name in frame.info.keys():
            if name not in all_names:
                extra.add(name)

        remove = []
        for name in all_names:
            if name not in frame.info.keys():
                remove.append(name)

        for name in remove:
            all_names.remove(name)
            extra.add(name)

    if len(extra) != 0:
        warnings.warn(
            "the following structure properties properties are only defined "
            f"for a subset of frames: {list(sorted(extra))}; they will be ignored"
        )

    return all_names


def _ase_atom_properties(frames, only, atoms_mask=None):
    """
    Gather the per-atom arrays shared by all ``frames`` in the format expected
    by ``create_input``.

    :param frames: sequence of frames exposing an ``arrays`` dictionary
    :param only: optional collection of names; when not ``None``, only arrays
        with these names are kept
    :param atoms_mask: optional mask used to index the concatenated values of
        every property
    :return: dictionary mapping property names to
        ``{"target": "atom", "values": ...}``
    """
    selected = _ase_list_atom_properties(frames)
    if only is not None:
        selected = [name for name in selected if name in only]

    # start from the arrays of the first frame ...
    collected = {}
    for key, array in frames[0].arrays.items():
        if key in selected:
            collected[key] = {"target": "atom", "values": array}

    # ... and append the arrays of all the following frames
    for frame in frames[1:]:
        for key, array in frame.arrays.items():
            if key in selected:
                entry = collected[key]
                entry["values"] = np.concatenate([entry["values"], array])

    _remove_invalid_properties(collected, "ASE")

    if atoms_mask is None:
        return collected

    # only include values for requested atoms
    for entry in collected.values():
        entry["values"] = entry["values"][atoms_mask]

    return collected


def _ase_structure_properties(frames, only=None):
    """
    Gather the ``info`` entries shared by all ``frames`` in the format
    expected by ``create_input``.

    :param frames: sequence of frames exposing an ``info`` dictionary
    :param only: optional collection of names; when not ``None``, only entries
        with these names are kept
    :return: dictionary mapping property names to
        ``{"target": "structure", "values": [...]}``, with one value per frame
    """
    selected = _ase_list_structure_properties(frames)
    if only is not None:
        selected = [name for name in selected if name in only]

    collected = {}
    for key in selected:
        collected[key] = {"target": "structure", "values": []}

    for frame in frames:
        for key, item in frame.info.items():
            if key not in selected:
                continue
            collected[key]["values"].append(item)

    _remove_invalid_properties(collected, "ASE")

    return collected


def _ase_extract_properties(frames, only=None, environments=None):
    """
    ASE implementation of ``extract_properties``: collect structure and atom
    properties stored in the frames.

    :param frames: sequence of frames
    :param only: optional collection of property names to keep
    :param environments: optional list of ``(structure, center, cutoff)``
        tuples; when given, atom properties are restricted to these centers
    :return: dictionary of properties; when a name exists both as a structure
        and as an atom property, the structure one wins and a warning is
        emitted
    """
    merged = _ase_structure_properties(frames, only)

    atoms_mask = None
    if environments is not None:
        # one boolean per atom, set to True for every requested center
        per_frame_mask = [[False] * len(frame) for frame in frames]
        for structure_i, center_i, _ in environments:
            per_frame_mask[structure_i][center_i] = True

        atoms_mask = np.concatenate(per_frame_mask)

    per_atom = _ase_atom_properties(frames, only, atoms_mask)

    for name, values in per_atom.items():
        if name not in merged:
            merged[name] = values
            continue

        warnings.warn(
            f"a property named '{name}' is defined for both atoms and structures, "
            "the atom one will be ignored"
        )

    return merged


def _ase_all_atomic_environments(frames, cutoff):
    "Extract all atomic environments out of a set of ASE Atoms objects"
    environments = []
    for structure_i, frame in enumerate(frames):
        for atom_i in range(len(frame)):
            environments.append((structure_i, atom_i, cutoff))
    return environments


def _ase_librascal_atomic_environments(frames, cutoff):
    """
    Extract atomic environments out of a set of ASE Atoms objects,
    using the same convention as librascal
    """
    environments = []
    for structure_i, frame in enumerate(frames):
        if "center_atoms_mask" in frame.arrays:
            atoms_iter = np.where(frame.arrays["center_atoms_mask"])[0]
        else:
            # use all atoms
            atoms_iter = range(len(frame))

        for atom_i in atoms_iter:
            environments.append((structure_i, atom_i, cutoff))

    return environments


def _ase_composition_properties(frames, environments=None):
    all_elements = set()
    for frame in frames:
        all_elements.update(frame.symbols)
    all_elements = set(all_elements)

    composition = []
    elements_count = {element: [] for element in all_elements}
    for frame in frames:
        composition.append(str(frame.symbols))

        dict_composition = dict(Counter(frame.symbols))
        for element in all_elements:
            if element in dict_composition:
                elements_count[element].append(dict_composition[element])
            else:
                elements_count[element].append(0)

    properties = {
        f"n_{element}": {"values": values, "target": "structure"}
        for element, values in elements_count.items()
    }

    properties["composition"] = {"values": composition, "target": "structure"}

    if environments is not None:
        atoms_mask = [[False] * len(f) for f in frames]
        for structure, center, _ in environments:
            atoms_mask[structure][center] = True
    else:
        atoms_mask = None

    symbols = []
    numbers = []
    for i, frame in enumerate(frames):
        if atoms_mask is None:
            frame_symbols = list(frame.symbols)
            frame_numbers = list(frame.numbers)
        else:
            frame_symbols = frame.symbols[atoms_mask[i]]
            frame_numbers = frame.numbers[atoms_mask[i]]

        symbols.extend(frame_symbols)
        numbers.extend(frame_numbers)

    properties["symbol"] = {"values": symbols, "target": "atom"}
    properties["number"] = {"values": numbers, "target": "atom"}

    return properties


def _ase_to_json(frame):
    """Implementation of frame_to_json for ase.Atoms"""
    data = {}
    data["size"] = len(frame)
    data["names"] = list(frame.symbols)
    data["x"] = [float(value) for value in frame.positions[:, 0]]
    data["y"] = [float(value) for value in frame.positions[:, 1]]
    data["z"] = [float(value) for value in frame.positions[:, 2]]

    if (frame.cell.lengths() != [0.0, 0.0, 0.0]).all():
        data["cell"] = list(np.concatenate(frame.cell))

    return data


def _remove_invalid_properties(properties, origin):
    """
    Drop from ``properties`` (in place) every property containing a value that
    can not be used by chemiscope, warning once per dropped property.
    ``origin`` names the source of the properties in the warning message.
    """

    def _first_invalid(values):
        # returns (True, value) for the first unusable value, (False, None)
        # if all values are fine
        for candidate in values:
            if not _is_convertible_to_property(candidate):
                return True, candidate
        return False, None

    rejected = []
    for name, entry in properties.items():
        found, value = _first_invalid(entry["values"])
        if not found:
            continue

        warnings.warn(
            f"value '{value}' of type '{type(value)}' for the '{name}' "
            f"property from {origin} is not convertible to float, array or "
            "string, this property will be ignored."
        )
        rejected.append(name)

    # delete after the loop, the dictionary can not change while iterating
    for name in rejected:
        del properties[name]


def _is_convertible_to_property(value):
    """
    Check whether a value is convertible to a chemiscope property, i.e. if it is
    a string or something convertible to float.
    """
    if isinstance(value, (bytes, str)):
        # string types
        return True
    else:
        # everything convertible to float
        try:
            float(value)
            return True
        except Exception:
            try:
                np.array(value, dtype=float)
                return True
            except Exception:
                return False


In [17]:
import gzip
import json
import os
import warnings

import numpy as np


def create_input(
    frames=None,
    meta=None,
    properties=None,
    environments=None,
    settings=None,
):
    """
    Create a dictionary that can be saved to JSON using the format used by
    the default chemiscope visualizer.

    :param list frames: list of atomic structures. For now, only `ase.Atoms`_
                        objects are supported
    :param dict meta: optional metadata of the dataset, see below
    :param dict properties: optional dictionary of properties, see below
    :param list environments: optional list of (structure id, atom id, cutoff)
        specifying which atoms have properties attached and how far out
        atom-centered environments should be drawn by default. Functions like
        :py:func:`all_atomic_environments` or :py:func:`librascal_atomic_environments`
        can be used to generate the list of environments in simple cases.
    :param dict settings: optional dictionary of settings to use when displaying
        the data. Possible entries for the ``settings`` dictionary are documented
        in the chemiscope input file reference.

    :raises ValueError: if ``settings`` is not a dictionary, if neither
        ``frames`` nor ``properties`` is given, if ``environments`` is given
        without any structure, or if a property is invalid when ``frames`` is
        not given (non-structure target or inconsistent size).

    The dataset metadata should be given in the ``meta`` dictionary, the
    possible keys are:

    .. code-block:: python

        meta = {
            'name': '...',         # str, dataset name
            'description': '...',  # str, dataset description
            'authors': [           # list of str, dataset authors, OPTIONAL
                '...',
            ],
            'references': [        # list of str, references for this dataset,
                '...',             # OPTIONAL
            ],
        }

    Properties can be added with the ``properties`` parameter. This parameter
    should be a dictionary containing one entry for each property. Properties
    can be extracted from structures with :py:func:`extract_properties` or
    :py:func:`composition_properties`, or manually defined by the user.

    Each entry in the ``properties`` dictionary contains a ``target`` attribute
    (``'atom'`` or ``'structure'``) and a set of values. ``values`` can be a
    Python list of float or string; a 1D numpy array of numeric values; or a 2D
    numpy array of numeric values. In the latter case, multiple properties will
    be generated along the second axis. For example, passing

    .. code-block:: python

        properties = {
            'cheese': {
                'target': 'atom',
                'values': np.zeros((300, 4)),
                # optional: property unit
                'units': 'random / fs',
                # optional: property description
                'description': 'a random property for example',
            }
        }

    will generate four properties named ``cheese[1]``, ``cheese[2]``,
    ``cheese[3]``,  and ``cheese[4]``, each containing 300 values.

    It is also possible to pass shortened representation of the properties, for
    instance:

    .. code-block:: python

        properties = {
            'cheese':  np.zeros((300, 4)),
        }

    In this case, the type of property (structure or atom) would be deduced
    by comparing the numbers of atoms and structures in the dataset to the
    length of provided list/np.ndarray.

    .. _`ase.Atoms`: https://wiki.fysik.dtu.dk/ase/ase/atoms.html
    """

    data = {
        "meta": _normalize_metadata(meta if meta is not None else {}),
    }

    if settings is not None:
        # dump/load as json to catch possible json incompatibility in settings
        # early
        if not isinstance(settings, dict):
            raise ValueError(
                f"expected 'settings' to be a dict, got {type(settings)} instead"
            )

        data["settings"] = json.loads(json.dumps(settings))

    data["structures"] = []
    n_structures = None

    if frames is not None:
        data["structures"] = frames_to_json(frames)
        n_structures = len(data["structures"])
        n_atoms = sum(s["size"] for s in data["structures"])
    else:
        n_atoms = 0

        if properties is None:
            # nothing to display at all: fail with a clear message instead of
            # an AttributeError on `None.items()`
            raise ValueError(
                "either 'frames' or 'properties' must be provided to create "
                "a chemiscope input"
            )

        # if frames are not given, we create a dataset with only properties.
        # In that case, all properties should be structure properties
        for name, value in properties.items():
            if not isinstance(value, dict):
                if n_structures is None:
                    n_structures = len(value)
                else:
                    if len(value) != n_structures:
                        raise ValueError(
                            f"wrong size for property '{name}': expected "
                            f"{n_structures} elements, but got an array with "
                            f"{len(value)} entries"
                        )
            else:
                if value["target"] != "structure":
                    raise ValueError(
                        f"property '{name}' has a non-structure target, "
                        "which is not allowed if frames are not provided"
                    )

                n_structures = len(value["values"])

    if environments is not None:
        # `data["structures"]` always exists at this point, so check whether
        # it actually contains structures
        if len(data["structures"]) == 0:
            raise ValueError("can not have environments without structures")

        data["environments"] = _normalize_environments(environments, data["structures"])
        n_atoms = len(data["environments"])

    data["properties"] = {}
    if properties is not None:
        properties = _expand_properties(properties, n_structures, n_atoms)
        for name, value in properties.items():
            data["properties"].update(_linearize(name, value, n_structures, n_atoms))

    # Check to tell the user they might have forgotten some properties coming
    # from the frames (that chemiscope used to automatically extract). This code
    # should be removed in version 0.6 of chemiscope.
    if frames is not None:
        found_one_from_frame = False
        atom_properties = _list_atom_properties(frames)
        for name in atom_properties:
            if name in data["properties"]:
                found_one_from_frame = True

        structure_properties = _list_structure_properties(frames)
        for name in structure_properties:
            if name in data["properties"]:
                found_one_from_frame = True

        if not found_one_from_frame:
            properties_list = ""

            if len(structure_properties) != 0:
                properties_list += "[" + ", ".join(structure_properties) + "]"

            if len(atom_properties) != 0:
                if len(properties_list) != 0:
                    properties_list += " and "
                properties_list += "[" + ", ".join(atom_properties) + "]"

            if len(properties_list) != 0:
                warnings.warn(
                    "chemiscope behavior changed to no longer include properties "
                    "from the structure objects. Use `chemiscope.extract_properties` "
                    f"to also visualize these properties ({properties_list})"
                )

    return data


def write_input(
    path,
    frames,
    meta=None,
    properties=None,
    environments=None,
    settings=None,
):
    """
    Build the JSON input used by the default chemiscope visualizer and write
    it to ``path``.

    :param str path: name of the file where the JSON data is saved. It must end
                     with ``.json`` or ``.json.gz``; in the second case a gzip
                     compressed file is written
    :param list frames: list of atomic structures. For now, only `ase.Atoms`_
                        objects are supported
    :param dict meta: optional metadata of the dataset
    :param dict properties: optional dictionary of additional properties
    :param list environments: optional list of (structure id, atom id, cutoff)
        specifying which atoms have properties attached and how far out
        atom-centered environments should be drawn by default.
    :param dict settings: optional dictionary of settings to use when displaying
        the data. Possible entries for the ``settings`` dictionary are documented
        in the chemiscope input file reference.

    The data itself is generated by :py:func:`create_input`, see the
    documentation of this function for more information. If the resulting
    dataset has no name (or the ``"<unknown>"`` placeholder), the part of the
    file name before the first ``.`` is used as the dataset name.

    Here is a quick example of generating a chemiscope input reading the
    structures from a file that `ase <ase-io_>`_ can read, and performing PCA
    using `sklearn`_ on a descriptor computed with another package.

    .. code-block:: python

        import ase
        from ase import io
        import numpy as np
        import sklearn
        from sklearn import decomposition
        import chemiscope

        frames = ase.io.read('trajectory.xyz', ':')

        # example property 1: list containing the energy of each structure,
        # from calculations performed beforehand
        energies = np.loadtxt('energies.txt')

        # example property 2: PCA projection computed using sklearn.
        # X contains a multi-dimensional descriptor of the structure
        X = np.array( ... )
        pca = sklearn.decomposition.PCA(n_components=3).fit_transform(X)

        properties = {
            "PCA": {
                "target": "atom",
                "values": pca,
                "description": "PCA of per-atom representation of the structures",
            },
            "energies": {
                "target": "structure",
                "values": energies,
                "units": "kcal/mol",
            },
        }

        # additional properties coming from the trajectory
        frame_properties = chemiscope.extract_properties(
            frames,
            only=["temperature", "classification"]
        )

        # merge all properties together
        properties.update(frame_properties)

        chemiscope.write_input(
            path="chemiscope.json.gz",
            frames=frames,
            properties=properties,
            # This is required to display properties with `target: "atom"`
            environments=chemiscope.all_atomic_environments(frames),
        )

    .. _ase-io: https://wiki.fysik.dtu.dk/ase/ase/io/io.html
    .. _sklearn: https://scikit-learn.org/
    """

    if not path.endswith((".json", ".json.gz")):
        raise Exception("path should end with .json or .json.gz")

    data = create_input(
        frames=frames,
        meta=meta,
        properties=properties,
        environments=environments,
        settings=settings,
    )

    # fall back to the file name when the dataset is not named
    dataset_meta = data["meta"]
    if dataset_meta.get("name", "<unknown>") == "<unknown>":
        dataset_meta["name"] = os.path.basename(path).split(".")[0]

    if not path.endswith(".gz"):
        with open(path, "w") as file:
            json.dump(data, file, indent=2)
        return

    with gzip.open(path, "w", 9) as file:
        file.write(json.dumps(data).encode("utf8"))


def _normalize_environments(environments, structures):
    cleaned = []
    for environment in environments:
        if len(environment) != 3:
            raise ValueError(
                f"expected environments to contain three values, got {environment}"
            )

        structure, center, cutoff = environment
        structure = int(structure)
        center = int(center)
        cutoff = float(cutoff)

        if structure >= len(structures):
            raise ValueError(
                f"invalid structure index in environments: got {structure}, "
                f"but we have {len(structures)} structures"
            )

        if center >= structures[structure]["size"]:
            raise ValueError(
                f"invalid center index in environments: got {center} in structure "
                f"which only contains {structures[structure]['size']} atoms"
            )

        if cutoff <= 0:
            raise ValueError("negative cutoff in environments is not valid")

        cleaned.append(
            {
                "structure": structure,
                "center": center,
                "cutoff": float(cutoff),
            }
        )

    return cleaned


def _normalize_metadata(meta):
    cleaned = {}
    if "name" in meta and str(meta["name"]) != "":
        cleaned["name"] = str(meta["name"])
    else:
        cleaned["name"] = "<unknown>"

    if "description" in meta:
        cleaned["description"] = str(meta["description"])

    if "authors" in meta:
        cleaned["authors"] = list(map(str, meta["authors"]))

    if "references" in meta:
        cleaned["references"] = list(map(str, meta["references"]))

    for key in meta.keys():
        if key not in ["name", "description", "authors", "references"]:
            warnings.warn(f"ignoring unexpected metadata: {key}")

    return cleaned


def _expand_properties(short_properties, n_structures, n_atoms):
    """
    Convert a shortened entries of properties into the expanded form.
    Entries in already expanded form are not changed.

    :param dict short_properties: properties to handle
    :param int n_structures: number of structures in the dataset
    :param int n_atoms: total number of atoms in the whole dataset

    For example this property dict:
    .. code-block:: python

        properties = {
            'apple': {
                'target': 'atom',
                'values': np.zeros((300, 4)),
                'unit': 'random / fs',
            }
            'orange' : np.zeros((100, 42)),
            'banana' : np.zeros((300, 17)),
        }

    will be converted to
    .. code-block:: python

        properties = {
            'aple': {
                'target': 'atom',
                'values': np.zeros((300, 4)),
                'unit': 'random / fs',
            }
            'orange': {
                'target': 'structure'
                'values': np.zeros((100, 42)),
            }
            'banana': {
                'target': 'atom',
                'values': np.zeros((300, 17)),
           }
        }

    assuming that number of structures in the dataset is 100 and
    total number of atoms in the dataset is 300.
    """
    properties = {}
    for key, value in short_properties.items():
        if isinstance(value, dict):
            properties[key] = value
        else:
            if (not isinstance(value, list)) and (not isinstance(value, np.ndarray)):
                raise ValueError(
                    "Property values should be either list or numpy array, "
                    f"got {type(value)} instead"
                )
            if n_structures == n_atoms:
                warnings.warn(
                    f"The target of the property '{key}' is ambiguous because "
                    "there is the same number of atoms and structures. "
                    "We will assume target=structure"
                )

            dict_property = {"values": value}

            # heuristically determines the type of target
            if len(value) == n_structures:
                dict_property["target"] = "structure"
            elif len(value) == n_atoms:
                dict_property["target"] = "atom"
            else:
                raise ValueError(
                    "The length of property values is different from the "
                    "number of structures and the number of atoms, we can not "
                    f"guess the target. Got n_atoms = {n_atoms}, n_structures = "
                    f"{n_structures}, the length of property values is "
                    f"{len(value)}, for the '{key}' property"
                )

            properties[key] = dict_property
    return properties


def _linearize(name, property, n_structures, n_centers):
    """
    Transform a single property dict (containing "values", "target", "units",
    "description") with potential multi-dimensional "values" key to data that
    chemiscope can load.

    Two-dimensional "values" with more than one column generate multiple
    properties named "XXX[1]", "XXX[2]", "XXX[3]", etc. Data in "values" are
    converted to either string or float, to ensure it is compatible with JSON.

    :param name: name of the property related to this data, to be used in error
                 messages
    :param property: dictionary containing the property data and metadata
    :param n_structures: total number of structures, to validate the array sizes
    :param n_centers: total number of atoms, to validate the array sizes
    :return: dictionary mapping the generated property names to
        ``{"target": ..., "values": [...]}``, plus ``"units"`` and
        ``"description"`` when present in ``property``
    :raises Exception: if the values are neither a list nor a 1D/2D numpy
        array, or if the number of values does not match the target
    """
    _validate_property(name, property)

    target = property["target"]
    values = property["values"]

    data = {}
    if isinstance(values, list):
        data[name] = {
            "target": target,
            "values": _typetransform(values, name),
        }
    elif isinstance(values, np.ndarray):
        if len(values.shape) == 1:
            data[name] = {
                "target": target,
                "values": _typetransform(list(values), name),
            }
        elif len(values.shape) == 2:
            if values.shape[1] == 1:
                # Take the single column explicitly. Iterating over the rows
                # would give 1-element arrays, and converting these with
                # `float()` is deprecated in recent NumPy versions (and does
                # not work at all for string data).
                data[name] = {
                    "target": target,
                    "values": _typetransform(list(values[:, 0]), name),
                }
            else:
                for i in range(values.shape[1]):
                    data[f"{name}[{i + 1}]"] = {
                        "target": target,
                        "values": _typetransform(list(values[:, i]), name),
                    }
        else:
            raise Exception("unsupported ndarray property")
    else:
        raise Exception(
            f"unknown type ({type(property['values'])}) for property '{name}'"
        )

    # get property metadata
    if "units" in property:
        for item in data.values():
            item["units"] = str(property["units"])

    if "description" in property:
        for i, item in enumerate(data.values()):
            # add [component XX] to the description if values was a ndarray
            extra = f" [component {i + 1}]" if len(data) > 1 else ""
            item["description"] = str(property["description"]) + extra

    # Validate the properties size
    for prop in data.values():
        if prop["target"] == "atom" and len(prop["values"]) != n_centers:
            raise Exception(
                f"wrong size for the property '{name}' with target=='atom': "
                f"expected {n_centers} values, got {len(prop['values'])}"
            )

        if prop["target"] == "structure" and len(prop["values"]) != n_structures:
            raise Exception(
                f"wrong size for the property '{name}' with target=='structure': "
                f"expected {n_structures} values, got {len(prop['values'])}"
            )

    return data


def _validate_property(name, property):
    if name == "":
        raise Exception("the name of a property can not be the empty string")
    elif not isinstance(name, str):
        raise Exception(
            "the name of a property name must be a string, "
            f"got '{name}' of type {type(name)}"
        )

    if "target" not in property:
        raise Exception(f"missing 'target' for the '{name}' property")
    elif property["target"] not in ["atom", "structure"]:
        raise Exception(
            f"the target must be 'atom' or 'structure' for the '{name}' property"
        )

    if "values" not in property:
        raise Exception(f"missing 'values' for the '{name}' property")

    for key in property.keys():
        if key not in ["target", "values", "description", "units"]:
            warnings.warn(f"ignoring unexpected property key: {key}")


def _typetransform(data, name):
    """
    Transform the given data to either a list of string or a list of floats.

    :param data: list of unknown type to be converted
    :param name: name of the property related to this data, to be used in
                 error messages
    """
    assert isinstance(data, list) and len(data) > 0
    if isinstance(data[0], str):
        return list(map(str, data))
    elif isinstance(data[0], bytes):
        return list(map(lambda u: u.decode("utf8"), data))
    else:
        try:
            return [float(value) for value in data]
        except Exception:
            raise Exception(
                f"unsupported type in property '{name}' values: "
                "should be string or number"
            )
            
def _is_running_in_notebook():
    """
    Check whether the python interpreter is running for a jupyter notebook or
    not. Taken from https://stackoverflow.com/a/39662359/4692076.
    """

    # apparently get_ipython is lost when this gets called from a callback of
    # an ipython widget. See https://github.com/jupyter/jupyter/issues/299
    try:
        from IPython import get_ipython
    except ImportError:
        return False

    try:
        shell = get_ipython().__class__.__name__
        if shell == "ZMQInteractiveShell" or "Interpreter":
            return True
        elif shell == "TerminalInteractiveShell":
            return False
        else:
            return False
    except NameError:
        return False


In [18]:
# -*- coding: utf-8 -*-
import gzip
import json

import ipywidgets
from traitlets import Bool, Dict, Unicode

# this needs to match the version/name defined in
# python/jupyter/src/labextension.ts
PACKAGE_NAME = "chemiscope"


class ChemiscopeWidgetBase(ipywidgets.DOMWidget, ipywidgets.ValueWidget):
    """
    Base class of the chemiscope jupyter widgets. It stores the dataset as a
    JSON string in ``value`` and the visualizer settings in ``settings``;
    subclasses select the front-end view with ``_view_name``.
    """

    _view_module = Unicode(PACKAGE_NAME).tag(sync=True)
    _view_module_version = Unicode(chemiscope.__version__).tag(sync=True)
    # the dataset, serialized as a JSON string
    value = Unicode().tag(sync=True)
    has_metadata = Bool().tag(sync=True)

    # synchronized settings from the JS side. You can assign to this field to
    # change what's being displayed by chemiscope, but you need to assign a full
    # dictionary (`widget.settings["map"]["x"]["property"] = "foo"` will not
    # work, but `widget.settings = updated_settings` will).
    settings = Dict().tag(sync=True)
    # switch to disable automatic update of settings
    _settings_sync = Bool().tag(sync=True)

    def __init__(self, data, has_metadata):
        """
        :param data: JSON-serializable dataset, stored as a JSON string in
            ``value``
        :param has_metadata: value of the ``has_metadata`` trait
        """
        super().__init__()
        self.value = json.dumps(data)
        self.has_metadata = has_metadata
        self.settings = {}
        self._settings_sync = True

    def save(self, path):
        """
        Save the dataset displayed by this widget as JSON to the given ``path``.
        If ``path`` ends with ``.gz``, the file is written as gzip compressed
        JSON string.

        :param str path: where to save the dataset.
        """
        # update the settings in the data to the latest value. The output is
        # fully serialized before opening the file, so that a serialization
        # error does not leave an empty/truncated file behind.
        data = json.loads(self.value)
        data["settings"] = self.settings
        payload = json.dumps(data).encode("utf8")

        if path.endswith(".gz"):
            file = gzip.open(path, "w", 9)
        else:
            file = open(path, "wb")

        # the context manager closes the file even if the write fails
        with file:
            file.write(payload)


@ipywidgets.register
class ChemiscopeWidget(ChemiscopeWidgetBase):
    """Widget whose view name is ``ChemiscopeView``."""

    _view_name = Unicode("ChemiscopeView").tag(sync=True)

    def __init__(self, data, has_metadata):
        # nothing specific to set up: forward everything to the base class
        super().__init__(data=data, has_metadata=has_metadata)


@ipywidgets.register
class StructureWidget(ChemiscopeWidgetBase):
    """Widget whose view name is ``StructureView``."""

    _view_name = Unicode("StructureView").tag(sync=True)

    def __init__(self, data, has_metadata):
        # nothing specific to set up: forward everything to the base class
        super().__init__(data=data, has_metadata=has_metadata)


@ipywidgets.register
class MapWidget(ChemiscopeWidgetBase):
    """Widget whose view name is ``MapView``."""

    _view_name = Unicode("MapView").tag(sync=True)

    def __init__(self, data, has_metadata):
        # nothing specific to set up: forward everything to the base class
        super().__init__(data=data, has_metadata=has_metadata)


def show(
    frames=None,
    properties=None,
    meta=None,
    environments=None,
    settings=None,
    mode="default",
):
    """
    Show the dataset defined by the given ``frames`` and ``properties``
    (optionally ``meta`` and ``environments`` as well) using an embedded
    chemiscope visualizer inside a Jupyter notebook. These parameters have the
    same meaning as in the :py:func:`chemiscope.create_input` function.

    The ``mode`` keyword also allows overriding the default two-panels
    visualization to show only a structure panel (``mode = "structure"``) or the
    map panel (``mode = "map"``). These modes also make it possible to view a
    dataset for which properties (or frames) are not available.

    When inside a jupyter notebook, the returned object will create a new
    chemiscope visualizer displaying the dataset. The returned object also has
    a ``save`` function that can be used to save the dataset to a ``.json`` or
    ``.json.gz`` file to load it in the main website later.

    The ``properties`` dictionary passed by the caller is never modified: in
    ``"structure"`` mode the automatically generated ``"index"`` property is
    added to a copy.

    .. code-block:: python

        import chemiscope
        from sklearn.decomposition import PCA
        import ase.io

        pca = PCA(n_components = 3)

        frames = ase.io.read(...)
        properties = {
            "PCA": pca.fit_transform(some_data)
        }

        widget = chemiscope.show(frames, properties)
        # display the dataset in a chemiscope visualizer inside the notebook
        widget
        # ...


        # Save the file for later use
        widget.save("dataset.json")

    :param str mode: one of ``"default"``, ``"structure"`` or ``"map"``
    :raises Exception: when not running inside a jupyter notebook
    :raises ValueError: if ``mode`` is invalid, or if a map has to be displayed
        and the dataset contains less than two properties

    .. _ase.Atoms: https://wiki.fysik.dtu.dk/ase/ase/atoms.html
    """
    if not _is_running_in_notebook():
        raise Exception("chemiscope.show only works inside a jupyter notebook")

    has_metadata = meta is not None
    if not has_metadata:
        meta = {"name": " "}

    if mode == "default":
        widget_class = ChemiscopeWidget

    elif mode == "structure":
        # shallow copy: adding "index" below must not leak into the
        # dictionary owned by the caller
        properties = {} if properties is None else dict(properties)
        if "index" not in properties:
            # also adds an index property to have something to show in the info panel
            properties["index"] = {
                "target": "structure",
                "values": list(range(len(frames))),
            }

        widget_class = StructureWidget

    elif mode == "map":
        if properties is None:
            properties = {}

        widget_class = MapWidget

    else:
        raise ValueError(
            f"invalid mode '{mode}' in chemiscope.show, expected one of "
            "'default', 'structure' or 'map'"
        )

    dict_input = create_input(
        frames=frames,
        properties=properties,
        meta=meta,
        environments=environments,
        settings=settings,
    )

    if mode != "structure":
        # if there is a map, we need two properties, otherwise there will be no map
        # and error is only visible in the console
        if len(dict_input["properties"]) < 2:
            raise ValueError("Need at least two properties to visualize a map widget")

    return widget_class(dict_input, has_metadata=has_metadata)


In [19]:
def _guess_adapter(frames):
    """
    Work out which adapter is able to handle the given ``frames``.

    The result is a tuple containing the frames as handed back by the adapter
    check, followed by a string naming the adapter to use.
    """
    candidates, is_ase = _ase_valid_structures(frames)

    if not is_ase:
        # no adapter recognised these frames
        raise Exception(f"unknown frame type: '{frames[0].__class__.__name__}'")

    return candidates, "ASE"


def frames_to_json(frames):
    """
    Transform ``frames`` into the JSON structure chemiscope works with.

    The actual conversion is delegated to the implementation matching the type
    of the frames. Currently only `ase.Atoms` frames are supported.

    :param frames: iterable over structures (typically a list of frames)
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return [_ase_to_json(structure) for structure in structures]


def _list_atom_properties(frames):
    """
    Collect the names of the "atom" properties present in ``frames``.

    Chemiscope no longer extracts properties automatically; this listing is
    used to check whether the user might be missing some of them.
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return _ase_list_atom_properties(structures)


def _list_structure_properties(frames):
    """
    Collect the names of the "structure" properties present in ``frames``.

    Chemiscope no longer extracts properties automatically; this listing is
    used to check whether the user might be missing some of them.
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return _ase_list_structure_properties(structures)


def extract_properties(frames, only=None, environments=None):
    """
    Gather the properties stored in ``frames`` and return them in a
    chemiscope-compatible format.

    :param frames: iterable over structures (typically a list of frames)
    :param only: optional, list of strings. If not ``None``, only properties
                with a name from this list are included in the output.
    :param environments: optional, list of environments (described as
        ``(structure id, center id, cutoff)``) to include when extracting the
        atomic properties.
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return _ase_extract_properties(structures, only, environments)


def composition_properties(frames, environments=None):
    """
    Build properties describing the chemical composition of the given
    ``frames``.

    Two atomic properties are created: ``symbol`` (string) and ``number``
    (int); together with several structure properties: ``composition`` and
    ``n_{element}`` for each elements in the dataset. The properties are
    returned in chemiscope format.

    :param frames: iterable over structures (typically a list of frames)
    :param environments: optional, list of environments (described as
        ``(structure id, center id, cutoff)``) to include when generating the
        atomic properties.
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return _ase_composition_properties(structures, environments)


def all_atomic_environments(frames, cutoff=3.5):
    """
    Build the list of environments covering every atom of the given
    ``frames``. The optional spherical ``cutoff`` radius is used to display
    the environments in chemiscope.

    :param frames: iterable over structures (typically a list of frames)
    :param float cutoff: spherical cutoff radius used when displaying the
                         environments
    """
    structures, adapter = _guess_adapter(frames)

    if adapter != "ASE":
        raise Exception("reached unreachable code")

    return _ase_all_atomic_environments(structures, cutoff)


def librascal_atomic_environments(frames, cutoff=3.5):
    """
    Build the list of environments for the given ``frames`` in the same way
    librascal does when computing descriptors for only a subset of the atomic
    centers. The optional spherical ``cutoff`` radius is used to display the
    environments in chemiscope.

    Only ``ase.Atoms`` are supported for the ``frames`` since that's what
    librascal uses.

    :param frames: iterable over ``ase.Atoms``
    :param float cutoff: spherical cutoff radius used when displaying the
                         environments
    """
    structures, adapter = _guess_adapter(frames)

    if adapter == "ASE":
        return _ase_librascal_atomic_environments(structures, cutoff)

    raise Exception("librascal_atomic_environments only supports ASE frames")


In [20]:
# Print the class name of the object returned by `get_ipython()`, i.e. the
# kind of shell this notebook is running in.
from IPython import get_ipython
shell = get_ipython().__class__.__name__
print(shell)

ZMQInteractiveShell


In [22]:
# two hydrogen atoms placed along the x axis, used to try out the widget
structure = ase.Atoms("HH", positions=[[0, 0, 0], [1, 0, 0]])

# show only the structure panel for this single frame
chemiscope_widget = show(
    frames=[structure],
    properties=chemiscope.extract_properties([structure]),
    mode="structure",
)
display(chemiscope_widget)


StructureWidget(value='{"meta": {"name": " "}, "structures": [{"size": 2, "names": ["H", "H"], "x": [0.0, 1.0]…

In [13]:
# Import the code-input widget and display the version of the package in use.
from widget_code_input import WidgetCodeInput
import widget_code_input
widget_code_input.__version__

'3.5.5'

In [14]:
# Display the version of ipywidgets in use.
import ipywidgets
ipywidgets.__version__

'8.0.6'

In [10]:
# code-input widget for the exercise: the body of `methylammonium()` is left
# for the user to complete
ex02_wci = WidgetCodeInput(
    function_name="methylammonium",
    function_parameters="",
    docstring="""
Loads the structure #1 from the data/qm7b-ase.xyz file, 
and modifies the composition so that it corresponds to CH3NH3+. 

:return: an ASE atoms object that describes the molecular structure
""",
    function_body="""
# Write your solution, then click on the button below to update the plotter 
# and check against the reference value

import ase
from ase.io import read

structure = []  # load here

# modify the structure ...

return structure
""",
)

In [11]:
# Render the `ex02_wci` code-input widget in the notebook output.
display(ex02_wci)

WidgetCodeInput(code_theme='nord', docstring='\nLoads the structure #1 from the data/qm7b-ase.xyz file, \nand …