Skip to content

Commit

Permalink
Merge pull request #2020 from ncfrey/material_featurizer_renames
Browse files Browse the repository at this point in the history
Descriptive material featurizer names
  • Loading branch information
Bharath Ramsundar committed Jul 17, 2020
2 parents 53366e7 + 64c5323 commit 0b8b134
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 34 deletions.
4 changes: 2 additions & 2 deletions deepchem/feat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"""
from deepchem.feat.base_classes import Featurizer
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.feat.base_classes import StructureFeaturizer
from deepchem.feat.base_classes import CompositionFeaturizer
from deepchem.feat.base_classes import MaterialStructureFeaturizer
from deepchem.feat.base_classes import MaterialCompositionFeaturizer
from deepchem.feat.base_classes import ComplexFeaturizer
from deepchem.feat.base_classes import UserDefinedFeaturizer
from deepchem.feat.graph_features import ConvMolFeaturizer
Expand Down
61 changes: 43 additions & 18 deletions deepchem/feat/base_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

logger = logging.getLogger(__name__)

JSON = Dict[str, Any]


class Featurizer(object):
"""Abstract class for calculating a set of features for a datapoint.
Expand All @@ -23,15 +21,18 @@ class Featurizer(object):
new datatype.
"""

def featurize(self, datapoints, log_every_n=1000):
def featurize(self, datapoints: Iterable[Any],
log_every_n: int = 1000) -> np.ndarray:
"""Calculate features for datapoints.
Parameters
----------
datapoints: iterable
A sequence of objects that you'd like to featurize. Subclasses of
`Featurizer` should instantiate the `_featurize` method that featurizes
objects in the sequence.
datapoints: Iterable[Any]
A sequence of objects that you'd like to featurize. Subclasses of
`Featurizer` should instantiate the `_featurize` method that featurizes
objects in the sequence.
log_every_n: int, default 1000
Logs featurization progress every `log_every_n` steps.
Returns
-------
Expand Down Expand Up @@ -67,8 +68,9 @@ def _featurize(self, datapoint):
Parameters
----------
datapoint: object
a single datapoint in a sequence of objects
datapoint: object
Any blob of data you like. Subclasses should implement
this method.
"""
raise NotImplementedError('Featurizer is not defined.')

Expand Down Expand Up @@ -220,12 +222,12 @@ def featurize(self, molecules, log_every_n=1000):
return features


class StructureFeaturizer(Featurizer):
class MaterialStructureFeaturizer(Featurizer):
"""
Abstract class for calculating a set of features for an
inorganic crystal structure.
The defining feature of a `StructureFeaturizer` is that it
The defining feature of a `MaterialStructureFeaturizer` is that it
operates on 3D crystal structures with periodic boundary conditions.
Inorganic crystal structures are represented by Pymatgen structure
objects. Featurizers for inorganic crystal structures that are subclasses of
Expand All @@ -244,15 +246,16 @@ class StructureFeaturizer(Featurizer):
"""

def featurize(self, structures: Iterable[JSON],
def featurize(self,
structures: Iterable[Dict[str, Any]],
log_every_n: int = 1000) -> np.ndarray:
"""Calculate features for crystal structures.
Parameters
----------
structures: Iterable[JSON]
structures: Iterable[Dict[str, Any]]
Iterable sequence of pymatgen structure dictionaries.
Json-serializable dictionary representation of pymatgen.core.structure
Dictionary representations of pymatgen.Structure
https://pymatgen.org/pymatgen.core.structure.html
log_every_n: int, default 1000
Logging messages reported every `log_every_n` samples.
Expand All @@ -265,7 +268,6 @@ def featurize(self, structures: Iterable[JSON],
"""

# Convert iterables to list
structures = list(structures)

try:
Expand All @@ -288,13 +290,25 @@ def featurize(self, structures: Iterable[JSON],
features = np.asarray(features)
return features

def __call__(self, structures: Iterable[Dict[str, Any]]):
"""Calculate features for crystal structures.
Parameters
----------
structures: Iterable[Dict[str, Any]]
An iterable of pymatgen.Structure dictionaries.
"""

return self.featurize(structures)

class CompositionFeaturizer(Featurizer):

class MaterialCompositionFeaturizer(Featurizer):
"""
Abstract class for calculating a set of features for an
inorganic crystal composition.
The defining feature of a `CompositionFeaturizer` is that it
The defining feature of a `MaterialCompositionFeaturizer` is that it
operates on 3D crystal chemical compositions.
Inorganic crystal compositions are represented by Pymatgen composition
objects. Featurizers for inorganic crystal compositions that are
Expand Down Expand Up @@ -332,7 +346,6 @@ def featurize(self, compositions: Iterable[str],
"""

# Convert iterables to list
compositions = list(compositions)

try:
Expand All @@ -355,6 +368,18 @@ def featurize(self, compositions: Iterable[str],
features = np.asarray(features)
return features

def __call__(self, compositions: Iterable[str]):
"""Calculate features for crystal compositions.
Parameters
----------
compositions: Iterable[str]
An iterable of crystal compositions.
"""

return self.featurize(compositions)


class UserDefinedFeaturizer(Featurizer):
"""Directs usage of user-computed featurizations."""
Expand Down
12 changes: 6 additions & 6 deletions deepchem/feat/materials_featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import numpy as np

from deepchem.feat import StructureFeaturizer, CompositionFeaturizer
from deepchem.feat import MaterialStructureFeaturizer, MaterialCompositionFeaturizer
from deepchem.utils import pad_array


class ElementPropertyFingerprint(CompositionFeaturizer):
class ElementPropertyFingerprint(MaterialCompositionFeaturizer):
"""
Fingerprint of elemental properties from composition.
Expand Down Expand Up @@ -50,7 +50,7 @@ def __init__(self, data_source='matminer'):

self.data_source = data_source

def _featurize(self, composition: "pymatgen.Composition"):
def _featurize(self, composition):
"""
Calculate chemical fingerprint from crystal composition.
Expand Down Expand Up @@ -81,7 +81,7 @@ def _featurize(self, composition: "pymatgen.Composition"):
return np.array(feats)


class SineCoulombMatrix(StructureFeaturizer):
class SineCoulombMatrix(MaterialStructureFeaturizer):
"""
Calculate sine Coulomb matrix for crystals.
Expand Down Expand Up @@ -124,7 +124,7 @@ def __init__(self, max_atoms, flatten=True):
self.max_atoms = int(max_atoms)
self.flatten = flatten

def _featurize(self, struct: "pymatgen.Structure"):
def _featurize(self, struct):
"""
Calculate sine Coulomb matrix from pymatgen structure.
Expand Down Expand Up @@ -164,7 +164,7 @@ def _featurize(self, struct: "pymatgen.Structure"):
return features


class StructureGraphFeaturizer(StructureFeaturizer):
class StructureGraphFeaturizer(MaterialStructureFeaturizer):
"""
Calculate structure graph features for crystals.
Expand Down
16 changes: 8 additions & 8 deletions docs/featurizers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,17 @@ AtomConvFeaturizer
.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
:members:

StructureFeaturizer
-------------------
MaterialStructureFeaturizer
---------------------------

Structure Featurizers are those that work with datasets of crystals with
Material Structure Featurizers are those that work with datasets of crystals with
periodic boundary conditions. For inorganic crystal structures, these
featurizers operate on pymatgen.Structure objects, which include a
lattice and 3D coordinates that specify a periodic crystal structure.
They should be applied on systems that have periodic boundary conditions.
Structure featurizers are not designed to work with molecules.

.. autoclass:: deepchem.feat.StructureFeaturizer
.. autoclass:: deepchem.feat.MaterialStructureFeaturizer
:members:

SineCoulombMatrix
Expand All @@ -186,17 +186,17 @@ StructureGraphFeaturizer
.. autoclass:: deepchem.feat.StructureGraphFeaturizer
:members:

CompositionFeaturizer
---------------------
MaterialCompositionFeaturizer
-----------------------------

Composition Featurizers are those that work with datasets of crystal
Material Composition Featurizers are those that work with datasets of crystal
compositions with periodic boundary conditions.
For inorganic crystal structures, these featurizers operate on chemical
compositions (e.g. "MoS2"). They should be applied on systems that have
periodic boundary conditions. Composition featurizers are not designed
to work with molecules.

.. autoclass:: deepchem.feat.CompositionFeaturizer
.. autoclass:: deepchem.feat.MaterialCompositionFeaturizer
:members:

ElementPropertyFingerprint
Expand Down

0 comments on commit 0b8b134

Please sign in to comment.