/
element_property_fingerprint.py
98 lines (82 loc) · 3.43 KB
/
element_property_fingerprint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
from deepchem.utils.typing import PymatgenComposition
from deepchem.feat import MaterialCompositionFeaturizer
from typing import Any
class ElementPropertyFingerprint(MaterialCompositionFeaturizer):
    """
    Fingerprint of elemental properties from composition.

    Based on the data source chosen, returns properties and statistics
    (min, max, range, mean, standard deviation, mode) for a compound
    based on elemental stoichiometry. E.g., the average electronegativity
    of atoms in a crystal structure. The chemical fingerprint is a
    vector of these statistics. For a full list of properties and statistics,
    see ``matminer.featurizers.composition.ElementProperty(data_source).feature_labels()``.

    This featurizer requires the optional dependencies pymatgen and
    matminer. It may be useful when only crystal compositions are available
    (and not 3D coordinates).

    See references [1]_, [2]_, [3]_, [4]_ for more details.

    References
    ----------
    .. [1] MagPie data: Ward, L. et al. npj Comput Mater 2, 16028 (2016).
       https://doi.org/10.1038/npjcompumats.2016.28
    .. [2] Deml data: Deml, A. et al. Physical Review B 93, 085142 (2016).
       https://doi.org/10.1103/PhysRevB.93.085142
    .. [3] Matminer: Ward, L. et al. Comput. Mater. Sci. 152, 60-69 (2018).
    .. [4] Pymatgen: Ong, S.P. et al. Comput. Mater. Sci. 68, 314-319 (2013).

    Examples
    --------
    >>> import deepchem as dc
    >>> import pymatgen as mg
    >>> comp = mg.core.Composition("Fe2O3")
    >>> featurizer = dc.feat.ElementPropertyFingerprint()
    >>> features = featurizer.featurize([comp])
    >>> type(features[0])
    <class 'numpy.ndarray'>
    >>> features[0].shape
    (65,)

    Note
    ----
    This class requires matminer and Pymatgen to be installed.
    `NaN` feature values are automatically converted to 0 by this featurizer.
    """

    def __init__(self, data_source: str = 'matminer'):
        """
        Parameters
        ----------
        data_source: str of "matminer", "magpie" or "deml" (default "matminer")
            Source for element property data.
        """
        self.data_source = data_source
        # The matminer featurizer is built lazily on the first call to
        # `_featurize`, so constructing this object does not import matminer.
        self.ep_featurizer: Any = None

    def _featurize(self, datapoint: PymatgenComposition,
                   **kwargs) -> np.ndarray:
        """
        Calculate chemical fingerprint from crystal composition.

        Parameters
        ----------
        datapoint: pymatgen.core.Composition object
            Composition object.
        **kwargs
            Only the deprecated ``composition`` keyword is inspected; it is
            used as the datapoint when ``datapoint`` is None.

        Returns
        -------
        feats: np.ndarray
            Vector of properties and statistics derived from chemical
            stoichiometry. NaN values are replaced by 0. If the underlying
            matminer featurizer fails for this datapoint, an empty array
            is returned.

        Raises
        ------
        ImportError
            If matminer (or one of the modules it imports) is not installed.
        """
        if 'composition' in kwargs and datapoint is None:
            # Local import: `warnings` is only needed on this legacy path.
            import warnings

            datapoint = kwargs.get("composition")
            # Emit a warning rather than raising it: raising would discard
            # the datapoint just assigned and make the deprecated keyword
            # unusable instead of merely discouraged.
            warnings.warn(
                'Composition is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)

        if self.ep_featurizer is None:
            try:
                from matminer.featurizers.composition import ElementProperty
                self.ep_featurizer = ElementProperty.from_preset(
                    self.data_source)
            except ModuleNotFoundError as err:
                raise ImportError(
                    "This class requires matminer to be installed.") from err

        try:
            feats = self.ep_featurizer.featurize(datapoint)
        except Exception:
            # Best-effort: a composition that matminer cannot featurize
            # yields an empty feature vector. `Exception` (not a bare
            # `except`) lets KeyboardInterrupt/SystemExit propagate.
            feats = []

        return np.nan_to_num(np.array(feats))