-
Notifications
You must be signed in to change notification settings - Fork 185
/
order.py
303 lines (233 loc) · 10.1 KB
/
order.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
"""
Structure featurizers based on packing or ordering.
"""
import math
import numpy as np
from pymatgen.analysis.local_env import VoronoiNN
from pymatgen.core import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from matminer.featurizers.base import BaseFeaturizer
from matminer.utils.caching import get_all_nearest_neighbors
class DensityFeatures(BaseFeaturizer):
    """
    Density and density-like descriptors of a structure.

    Features:
        - density
        - volume per atom ("vpa")
        - packing fraction
    """

    def __init__(self, desired_features=None):
        """
        Args:
            desired_features ([str]): which features to compute; choose from
                "density", "vpa" and "packing fraction". A falsy value
                (None, empty list) selects all three.
        """
        self.features = desired_features or ["density", "vpa", "packing fraction"]

    def precheck(self, s: Structure) -> bool:
        """
        Precheck a single entry. DensityFeatures does not work for disordered
        structures. To precheck an entire dataframe (and automatically gather
        the fraction of structures that will pass the precheck), please use
        precheck_dataframe.

        Args:
            s (pymatgen.Structure): The structure to precheck.

        Returns:
            (bool): If True, s passed the precheck; otherwise, it failed.
        """
        ordered = s.is_ordered
        return ordered

    def featurize(self, s):
        """
        Compute the requested density features for one structure.

        Args:
            s (pymatgen.Structure): structure to featurize.

        Returns:
            ([float]): requested values, always in the order
                density, vpa, packing fraction.

        Raises:
            ValueError: if "vpa" or "packing fraction" is requested for a
                disordered structure.
        """
        wanted = self.features
        values = []

        if "density" in wanted:
            values.append(s.density)

        if "vpa" in wanted:
            if not s.is_ordered:
                raise ValueError("Disordered structure support not built yet.")
            n_sites = len(s)
            values.append(s.volume / n_sites)

        if "packing fraction" in wanted:
            if not s.is_ordered:
                raise ValueError("Disordered structure support not built yet.")
            # Sum of r^3 over all sites; the 4/3 * pi prefactor is applied once
            # below to turn it into the total volume of the atomic spheres.
            cubed_radii = 0
            for site in s:
                cubed_radii += site.specie.atomic_radius**3
            values.append(4 * math.pi * cubed_radii / (3 * s.volume))

        return values

    def feature_labels(self):
        """Return the labels of the selected features in canonical order."""
        canonical_order = ("density", "vpa", "packing fraction")  # enforce order
        return [label for label in canonical_order if label in self.features]

    def citations(self):
        """Return the list of citations (none for this featurizer)."""
        return []

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Saurabh Bajaj", "Anubhav Jain"]
class ChemicalOrdering(BaseFeaturizer):
    """
    How much the ordering of species in the structure differs from random

    These parameters describe how much the ordering of all species in a
    structure deviates from random using a Warren-Cowley-like ordering
    parameter. The first step of this calculation is to determine the nearest
    neighbor shells of each site. Then, for each shell a degree of order for
    each type is determined by computing:

    :math:`\\alpha (t,s) = 1 - \\frac{\\sum_n w_n \\delta (t - t_n)}{x_t \\sum_n w_n}`

    where :math:`w_n` is the weight associated with a certain neighbor,
    :math:`t_n` is the type of the neighbor, and :math:`x_t` is the fraction
    of type t in the structure. For atoms that are randomly dispersed in a
    structure, this formula yields 0 for all types. For structures where
    each site is surrounded only by atoms of another type, this formula
    yields large values of :math:`\\alpha`.

    The mean absolute value of this parameter across all sites is used
    as a feature.

    Features:
        mean ordering parameter shell [n] - Mean ordering parameter for
            atoms in the n-th neighbor shell

    References:
        `Ward et al. _PRB_ 2017 <http://link.aps.org/doi/10.1103/PhysRevB.96.024104>`_"""

    def __init__(self, shells=(1, 2, 3), weight="area"):
        """Initialize the featurizer

        Args:
            shells ([int]) - Which neighbor shells to evaluate
            weight (str) - Attribute used to weigh neighbor contributions
                (forwarded to ``VoronoiNN``)
        """
        self.shells = shells
        self.weight = weight

    def featurize(self, strc):
        """Compute the mean absolute ordering parameter of every shell.

        Args:
            strc (pymatgen.Structure): structure to featurize.

        Returns:
            ([float]): one value per entry of ``self.shells``; all zeros when
                the structure contains a single element.
        """
        # The ordering parameter is defined per element, so oxidation states
        # are merged before deciding whether the structure is single-type.
        elem_comp = strc.composition.element_composition

        # Shortcut: Return 0 if there is only 1 type of atom
        if len(elem_comp) == 1:
            return [0] * len(self.shells)

        # Element types and their fractions x_t (loop-invariant, built once)
        elems, fracs = zip(*elem_comp.fractional_composition.items())
        fracs = np.array(fracs)

        # Precompute the list of NNs in the structure
        voro = VoronoiNN(weight=self.weight)
        all_nn = get_all_nearest_neighbors(voro, strc)

        n_sites = len(strc)
        output = []
        for shell in self.shells:
            # ordering[i, t]: ordering parameter of type t around site i
            ordering = np.zeros((n_sites, len(elems)))

            for site_idx in range(n_sites):
                nns = voro._get_nn_shell_info(strc, all_nn, site_idx, shell)

                # Denominator: total weight of every neighbor in this shell
                total_weight = sum(x["weight"] for x in nns)

                # Numerator: weight accumulated per neighbor element
                for nn in nns:
                    specie = nn["site"].specie
                    # Species carrying an oxidation state expose the bare
                    # element as ``.element``; plain elements are used as-is.
                    site_elem = getattr(specie, "element", specie)
                    ordering[site_idx, elems.index(site_elem)] += nn["weight"]

                # Compute the ordering parameter
                ordering[site_idx, :] = 1 - ordering[site_idx, :] / total_weight / fracs

            # Compute the average ordering for the entire structure
            output.append(np.abs(ordering).mean())
        return output

    def feature_labels(self):
        """Return one label per requested neighbor shell."""
        return [f"mean ordering parameter shell {n}" for n in self.shells]

    def citations(self):
        """Return the BibTeX entry of the reference describing this feature."""
        return [
            "@article{Ward2017,"
            "author = {Ward, Logan and Liu, Ruoqian "
            "and Krishna, Amar and Hegde, Vinay I. "
            "and Agrawal, Ankit and Choudhary, Alok "
            "and Wolverton, Chris},"
            "doi = {10.1103/PhysRevB.96.024104},"
            "journal = {Physical Review B},"
            "pages = {024104},"
            "title = {{Including crystal structure attributes "
            "in machine learning models of formation energies "
            "via Voronoi tessellations}},"
            "url = {http://link.aps.org/doi/10.1103/PhysRevB.96.024104},"
            "volume = {96},year = {2017}}"
        ]

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Logan Ward"]
class MaximumPackingEfficiency(BaseFeaturizer):
    """
    Maximum possible packing efficiency of this structure

    Uses a Voronoi tessellation to determine the largest radius each atom
    can have before it touches any one of its neighbors. Given these maximum
    radii, this class computes the maximum packing efficiency of the
    structure as a feature.

    Features:
        max packing efficiency - Maximum possible packing efficiency
    """

    def featurize(self, strc):
        """Compute the maximum packing efficiency of a structure.

        Args:
            strc (pymatgen.Structure): structure to featurize.

        Returns:
            ([float]): single-element list holding the packing efficiency.
        """
        tessellator = VoronoiNN()

        # Voronoi polyhedron of every site, in site order
        polyhedra = []
        for site_index in range(len(strc)):
            polyhedra.append(tessellator.get_voronoi_polyhedra(strc, site_index))

        # A sphere centered on a site can grow until it reaches the nearest
        # face of that site's Voronoi cell, so that distance is its maximum
        # radius.
        largest_radii = []
        for faces in polyhedra:
            largest_radii.append(min(face["face_dist"] for face in faces.values()))

        # Total volume of the maximal spheres relative to the cell volume
        filled_volume = 4.0 / 3.0 * np.pi * np.power(largest_radii, 3).sum()
        return [filled_volume / strc.volume]

    def feature_labels(self):
        """Return the label of the single feature."""
        return ["max packing efficiency"]

    def citations(self):
        """Return the BibTeX entry of the reference describing this feature."""
        return [
            "@article{Ward2017,"
            "author = {Ward, Logan and Liu, Ruoqian "
            "and Krishna, Amar and Hegde, Vinay I. "
            "and Agrawal, Ankit and Choudhary, Alok "
            "and Wolverton, Chris},"
            "doi = {10.1103/PhysRevB.96.024104},"
            "journal = {Physical Review B},"
            "pages = {024104},"
            "title = {{Including crystal structure attributes "
            "in machine learning models of formation energies "
            "via Voronoi tessellations}},"
            "url = {http://link.aps.org/doi/10.1103/PhysRevB.96.024104},"
            "volume = {96},year = {2017}}"
        ]

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Logan Ward"]
class StructuralComplexity(BaseFeaturizer):
    r"""
    Shannon information entropy of a structure.

    This descriptor treats a structure as a message
    to evaluate structural complexity (:math:`S`)
    using the following equation:

    :math:`S = - v \sum_{i=1}^{k} p_i \log_2 p_i`

    :math:`p_i = m_i / v`

    where :math:`v` is the total number of atoms in the unit cell,
    :math:`p_i` is the probability mass function,
    :math:`k` is the number of symmetrically inequivalent sites, and
    :math:`m_i` is the number of sites classified in the :math:`i` th
    symmetrically inequivalent site.

    Features:
        - structural complexity per atom: information entropy (bits/atom)
        - structural complexity per cell: information entropy (bits/unit cell)

    Args:
        symprec: precision for symmetrizing a structure
    """

    def __init__(self, symprec=0.1):
        self.symprec = symprec

    def featurize(self, struct):
        """Compute the information entropy per atom and per unit cell.

        Args:
            struct (pymatgen.Structure): structure to featurize.

        Returns:
            (tuple): (entropy per atom, entropy per unit cell).
        """
        total_sites = len(struct.sites)
        analyzer = SpacegroupAnalyzer(struct, symprec=self.symprec)
        symmetrized = analyzer.get_symmetrized_structure()

        # Each group of symmetry-equivalent sites contributes -p * log2(p),
        # with p the fraction of all sites that belong to the group.
        entropy_per_atom = 0
        for orbit in symmetrized.equivalent_sites:
            fraction = len(orbit) / total_sites
            entropy_per_atom -= fraction * np.log2(fraction)

        entropy_per_cell = entropy_per_atom * total_sites
        return (entropy_per_atom, entropy_per_cell)

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Koki Muraoka"]

    def feature_labels(self):
        """Return the labels of the two features, per atom then per cell."""
        return ["structural complexity per atom", "structural complexity per cell"]

    def citations(self):
        """Return the BibTeX entry of the reference describing this feature."""
        return [
            "@article{complexity2013,"
            "author = {Krivovichev, S. V.},"
            "title = {Structural complexity of minerals: information storage and processing in the mineral world},"
            "journal = {Mineral. Mag.},"
            "volume = {77},"
            "number = {3},"
            "pages = {275-326},"
            "year = {2013},"
            "month = {04},"
            "issn = {0026-461X},"
            "doi = {10.1180/minmag.2013.077.3.05},"
            "url = {https://doi.org/10.1180/minmag.2013.077.3.05}}",
        ]