# Tutorial on xtalmet package
This notebook provides a tutorial on the two primary features of the `xtalmet` package:
- Calculating distances between crystals
- Evaluating the uniqueness and novelty of a given set of generated crystals

### Setup

In [None]:
import gzip
import pickle

from huggingface_hub import hf_hub_download
from pymatgen.core import Lattice, Structure

from xtalmet.constants import HF_VERSION
from xtalmet.distance import distance
from xtalmet.evaluator import Evaluator

### Distance calculation

In [2]:
# Load the four reference crystals from their CIF files
wzZnO = Structure.from_file("data/wz-ZnO.cif")
rsZnO = Structure.from_file("data/rs-ZnO.cif")
wzGaN = Structure.from_file("data/wz-GaN.cif")
Bi2Te3 = Structure.from_file("data/Bi2Te3.cif")

# Build the fifth crystal, the 2x2x2 supercell of wz-ZnO, by hand: every site of the
# unit cell is replicated into the eight sub-cells, and its fractional coordinates are
# halved because each lattice length doubles.
cell_shifts = [(i, j, k) for i in range(2) for j in range(2) for k in range(2)]
frac_coords_super = [
	[(xyz[0] + i) / 2, (xyz[1] + j) / 2, (xyz[2] + k) / 2]
	for xyz in wzZnO.frac_coords
	for i, j, k in cell_shifts
]
# One species entry per replicated site, in the same site-major order as the coordinates
species_super = [specie for specie in wzZnO.species for _ in cell_shifts]

# Same angles as the unit cell, doubled lengths
wzZnO_2x2x2 = Structure(
	lattice=Lattice.from_parameters(
		a=wzZnO.lattice.a * 2,
		b=wzZnO.lattice.b * 2,
		c=wzZnO.lattice.c * 2,
		alpha=wzZnO.lattice.alpha,
		beta=wzZnO.lattice.beta,
		gamma=wzZnO.lattice.gamma,
	),
	species=species_super,
	coords=frac_coords_super,
)



In [3]:
# d_smat: a binary distance based on pymatgen's StructureMatcher
# (in the recorded run below, only the supercell matches wz-ZnO and gets 0.0; the
# other three pairs get 1.0)
print(f"d_smat(wz-ZnO, wz-ZnO supercell) = {distance('smat', wzZnO, wzZnO_2x2x2)}")
print(f"d_smat(wz-ZnO, rs-ZnO) = {distance('smat', wzZnO, rsZnO)}")
print(f"d_smat(wz-ZnO, wz-GaN) = {distance('smat', wzZnO, wzGaN)}")
print(f"d_smat(wz-ZnO, Bi2Te3) = {distance('smat', wzZnO, Bi2Te3)}")

d_smat(wz-ZnO, wz-ZnO supercell) = 0.0
d_smat(wz-ZnO, rs-ZnO) = 1.0
d_smat(wz-ZnO, wz-GaN) = 1.0
d_smat(wz-ZnO, Bi2Te3) = 1.0


In [4]:
# You may optionally specify arguments for StructureMatcher via kwargs["args_dist"].
# ref: https://pymatgen.org/pymatgen.analysis.html#pymatgen.analysis.structure_matcher.StructureMatcher
# Here the length (ltol), site (stol), and angle (angle_tol) tolerances are looser than
# pymatgen's documented defaults (0.2, 0.3, 5). The recorded results are unchanged.
kwargs = {"args_dist": {"ltol": 0.3, "stol": 0.5, "angle_tol": 6}}
print(
	f"d_smat(wz-ZnO, wz-ZnO supercell) = {distance('smat', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_smat(wz-ZnO, rs-ZnO) = {distance('smat', wzZnO, rsZnO, **kwargs)}")
print(f"d_smat(wz-ZnO, wz-GaN) = {distance('smat', wzZnO, wzGaN, **kwargs)}")
print(f"d_smat(wz-ZnO, Bi2Te3) = {distance('smat', wzZnO, Bi2Te3, **kwargs)}")

d_smat(wz-ZnO, wz-ZnO supercell) = 0.0
d_smat(wz-ZnO, rs-ZnO) = 1.0
d_smat(wz-ZnO, wz-GaN) = 1.0
d_smat(wz-ZnO, Bi2Te3) = 1.0


In [5]:
# d_comp: a binary distance based on the match of compositions
# Only the composition is compared, so in the recorded run the polymorph rs-ZnO is
# at distance 0.0 from wz-ZnO, just like the supercell.
print(f"d_comp(wz-ZnO, wz-ZnO supercell) = {distance('comp', wzZnO, wzZnO_2x2x2)}")
print(f"d_comp(wz-ZnO, rs-ZnO) = {distance('comp', wzZnO, rsZnO)}")
print(f"d_comp(wz-ZnO, wz-GaN) = {distance('comp', wzZnO, wzGaN)}")
print(f"d_comp(wz-ZnO, Bi2Te3) = {distance('comp', wzZnO, Bi2Te3)}")

d_comp(wz-ZnO, wz-ZnO supercell) = 0.0
d_comp(wz-ZnO, rs-ZnO) = 0.0
d_comp(wz-ZnO, wz-GaN) = 1.0
d_comp(wz-ZnO, Bi2Te3) = 1.0


In [6]:
# d_wyckoff: a binary distance based on the match of space groups and Wyckoff letters
# In the recorded run wz-GaN is at distance 0.0 from wz-ZnO even though the elements
# differ, while the polymorph rs-ZnO is at distance 1.0.
print(
	f"d_wyckoff(wz-ZnO, wz-ZnO supercell) = {distance('wyckoff', wzZnO, wzZnO_2x2x2)}"
)
print(f"d_wyckoff(wz-ZnO, rs-ZnO) = {distance('wyckoff', wzZnO, rsZnO)}")
print(f"d_wyckoff(wz-ZnO, wz-GaN) = {distance('wyckoff', wzZnO, wzGaN)}")
print(f"d_wyckoff(wz-ZnO, Bi2Te3) = {distance('wyckoff', wzZnO, Bi2Te3)}")

d_wyckoff(wz-ZnO, wz-ZnO supercell) = 0.0
d_wyckoff(wz-ZnO, rs-ZnO) = 1.0
d_wyckoff(wz-ZnO, wz-GaN) = 0.0
d_wyckoff(wz-ZnO, Bi2Te3) = 1.0


In [7]:
# You may optionally specify arguments for SpacegroupAnalyzer via kwargs["args_emb"].
# ref: https://pymatgen.org/pymatgen.symmetry.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer
# With these tolerances the recorded results are the same as in the default call.
kwargs = {"args_emb": {"symprec": 0.1, "angle_tolerance": 10}}
print(
	f"d_wyckoff(wz-ZnO, wz-ZnO supercell) = {distance('wyckoff', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_wyckoff(wz-ZnO, rs-ZnO) = {distance('wyckoff', wzZnO, rsZnO, **kwargs)}")
print(f"d_wyckoff(wz-ZnO, wz-GaN) = {distance('wyckoff', wzZnO, wzGaN, **kwargs)}")
print(f"d_wyckoff(wz-ZnO, Bi2Te3) = {distance('wyckoff', wzZnO, Bi2Te3, **kwargs)}")

d_wyckoff(wz-ZnO, wz-ZnO supercell) = 0.0
d_wyckoff(wz-ZnO, rs-ZnO) = 1.0
d_wyckoff(wz-ZnO, wz-GaN) = 0.0
d_wyckoff(wz-ZnO, Bi2Te3) = 1.0


In [8]:
# d_magpie: a continuous distance using compositional Magpie fingerprints
# Being purely compositional, it gives 0.0 for both the supercell and the polymorph
# rs-ZnO in the recorded run.
print(f"d_magpie(wz-ZnO, wz-ZnO supercell) = {distance('magpie', wzZnO, wzZnO_2x2x2)}")
print(f"d_magpie(wz-ZnO, rs-ZnO) = {distance('magpie', wzZnO, rsZnO)}")
print(f"d_magpie(wz-ZnO, wz-GaN) = {distance('magpie', wzZnO, wzGaN)}")
print(f"d_magpie(wz-ZnO, Bi2Te3) = {distance('magpie', wzZnO, Bi2Te3)}")

d_magpie(wz-ZnO, wz-ZnO supercell) = 0.0
d_magpie(wz-ZnO, rs-ZnO) = 0.0
d_magpie(wz-ZnO, wz-GaN) = 0.9984146672668104
d_magpie(wz-ZnO, Bi2Te3) = 0.9990659142269455


In [9]:
# d_pdd: a continuous structural distance using the pointwise distance distribution
# In the recorded run the supercell is at distance 0.0, and wz-GaN is much closer to
# wz-ZnO (about 0.09) than rs-ZnO (about 0.51) or Bi2Te3 (about 0.77).
print(f"d_pdd(wz-ZnO, wz-ZnO supercell) = {distance('pdd', wzZnO, wzZnO_2x2x2)}")
print(f"d_pdd(wz-ZnO, rs-ZnO) = {distance('pdd', wzZnO, rsZnO)}")
print(f"d_pdd(wz-ZnO, wz-GaN) = {distance('pdd', wzZnO, wzGaN)}")
print(f"d_pdd(wz-ZnO, Bi2Te3) = {distance('pdd', wzZnO, Bi2Te3)}")

d_pdd(wz-ZnO, wz-ZnO supercell) = 0.0
d_pdd(wz-ZnO, rs-ZnO) = 0.5101944694490168
d_pdd(wz-ZnO, wz-GaN) = 0.08829102742344005
d_pdd(wz-ZnO, Bi2Te3) = 0.7661404980348848


In [10]:
# You may optionally specify arguments for amd.PDD and amd.PDD_cdist. kwargs["args_emb"]
# are arguments for amd.PDD, and kwargs["args_dist"] are arguments for amd.PDD_cdist.
# ref: https://average-minimum-distance.readthedocs.io/en/stable/Using_PDDs.html
# "k" is the number of nearest neighbours considered for each atom (see the reference).
kwargs = {
	"args_emb": {"k": 200},
	"args_dist": {
		"metric": "chebyshev",
		"backend": "multiprocessing",
		"n_jobs": 2,
		"verbose": False,
	},
}
print(
	f"d_pdd(wz-ZnO, wz-ZnO supercell) = {distance('pdd', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_pdd(wz-ZnO, rs-ZnO) = {distance('pdd', wzZnO, rsZnO, **kwargs)}")
print(f"d_pdd(wz-ZnO, wz-GaN) = {distance('pdd', wzZnO, wzGaN, **kwargs)}")
print(f"d_pdd(wz-ZnO, Bi2Te3) = {distance('pdd', wzZnO, Bi2Te3, **kwargs)}")

d_pdd(wz-ZnO, wz-ZnO supercell) = 0.0
d_pdd(wz-ZnO, rs-ZnO) = 0.5101944694490168
d_pdd(wz-ZnO, wz-GaN) = 0.1078037037530719
d_pdd(wz-ZnO, Bi2Te3) = 0.7871729223296379


In [11]:
# d_amd: a continuous structural distance using the average minimum distance
# Note: in the recorded run the supercell distance is ~1e-15 rather than exactly 0.0.
# This is presumably floating-point round-off, not a real structural difference.
print(f"d_amd(wz-ZnO, wz-ZnO supercell) = {distance('amd', wzZnO, wzZnO_2x2x2)}")
print(f"d_amd(wz-ZnO, rs-ZnO) = {distance('amd', wzZnO, rsZnO)}")
print(f"d_amd(wz-ZnO, wz-GaN) = {distance('amd', wzZnO, wzGaN)}")
print(f"d_amd(wz-ZnO, Bi2Te3) = {distance('amd', wzZnO, Bi2Te3)}")

d_amd(wz-ZnO, wz-ZnO supercell) = 4.4408920985006064e-15
d_amd(wz-ZnO, rs-ZnO) = 0.5101944710764641
d_amd(wz-ZnO, wz-GaN) = 0.08829102742344139
d_amd(wz-ZnO, Bi2Te3) = 0.7641429760635811


In [12]:
# You may optionally specify arguments for amd.AMD and amd.AMD_cdist. kwargs["args_emb"]
# are arguments for amd.AMD, and kwargs["args_dist"] are arguments for amd.AMD_cdist.
# ref: https://average-minimum-distance.readthedocs.io/en/stable/Using_AMDs.html
# "k" is the number of nearest neighbours considered for each atom (see the reference).
kwargs = {
	"args_emb": {"k": 200},
	"args_dist": {"metric": "chebyshev", "low_memory": False},
}
print(
	f"d_amd(wz-ZnO, wz-ZnO supercell) = {distance('amd', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_amd(wz-ZnO, rs-ZnO) = {distance('amd', wzZnO, rsZnO, **kwargs)}")
print(f"d_amd(wz-ZnO, wz-GaN) = {distance('amd', wzZnO, wzGaN, **kwargs)}")
print(f"d_amd(wz-ZnO, Bi2Te3) = {distance('amd', wzZnO, Bi2Te3, **kwargs)}")

d_amd(wz-ZnO, wz-ZnO supercell) = 7.105427357600951e-15
d_amd(wz-ZnO, rs-ZnO) = 0.5101944710764641
d_amd(wz-ZnO, wz-GaN) = 0.10780370554840095
d_amd(wz-ZnO, Bi2Te3) = 0.7825441112284902


In [13]:
# d_elmd: The Element Movers Distance (ElMD), which is continuous and compositional
# Being compositional, it gives 0.0 for both the supercell and the polymorph rs-ZnO in
# the recorded run.
print(f"d_elmd(wz-ZnO, wz-ZnO supercell) = {distance('elmd', wzZnO, wzZnO_2x2x2)}")
print(f"d_elmd(wz-ZnO, rs-ZnO) = {distance('elmd', wzZnO, rsZnO)}")
print(f"d_elmd(wz-ZnO, wz-GaN) = {distance('elmd', wzZnO, wzGaN)}")
print(f"d_elmd(wz-ZnO, Bi2Te3) = {distance('elmd', wzZnO, Bi2Te3)}")

d_elmd(wz-ZnO, wz-ZnO supercell) = 0.0
d_elmd(wz-ZnO, rs-ZnO) = 0.0
d_elmd(wz-ZnO, wz-GaN) = 0.875
d_elmd(wz-ZnO, Bi2Te3) = 0.9145299145299145


In [14]:
# You may optionally specify arguments for ElMD.elmd via kwargs["args_dist"].
# ref: https://github.com/lrcfmd/ElMD/blob/master/ElMD/ElMD.py#L118
# NOTE(review): with "mod_petti" the recorded results are identical to the default
# call, which suggests "mod_petti" is already the default metric -- confirm.
kwargs = {"args_dist": {"metric": "mod_petti"}}
print(
	f"d_elmd(wz-ZnO, wz-ZnO supercell) = {distance('elmd', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_elmd(wz-ZnO, rs-ZnO) = {distance('elmd', wzZnO, rsZnO, **kwargs)}")
print(f"d_elmd(wz-ZnO, wz-GaN) = {distance('elmd', wzZnO, wzGaN, **kwargs)}")
print(f"d_elmd(wz-ZnO, Bi2Te3) = {distance('elmd', wzZnO, Bi2Te3, **kwargs)}")

d_elmd(wz-ZnO, wz-ZnO supercell) = 0.0
d_elmd(wz-ZnO, rs-ZnO) = 0.0
d_elmd(wz-ZnO, wz-GaN) = 0.875
d_elmd(wz-ZnO, Bi2Te3) = 0.9145299145299145


In [15]:
# d_elmd+amd: The weighted sum of ElMD and AMD distances.
# It combines a compositional term (ElMD) with a structural term (AMD). For rs-ZnO the
# ElMD term is 0, so the recorded value (about 0.114) comes from the AMD term alone.
print(
	f"d_elmd+amd(wz-ZnO, wz-ZnO supercell) = {distance('elmd+amd', wzZnO, wzZnO_2x2x2)}"
)
print(f"d_elmd+amd(wz-ZnO, rs-ZnO) = {distance('elmd+amd', wzZnO, rsZnO)}")
print(f"d_elmd+amd(wz-ZnO, wz-GaN) = {distance('elmd+amd', wzZnO, wzGaN)}")
print(f"d_elmd+amd(wz-ZnO, Bi2Te3) = {distance('elmd+amd', wzZnO, Bi2Te3)}")

d_elmd+amd(wz-ZnO, wz-ZnO supercell) = 9.932194051053093e-16
d_elmd+amd(wz-ZnO, rs-ZnO) = 0.11410658890398942
d_elmd+amd(wz-ZnO, wz-GaN) = 0.6990500781369319
d_elmd+amd(wz-ZnO, Bi2Te3) = 0.8808954054614389


In [16]:
# You may optionally specify arguments for d_elmd and d_amd. You can also specify the
# coefficients for the weighted sum.
# Arguments are nested per component under the "elmd" and "amd" keys. With the "coefs"
# below, the result is 0.3 * d_elmd + 0.7 * d_amd (e.g. for rs-ZnO in the recorded
# run: 0.3 * 0 + 0.7 * 0.510 = 0.357).
kwargs = {
	"args_emb": {"amd": {"k": 200}},
	"args_dist": {
		"elmd": {"metric": "mod_petti"},
		"amd": {"metric": "chebyshev", "low_memory": False},
		"coefs": {"elmd": 0.3, "amd": 0.7},
	},
}
print(
	f"d_elmd+amd(wz-ZnO, wz-ZnO supercell) = {distance('elmd+amd', wzZnO, wzZnO_2x2x2, **kwargs)}"
)
print(f"d_elmd+amd(wz-ZnO, rs-ZnO) = {distance('elmd+amd', wzZnO, rsZnO, **kwargs)}")
print(f"d_elmd+amd(wz-ZnO, wz-GaN) = {distance('elmd+amd', wzZnO, wzGaN, **kwargs)}")
print(f"d_elmd+amd(wz-ZnO, Bi2Te3) = {distance('elmd+amd', wzZnO, Bi2Te3, **kwargs)}")

d_elmd+amd(wz-ZnO, wz-ZnO supercell) = 4.9737991503206655e-15
d_elmd+amd(wz-ZnO, rs-ZnO) = 0.3571361297535248
d_elmd+amd(wz-ZnO, wz-GaN) = 0.33796259388388067
d_elmd+amd(wz-ZnO, Bi2Te3) = 0.8221398522189174


### VSUN evaluation

In [17]:
# Load a set of crystals

# You can alternatively prepare your own set of crystals (list[xtalmet.crystal.Crystal]
# or list[pymatgen.core.Structure]).

# Download a gzip-compressed pickle from the Hugging Face dataset repository, pinned
# to the revision given by HF_VERSION.
path = hf_hub_download(
	repo_id="masahiro-negishi/xtalmet",
	filename="mp20/model/adit.pkl.gz",
	repo_type="dataset",
	revision=HF_VERSION,
)
# SECURITY: unpickling can execute arbitrary code. Only load pickle files from sources
# you trust; do not point this cell at an untrusted repository or file.
with gzip.open(path, "rb") as f:
	gen_xtals = pickle.load(f)

# Use only the first 10 crystals for this tutorial
gen_xtals = gen_xtals[:10]

In [18]:
# Evaluate uniqueness

evaluator = Evaluator(
	uniqueness=True,
	distance="amd",  # Options: "smat", "comp", "wyckoff", "magpie", "pdd", "amd", "elmd", "elmd+amd"
)

# evaluate() returns three values: the overall score (the average of the per-crystal
# scores in the recorded run), the per-crystal scores, and a dict of computation times
# per step (presumably in seconds).
overall_score, individual_scores, times = evaluator.evaluate(
	xtals=gen_xtals, dir_intermediate=None
)

# Note: If you evaluate the same large set of crystals several times (e.g., trying
# different aggregation functions), we recommend specifying "dir_intermediate." This
# means that validity scores, energy above hulls, embeddings, and distance matrices
# computed in the first run will be saved in the directory and used thereafter,
# significantly reducing the computation cost.

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.5159663868488782
Individual uniqueness scores: [0.47377589 0.5871229  0.44919673 0.47289371 0.6326102  0.60378309
 0.46034537 0.47352032 0.44179318 0.56462248]
Computation times: {'uni_emb': 0.004900217056274414, 'uni_d_mtx': 0.026593923568725586, 'aggregation': 5.626678466796875e-05, 'total': 0.03155040740966797}


In [19]:
# Evaluate novelty
# This cell may take a few minutes to finish.
# With ref_xtals="mp20", the MP-20 training data is downloaded from Hugging Face (see
# the message in the recorded output), so network access is required.

evaluator = Evaluator(
	novelty=True,
	distance="elmd",
	ref_xtals="mp20",  # Options: "mp20", list[xtalmet.crystal.Crystal], list[pymatgen.core.Structure], A path to the file containing the pre-computed embeddings of the reference structures
)

overall_score, individual_scores, times = evaluator.evaluate(
	xtals=gen_xtals, dir_intermediate=None
)

# Note: Again, if you evaluate the same large set of crystals several times, we
# recommend specifying "dir_intermediate."

print(f"Average novelty score: {overall_score}")
print(f"Individual novelty scores: {individual_scores}")
print(f"Computation times: {times}")

Downloading MP-20 training data from Hugging Face...
Average novelty score: 0.11515527950310558
Individual novelty scores: [0.         0.         0.         0.         0.60869565 0.
 0.         0.         0.54285714 0.        ]
Computation times: {'nov_emb': 0.001439809799194336, 'nov_d_mtx': 37.012083530426025, 'aggregation': 0.00010395050048828125, 'total': 37.01362729072571}


In [20]:
# Evaluate (V)SUN
# This cell may take a few minutes to finish.

# The following setting is equivalent to calculating the conventional VSUN metric in the
# literature using the StructureMatcher in uniqueness and novelty evaluations. If
# validity is set to None, it is equivalent to the conventional SUN metric.

evaluator = Evaluator(
	validity=[
		"smact",
		"structure",
	],  # Options: None, ["smact"], ["structure"], ["smact", "structure"]
	stability="binary",  # Options: None, "binary", "continuous"
	uniqueness=True,
	novelty=True,
	distance="smat",
	ref_xtals="mp20",
)

overall_score, individual_scores, times = evaluator.evaluate(
	xtals=gen_xtals, dir_intermediate=None, multiprocessing=True, n_processes=3
)

# Note: Again, if you calculate novelty using the same distance metric several times, we
# strongly recommend specifying "dir_intermediate."
# Multiprocessing is recommended especially for the "smat" distance to speed up the
# computation.

print(f"Average VSUN score: {overall_score}")
print(f"Individual VSUN scores: {individual_scores}")
print(f"Computation times: {times}")

Using medium MPA-0 model as default MACE-MP model, to use previous (before 3.10) default model please specify 'medium' as model argument
Using Materials Project MACE for MACECalculator with /rds/general/user/mn825/home/.cache/mace/macempa0mediummodel
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.




Using head default out of ['default']
Downloading MP-20 training data from Hugging Face...




Average VSUN score: 0.0
Individual VSUN scores: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Computation times: {'val_smact': 0.3954482078552246, 'val_structure': 0.0023643970489501953, 'stab': 13.018805265426636, 'uni_emb': 0.0, 'uni_d_mtx': 1.0818207263946533, 'nov_emb': 0.0, 'nov_d_mtx': 54.18170690536499, 'aggregation': 0.00014066696166992188, 'total': 68.68028616905212}


In [21]:
# Evaluate (V)SUN with continuous stability and the "elmd+amd" distance
# This cell may take a few minutes to finish.

# Unlike the binary setting, the continuous stability score and the continuous distance
# yield per-crystal scores between 0 and 1 (see the recorded output).

evaluator = Evaluator(
	validity=[
		"smact",
		"structure",
	],  # Options: None, ["smact"], ["structure"], ["smact", "structure"]
	stability="continuous",  # Options: None, "binary", "continuous"
	uniqueness=True,
	novelty=True,
	distance="elmd+amd",
	ref_xtals="mp20",
)

overall_score, individual_scores, times = evaluator.evaluate(
	xtals=gen_xtals, dir_intermediate=None, multiprocessing=True, n_processes=3
)

# Note: Again, if you calculate novelty using the same distance metric several times, we
# strongly recommend specifying "dir_intermediate."
# Multiprocessing is recommended especially for the "smat" distance to speed up the
# computation.

print(f"Average VSUN score: {overall_score}")
print(f"Individual VSUN scores: {individual_scores}")
print(f"Computation times: {times}")

Using medium MPA-0 model as default MACE-MP model, to use previous (before 3.10) default model please specify 'medium' as model argument
Using Materials Project MACE for MACECalculator with /rds/general/user/mn825/home/.cache/mace/macempa0mediummodel
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.




Using head default out of ['default']
Downloading MP-20 training data from Hugging Face...




Average VSUN score: 0.005078719252008488
Individual VSUN scores: [0.         0.0299327  0.00438288 0.         0.         0.
 0.00909478 0.00737682 0.         0.        ]
Computation times: {'val_smact': 0.20816421508789062, 'val_structure': 0.00124359130859375, 'stab': 104.57961010932922, 'uni_emb': 2.8423497676849365, 'uni_d_mtx': 2.525019407272339, 'nov_emb': 2.8661420345306396, 'nov_d_mtx': 15.589264631271362, 'aggregation': 0.00014162063598632812, 'total': 128.61193537712097}




### More details on the VSUN evaluation
When initializing an Evaluator object, you can pass additional arguments via kwargs. kwargs can have four keys: "args_validity", "args_stability", "args_emb", and "args_dist". Below, we explain what arguments you can specify.

#### args_validity
This dictionary is used to pass arguments to the validators in xtalmet/validity.py. The possible arguments depend on the validity evaluation methods you choose.

In [22]:
# The currently available validity screening methods are "smact" and "structure." The
# former does not have parameters that users can specify, while the latter has two:
# "threshold_distance" (float) and "threshold_volume" (float). "threshold_distance" is
# the minimum allowed distance between atoms (default 0.5 [Å]). "threshold_volume" is
# the minimum allowed volume of the unit cell (default 0.1 [Å³]).

# Arguments are nested under the name of the validity method they apply to. The values
# below are the defaults quoted above, written out for illustration.
kwargs = {
	"args_validity": {
		"structure": {
			"threshold_distance": 0.5,
			"threshold_volume": 0.1,
		}
	},
}

evaluator = Evaluator(validity=["structure"], **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average validity score: {overall_score}")
print(f"Individual validity scores: {individual_scores}")
print(f"Computation times: {times}")

Average validity score: 1.0
Individual validity scores: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Computation times: {'val_structure': 0.0024187564849853516, 'aggregation': 4.3392181396484375e-05, 'total': 0.002462148666381836}


#### args_stability
This dictionary is used to pass arguments to the StabilityCalculator class in xtalmet/stability.py. The possible arguments depend on the type of stability evaluation chosen (binary or continuous).

In [23]:
# Binary stability
# This cell may take a few minutes to finish.

# kwargs["args_stability"] can have "diagram", "mace_model", and "threshold" arguments.

# "diagram" (Literal["mp_250618", "mp"] | PatchedPhaseDiagram | str) is the phase diagram
# to use. If "mp_250618" is specified, the diagram constructed using compute_ehull
# function from the MP entries on June 18, 2025, will be used. If "mp" is specified,
# the diagram will be constructed on the spot. You can also pass your own diagram or a
# path to it.

# "mace_model" (str) is the MACE model to use for energy prediction. See
# https://github.com/ACEsuit/mace-foundations/releases for available models. Default is
# "medium-mpa-0".

# "threshold" (float) is the energy threshold for binary stability evaluation. Crystals
# with energy above hull values greater than this threshold will be considered unstable
# and assigned a stability score of 0, while those with energy above hull values less
# than or equal to this threshold will be considered stable and assigned a stability
# score of 1. The default threshold is 0.1 eV/atom. See the docstrings of
# xtalmet.evaluator.Evaluator.evaluate for how the binary stability works.

# The "mace_model" and "threshold" values below are the defaults quoted above, written
# out for illustration.
kwargs = {
	"args_stability": {
		"diagram": "mp_250618",
		"mace_model": "medium-mpa-0",
		"threshold": 0.1,
	},
}

evaluator = Evaluator(stability="binary", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average binary stability score: {overall_score}")
print(f"Individual binary stability scores: {individual_scores}")
print(f"Computation times: {times}")

Using medium MPA-0 model as default MACE-MP model, to use previous (before 3.10) default model please specify 'medium' as model argument
Using Materials Project MACE for MACECalculator with /rds/general/user/mn825/home/.cache/mace/macempa0mediummodel
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.




Using head default out of ['default']




Average binary stability score: 0.1
Individual binary stability scores: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Computation times: {'stab': 8.160350799560547, 'aggregation': 0.00011467933654785156, 'total': 8.160465478897095}


In [24]:
# Continuous stability
# This cell may take a few minutes to finish.

# kwargs["args_stability"] can have "diagram", "mace_model", and "intercept" arguments.

# "diagram" and "mace_model" have the same meanings as in the binary stability case.

# "intercept" (float) is the x-axis intercept of the continuous stability score, i.e.
# the energy above hull at which the score reaches 0. The stability score determines
# how much a crystal contributes to the uniqueness or novelty score and is calculated
# as 1 - (energy above hull) / intercept, clipped between 0 and 1. The default
# intercept is 0.4289 eV/atom, which is the 99.9th percentile of the energy above hull
# values for the MP20 test data. See the docstrings of
# xtalmet.evaluator.Evaluator.evaluate for how the continuous stability works.

kwargs = {
	"args_stability": {
		"diagram": "mp_250618",
		"mace_model": "medium-mpa-0",
		"intercept": 0.4289,
	},
}

evaluator = Evaluator(stability="continuous", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average continuous stability score: {overall_score}")
print(f"Individual continuous stability scores: {individual_scores}")
print(f"Computation times: {times}")

Using medium MPA-0 model as default MACE-MP model, to use previous (before 3.10) default model please specify 'medium' as model argument
Using Materials Project MACE for MACECalculator with /rds/general/user/mn825/home/.cache/mace/macempa0mediummodel
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.




Using head default out of ['default']




Average continuous stability score: 0.22842546134697886
Individual continuous stability scores: [0.         0.93036712 0.1075391  0.         0.         0.39568
 0.58811372 0.26255466 0.         0.        ]
Computation times: {'stab': 9.187806606292725, 'aggregation': 4.482269287109375e-05, 'total': 9.187851428985596}


#### args_emb and args_dist
These two dictionaries are used to pass arguments to the computation of embeddings and the distance matrix between embeddings, respectively. Depending on the distance function used, you can specify different arguments.

In [25]:
# "smat" distance

# Since "smat" distance does not allow for the pre-computation of embeddings,
# kwargs["args_emb"] does not exist.

# kwargs["args_dist"] will be passed to pymatgen's StructureMatcher. See
# https://pymatgen.org/pymatgen.analysis.html#pymatgen.analysis.structure_matcher.StructureMatcher
# for the complete list of arguments you can specify.

# The values below are StructureMatcher's documented default tolerances.
kwargs = {
	"args_dist": {"ltol": 0.2, "stol": 0.3, "angle_tol": 5},
}

evaluator = Evaluator(uniqueness=True, distance="smat", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 1.0
Individual uniqueness scores: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Computation times: {'uni_emb': 0.0, 'uni_d_mtx': 0.07453584671020508, 'aggregation': 1.5020370483398438e-05, 'total': 0.07455086708068848}


In [26]:
# For "comp" distance, neither kwargs["args_emb"] nor kwargs["args_dist"] exist.

In [None]:
# "wyckoff" distance

# kwargs["args_emb"] will be passed to pymatgen.symmetry.analyzer.SpacegroupAnalyzer.
# ref: https://pymatgen.org/pymatgen.symmetry.html#pymatgen.symmetry.analyzer.SpacegroupAnalyzer

# kwargs["args_dist"] does not exist

kwargs = {
	"args_emb": {"symprec": 0.1, "angle_tolerance": 10},
}

evaluator = Evaluator(uniqueness=True, distance="wyckoff", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.6
Individual uniqueness scores: [1. 1. 1. 0. 1. 1. 0. 1. 0. 0.]
Computation times: {'uni_emb': 0.013911962509155273, 'uni_d_mtx': 4.8160552978515625e-05, 'aggregation': 1.621246337890625e-05, 'total': 0.013976335525512695}


In [28]:
# For "magpie" distance, neither kwargs["args_emb"] nor kwargs["args_dist"] exist.

In [29]:
# "pdd" distance

# kwargs["args_emb"] will be passed to amd.PDD. See
# https://average-minimum-distance.readthedocs.io/en/latest/Using_PDDs.html#calculation-options
# for the complete list of arguments you can specify.

# kwargs["args_dist"] will be passed to amd.PDD_pdist when evaluating uniqueness and to
# amd.PDD_cdist when evaluating novelty. Both functions share the same arguments. See
# https://average-minimum-distance.readthedocs.io/en/latest/Using_PDDs.html#comparison-options-and-multiprocessing
# for the complete list of arguments you can specify.

# About "n_jobs": if this value is not specified, and if multiprocessing=True is given
# when calling evaluate methods, then n_jobs will be set using the _set_n_processes
# function in the xtalmet/distance.py file.
kwargs = {
	"args_emb": {"k": 100},
	"args_dist": {
		"metric": "chebyshev",
		"backend": "multiprocessing",
		"n_jobs": None,
		"verbose": False,
	},
}

evaluator = Evaluator(uniqueness=True, distance="pdd", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.5373639153251784
Individual uniqueness scores: [0.50424762 0.59284046 0.47509269 0.48954114 0.64088593 0.61548105
 0.49661272 0.4913017  0.48782863 0.57980721]
Computation times: {'uni_emb': 0.004227876663208008, 'uni_d_mtx': 0.09950542449951172, 'aggregation': 3.504753112792969e-05, 'total': 0.10376834869384766}


In [30]:
# "amd" distance

# kwargs["args_emb"] will be passed to amd.AMD. You can specify the integer k. See
# https://average-minimum-distance.readthedocs.io/en/latest/amd.calculate.html#amd.calculate.AMD
# for the details.

# kwargs["args_dist"] will be passed to amd.AMD_pdist when evaluating uniqueness and to
# amd.AMD_cdist when evaluating novelty. Both functions share the same arguments. See
# https://average-minimum-distance.readthedocs.io/en/latest/Using_AMDs.html#comparison-options
# for the complete list of arguments you can specify.

# NOTE(review): the recorded scores are identical to the earlier uniqueness run that
# used distance="amd" without kwargs, which suggests these values match the defaults
# -- confirm.
kwargs = {
	"args_emb": {"k": 100},
	"args_dist": {
		"metric": "chebyshev",
		"low_memory": False,
	},
}

evaluator = Evaluator(uniqueness=True, distance="amd", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.5159663868488782
Individual uniqueness scores: [0.47377589 0.5871229  0.44919673 0.47289371 0.6326102  0.60378309
 0.46034537 0.47352032 0.44179318 0.56462248]
Computation times: {'uni_emb': 0.00508570671081543, 'uni_d_mtx': 0.0033643245697021484, 'aggregation': 2.5033950805664062e-05, 'total': 0.008475065231323242}


In [31]:
# "elmd" distance

# kwargs["args_emb"] does not exist.

# kwargs["args_dist"] can contain the argument "metric" for ElMD.elmd. This argument
# specifies the ground distance between elements to be used in the Earth Mover's
# Distance calculation. See https://github.com/lrcfmd/ElMD/tree/master?tab=readme-ov-file#elemental-similarity
# for the complete list of available metrics.

# Only "args_dist" is given here, since there are no embedding arguments for "elmd".
kwargs = {
	"args_dist": {"metric": "mod_petti"},
}

evaluator = Evaluator(uniqueness=True, distance="elmd", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.9466155346206582
Individual uniqueness scores: [0.95309282 0.94980445 0.94052129 0.9592388  0.94202158 0.94759804
 0.94956062 0.9408019  0.94143889 0.94207697]
Computation times: {'uni_emb': 0.0005238056182861328, 'uni_d_mtx': 0.008272409439086914, 'aggregation': 3.170967102050781e-05, 'total': 0.008827924728393555}


In [32]:
# "elmd+amd" distance

# kwargs["args_emb"] can contain the arguments for d_elmd and d_amd. See the previous
# sections for the details.

# kwargs["args_dist"] can contain the arguments for d_elmd and d_amd, as well as the
# coefficients for the weighted sum. See the previous sections for the details.

kwargs = {
	"args_emb": {"amd": {"k": 100}},
	"args_dist": {
		"elmd": {"metric": "mod_petti"},
		"amd": {"metric": "chebyshev", "low_memory": False},
		# The coefficients are hexadecimal float literals, i.e. exact binary values:
		# approximately 0.7763 for "elmd" and 0.2237 for "amd" (they sum to ~1).
		# NOTE(review): these look like the package's default coefficients written
		# out bit-for-bit -- confirm against xtalmet.
		"coefs": {
			"elmd": float.fromhex("0x1.8d7d565a99f87p-1"),
			"amd": float.fromhex("0x1.ca0aa695981e5p-3"),
		},
	},
}

evaluator = Evaluator(uniqueness=True, distance="elmd+amd", **kwargs)

overall_score, individual_scores, times = evaluator.evaluate(xtals=gen_xtals)

print(f"Average uniqueness score: {overall_score}")
print(f"Individual uniqueness scores: {individual_scores}")
print(f"Computation times: {times}")

Average uniqueness score: 0.8502995060228431
Individual uniqueness scores: [0.84589209 0.86868958 0.83063501 0.8504662  0.87282076 0.87070275
 0.8401461  0.83629291 0.82969156 0.85765809]
Computation times: {'uni_emb': 0.005213260650634766, 'uni_d_mtx': 0.010202407836914062, 'aggregation': 2.7894973754882812e-05, 'total': 0.015443563461303711}


Please note that you can specify all of "args_emb", "args_dist", "args_validity", and "args_stability" in kwargs at the same time.