# Tutorial on the xtalmet package
This notebook provides a tutorial on evaluating the uniqueness and novelty of a given set of generated crystals using the `xtalmet` package.

### Setup

In [1]:
import gzip
import pickle

from huggingface_hub import hf_hub_download

from xtalmet.evaluator import Evaluator

  from .autonotebook import tqdm as notebook_tqdm


cuequivariance or cuequivariance_torch is not available. Cuequivariance acceleration will be disabled.


### Evaluation

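The cells below load a sample set of generated crystals, build an `Evaluator` from them, and compute their uniqueness and novelty.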

In [2]:
# Load a set of crystals

# You can alternatively prepare your own set of crystals (list[xtalmet.crystal.Crystal]
# or list[pymatgen.core.Structure]).

# Number of crystals kept for this tutorial; increase it to evaluate more of the set.
N_TUTORIAL_XTALS = 100

# Fetch the gzipped pickle of generated crystals from the Hugging Face dataset repo.
# The returned value is passed to gzip.open below, i.e. it is used as a local file path.
path = hf_hub_download(
	repo_id="masahiro-negishi/xtalmet",
	filename="mp20/model/cdvae.pkl.gz",
	repo_type="dataset",
)

# SECURITY: pickle.load can execute arbitrary code while deserializing. Only load
# pickle files that come from a source you trust; do not point this cell at
# untrusted files.
with gzip.open(path, "rb") as f:
	# Use only the first N_TUTORIAL_XTALS crystals for this tutorial
	gen_xtals = pickle.load(f)[:N_TUTORIAL_XTALS]

In [3]:
# Prepare an evaluator
# The Evaluator is built once from the generated crystals; this same instance is used
# by both the uniqueness and the novelty evaluations in the following cells.
evaluator = Evaluator(gen_xtals)

In [4]:
# Evaluate uniqueness
#
# Tip: if uniqueness is computed several times with the same distance metric (for
# example, while comparing different screening methods), passing dir_intermediate is
# strongly recommended. The distance matrix computed in the first run is then saved in
# that directory and reused afterwards, which significantly reduces the computation cost.
distance = "amd"  # Options: "smat", "comp", "wyckoff", "magpie", "pdd", "amd"
screen = None  # Options: None, "smact", "ehull"

# With return_time=True the call yields a (value, timings) pair.
uni_result = evaluator.uniqueness(
	distance=distance,
	screen=screen,
	dir_intermediate=None,
	return_time=True,
)
uni, uni_times = uni_result

print(f"Uniqueness: {uni:.4f}")
print(f"Computation times (s): {uni_times}")

Uniqueness: 1.2808
Computation times (s): {'uni_emb': 0.17191743850708008, 'uni_d_mtx': 0.007668495178222656, 'uni_metric': 1.7881393432617188e-05, 'uni_total': 0.17960381507873535}


In [5]:
# Evaluate novelty
#
# Tip: if novelty is computed several times with the same distance metric, passing
# dir_intermediate is strongly recommended.
train_xtals = "mp20"  # Options: "mp20", list[xtalmet.crystal.Crystal], list[pymatgen.core.Structure]
distance = "amd"  # Options: "smat", "comp", "wyckoff", "magpie", "pdd", "amd"
screen = None  # Options: None, "smact", "ehull"

# With return_time=True the call yields a (value, timings) pair.
nov_result = evaluator.novelty(
	train_xtals=train_xtals,
	distance=distance,
	screen=screen,
	dir_intermediate=None,
	return_time=True,
)
nov, nov_times = nov_result

print(f"Novelty: {nov:.4f}")
print(f"Computation times (s): {nov_times}")


Novelty: 0.2009
Computation times (s): {'nov_emb_gen': 0.01799321174621582, 'nov_emb_train': 0.3856959342956543, 'nov_d_mtx': 0.14848995208740234, 'nov_metric': 0.001974821090698242, 'nov_total': 0.5541539192199707}
