# Bonding and topology determination in the Materials Project database

In this file, we test the influence of the two CrystalNN options to handle intermetallic systems.

In [1]:
import json
import os
import random
import warnings

import pymatgen
from pymatgen.analysis.local_env import CrystalNN
from pymatgen.core import Structure
from pymatgen.ext.matproj import MPRester

from tqdm.notebook import tqdm

random.seed("Call me Ishmael")
print("Using pymatgen version:", pymatgen.core.__version__)

Using pymatgen version: 2025.2.18


In [2]:
with open(os.path.expanduser("~/.mpapikey"), "r") as f:
    apikey = f.read().strip()

In [3]:
crystallnn = CrystalNN()
crystallnn_noweight = CrystalNN(x_diff_weight = 0)


def make_labels_unique(struct):
    from collections import Counter
    
    labels = Counter(site.label for site in struct.sites)
    counter = {}
    for i, site in enumerate(struct.sites):
        label = site.label
        if labels[label] > 1 or label.isalpha():
            c = counter.get(label, 0)
            site.label = f"{label}{c}" if label.isalpha() else f"{label}_{c}"
            c = c + 1
            counter[label] = c


def analyse_bonding(mat, method = "CrystalNN"):
    if method != "CrystalNN" and method != "CrystalNN_noweight":
        raise ValueError("invalid method name")

    # Magnetic moments trigger a bug in CifWriter, so we remove them here
    # https://github.com/materialsproject/pymatgen/issues/3772
    if "magmom" in mat.structure.site_properties:
        mat.structure.remove_site_property("magmom")

    # All labels should be unique (otherwise bond specifications will fail)
    make_labels_unique(mat.structure)
    labels = [site.label for site in mat.structure.sites]
    if len(labels) != len(set(labels)):
        raise ValueError("labels are not unique in structure")

    while True:
        bonded_struct = None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                if method == "CrystalNN_noweight":
                    bonded_struct = crystallnn_noweight.get_bonded_structure(mat.structure)
                else:
                    bonded_struct = crystallnn.get_bonded_structure(mat.structure)

            except Exception as e:
                print(f"{method} failed for {mat.material_id}: {e}")
                return None

        coord = set()
        bonds = set()
        rerun_flag = False
        for n, site in enumerate(bonded_struct.structure.sites):
            neighbors = bonded_struct.get_connected_sites(n)
            # We cannot use get_coordination_of_site() because of a bug:
            # https://github.com/materialsproject/pymatgen/issues/3888#issuecomment-2232571072
            coord.add((str(site.specie), len(neighbors)))
            for connected in neighbors:
                if site.specie <= connected.site.specie:
                    bonds.add((str(site.specie), str(connected.site.specie)))
                if site.label == connected.site.label:
                    # If an atom is bonded to its own image, then CrystalNets will not like it
                    # and we need to make rerun on a supercell
                    rerun_flag = True

        if rerun_flag:
            # If we found an atom bonded to itself, 
            mat.structure.make_supercell(2, in_place=True)
            make_labels_unique(mat.structure)
        else:
            # Otherwise we're done
            break

    return {"material_id": str(mat.material_id),
            "formula_pretty": mat.formula_pretty,
            "nelements": mat.nelements,
            "theoretical": mat.theoretical,
            "is_stable": mat.is_stable,
            "crystal_system": str(mat.symmetry.crystal_system),
            "space_group": mat.symmetry.number,
            "coordination": coord,
            "bonds": bonds}

In [4]:
# Test our analysis method on one material
with MPRester(apikey) as mpr:
    mp_data = mpr.materials.summary.search(
        material_ids=["mp-3934"],
        fields=["material_id", "builder_meta", "deprecated", "formula_pretty", "nelements", "elements", "structure", "theoretical", "symmetry"]
    )

    assert len(mp_data) == 1
    res = analyse_bonding(mp_data[0])
    print(res)

    res = analyse_bonding(mp_data[0], "CrystalNN_noweight")
    print(res)

Retrieving SummaryDoc documents:   0%|          | 0/1 [00:00<?, ?it/s]

{'material_id': 'mp-3934', 'formula_pretty': 'Cu3PS4', 'nelements': 3, 'theoretical': False, 'is_stable': None, 'crystal_system': 'Orthorhombic', 'space_group': 31, 'coordination': {('S', 4), ('Cu', 4), ('P', 4)}, 'bonds': {('Cu', 'S'), ('P', 'S')}}
{'material_id': 'mp-3934', 'formula_pretty': 'Cu3PS4', 'nelements': 3, 'theoretical': False, 'is_stable': None, 'crystal_system': 'Orthorhombic', 'space_group': 31, 'coordination': {('S', 4), ('Cu', 4), ('P', 4)}, 'bonds': {('Cu', 'S'), ('P', 'S')}}


In [5]:
with MPRester(apikey) as mpr:
    mp_data = mpr.materials.summary.search(
        deprecated=False,
        fields=["material_id", "builder_meta", "deprecated", "formula_pretty", "nelements", "elements", "structure", "theoretical", "symmetry"]
    )

print("Database version", mpr.get_database_version())
print("Number of materials found:", len(mp_data))

# Check that we did not get any deprecated material
# See https://github.com/materialsproject/api/issues/964
assert sum(1 for x in mp_data if x.deprecated) == 0

Retrieving SummaryDoc documents:   0%|          | 0/170470 [00:00<?, ?it/s]

Database version 2025.02.12.post
Number of materials found: 170470


In [6]:
nonmetals = {'O', 'S', 'Se', 'Te', 'Po', 'F', 'Cl', 'Br', 'I', 'At', 'N', 'P', 'As', 'Sb', 'C', 'Si', 'Ge', 'B', 'H', 'He', 'Ne', 'Ar', 'Kr', 'Xe'}

def isIntermetallic(struct):
    el = set(str(x) for x in struct.elements)
    return len(el & nonmetals) == 0

In [7]:
sum(1 for x in mp_data if isIntermetallic(x)) / len(mp_data)

0.1617000058661348

In [8]:
intermetallics = [x for x in mp_data if isIntermetallic(x)]

In [9]:
data = {}

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message="No Pauling electronegativity")
    for mat in tqdm(intermetallics, smoothing=0.01, mininterval=1):
        x = analyse_bonding(mat)
        if x is not None:
            data[mat.material_id] = x

print("Number of systems analyzed:", len(data))

  0%|          | 0/27565 [00:00<?, ?it/s]

CrystalNN failed for mp-1210439: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1212347: No Voronoi neighbors found for site - try increasing cutoff
Number of systems analyzed: 27563


In [10]:
data_noweight = {}

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message="No Pauling electronegativity")
    for mat in tqdm(intermetallics, smoothing=0.01, mininterval=1):
        x = analyse_bonding(mat, "CrystalNN_noweight")
        if x is not None:
            data_noweight[mat.material_id] = x

print("Number of systems analyzed:", len(data_noweight))

  0%|          | 0/27565 [00:00<?, ?it/s]

CrystalNN_noweight failed for mp-1210439: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN_noweight failed for mp-1212347: No Voronoi neighbors found for site - try increasing cutoff
Number of systems analyzed: 27563


In [11]:
sum(1 for x in data.keys() if data[x] != data_noweight[x])

11477

In [12]:
differences = [x for x in data.keys() if data[x] != data_noweight[x]]
differences[:10]

[MPID(mp-1104792),
 MPID(mp-11241),
 MPID(mp-11252),
 MPID(mp-11454),
 MPID(mp-11493),
 MPID(mp-1181275),
 MPID(mp-1183189),
 MPID(mp-1183278),
 MPID(mp-1183511),
 MPID(mp-1183643)]

In [13]:
data['mp-1183278']

{'material_id': 'mp-1183278',
 'formula_pretty': 'AlPb3',
 'nelements': 2,
 'theoretical': True,
 'is_stable': None,
 'crystal_system': 'Tetragonal',
 'space_group': 139,
 'coordination': {('Al', 4), ('Pb', 4), ('Pb', 12)},
 'bonds': {('Al', 'Pb'), ('Pb', 'Pb')}}

In [14]:
data_noweight['mp-1183278']

{'material_id': 'mp-1183278',
 'formula_pretty': 'AlPb3',
 'nelements': 2,
 'theoretical': True,
 'is_stable': None,
 'crystal_system': 'Tetragonal',
 'space_group': 139,
 'coordination': {('Al', 4), ('Pb', 8), ('Pb', 12)},
 'bonds': {('Al', 'Pb'), ('Pb', 'Pb')}}

In [15]:
sum(1 for x in data.keys() if data[x] != data_noweight[x]) / len(data)

0.41639153938250556