In [1]:
import json
import os
import warnings

import juliacall

import pymatgen
from pymatgen.analysis.local_env import CrystalNN
from pymatgen.core import Structure
from pymatgen.ext.matproj import MPRester
from pymatgen.io.cif import CifWriter

from tqdm.notebook import tqdm

print("Using pymatgen version:", pymatgen.core.__version__)

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython
Using pymatgen version: 2024.6.10


In [2]:
jl = juliacall.newmodule("NotebookModule")
jl.seval("using CrystalNets")
jl.CrystalNets.toggle_export(False)

print("Using Julia version:", jl.seval("VERSION"))
print("Running from directory:", jl.seval("Sys.BINDIR"))

Using Julia version: 1.10.4
Running from directory: /opt/julia-1.10.4/bin


In [3]:
with open(os.path.expanduser("~/.mpapikey"), "r") as f:
    apikey = f.read().strip()

In [4]:
crystallnn = CrystalNN()

try:
    os.mkdir("cif")
except FileExistsError as e:
    pass

def make_labels_unique(struct):
    from collections import Counter
    
    labels = Counter(site.label for site in struct.sites)
    counter = {}
    for i, site in enumerate(struct.sites):
        label = site.label
        if labels[label] > 1 or label.isalpha():
            c = counter.get(label, 0)
            site.label = f"{label}{c}" if label.isalpha() else f"{label}_{c}"
            c = c + 1
            counter[label] = c


def jimage_to_site_symmetry(jimage):
    i, j, k = jimage
    return f"1_{5+i}{5+j}{5+k}"


def writeToCifFile(bonded_struct, file):
    cif_writer = CifWriter(bonded_struct.structure)
    cif = str(cif_writer)
    cif += """loop_
_geom_bond_atom_site_label_1
_geom_bond_atom_site_label_2
_geom_bond_distance
_geom_bond_site_symmetry_2
"""

    for n, site in enumerate(bonded_struct.structure.sites):
        for connected in bonded_struct.get_connected_sites(n):
            # Make sure we only output each bond once
            if site.label <= connected.site.label:
                cif += f"{site.label} {connected.site.label} {connected.dist:.6f} {jimage_to_site_symmetry(connected.jimage)}\n"

    file.write(cif)


def analyse_bonding(mat):
    # Magnetic moments trigger a bug in CifWriter, so we remove them here
    # https://github.com/materialsproject/pymatgen/issues/3772
    if "magmom" in mat.structure.site_properties:
        mat.structure.remove_site_property("magmom")

    # All labels should be unique (otherwise bond specifications will fail)
    make_labels_unique(mat.structure)
    labels = [site.label for site in mat.structure.sites]
    if len(labels) != len(set(labels)):
        raise ValueError("labels are not unique in structure")

    while True:
        bonded_struct = None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                bonded_struct = crystallnn.get_bonded_structure(mat.structure)
            except Exception as e:
                print(f"CrystalNN failed for {mat.material_id}: {e}")
                return None

        coord = set()
        bonds = set()
        rerun_flag = False
        for n, site in enumerate(bonded_struct.structure.sites):
            neighbors = bonded_struct.get_connected_sites(n)
            # We cannot use get_coordination_of_site() because of a bug:
            # https://github.com/materialsproject/pymatgen/issues/3888#issuecomment-2232571072
            coord.add((str(site.specie), len(neighbors)))
            for connected in neighbors:
                if site.specie <= connected.site.specie:
                    bonds.add((str(site.specie), str(connected.site.specie)))
                if site.label == connected.site.label:
                    # If an atom is bonded to its own image, then CrystalNets will not like it
                    # and we need to make rerun on a supercell
                    rerun_flag = True

        if rerun_flag:
            # If we found an atom bonded to itself, 
            mat.structure.make_supercell(2, in_place=True)
            make_labels_unique(mat.structure)
        else:
            # Otherwise we're done
            break

    with open(f"cif/{mat.material_id}.cif", "w") as f:
        writeToCifFile(bonded_struct, f)

    return {"material_id": str(mat.material_id),
            "formula_pretty": mat.formula_pretty,
            "nelements": mat.nelements,
            "theoretical": mat.theoretical,
            "is_stable": mat.is_stable,
            "crystal_system": str(mat.symmetry.crystal_system),
            "space_group": mat.symmetry.number,
            "coordination": coord,
            "bonds": bonds}

In [5]:
with MPRester(apikey) as mpr:
    mp_data = mpr.materials.summary.search(
        material_ids=["mp-3934"],
        fields=["material_id", "formula", "formula_pretty", "nelements", "structure", "theoretical", "symmetry"]
    )

    assert len(mp_data) == 1
    res = analyse_bonding(mp_data[0])
    print(res)

Retrieving SummaryDoc documents:   0%|          | 0/1 [00:00<?, ?it/s]

{'material_id': 'mp-3934', 'formula_pretty': 'Cu3PS4', 'nelements': 3, 'theoretical': False, 'is_stable': None, 'crystal_system': 'Orthorhombic', 'space_group': 31, 'coordination': {('Cu', 4), ('S', 4), ('P', 4)}, 'bonds': {('Cu', 'S'), ('P', 'S')}}


In [6]:
with MPRester(apikey) as mpr:
    mp_data = mpr.materials.summary.search(
        fields=["material_id", "formula", "formula_pretty", "nelements", "structure", "theoretical", "symmetry"]
    )
    print("Number of materials found:", len(mp_data))
    print("Database version", mpr.get_database_version())

Retrieving SummaryDoc documents:   0%|          | 0/153235 [00:00<?, ?it/s]

Number of materials found: 155361
Database version 2023.11.1


In [7]:
data = {}

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message="No Pauling electronegativity")
    for mat in tqdm(mp_data, smoothing=0.01, mininterval=1):
        x = analyse_bonding(mat)
        if x is not None:
            data[mat.material_id] = x

print("Number of systems analyzed:", len(data))

  0%|          | 0/155361 [00:00<?, ?it/s]

CrystalNN failed for mp-994911: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1210439: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1247838: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1213668: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1215144: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1180797: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1247813: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1214815: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1215160: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1212347: No Voronoi neighbors found for site - try increasing cutoff
CrystalNN failed for mp-1212578: No Voronoi neighbors found for site - 

In [8]:
data["mp-1206677"]

{'material_id': 'mp-1206677',
 'formula_pretty': 'Rb2O2',
 'nelements': 2,
 'theoretical': False,
 'is_stable': None,
 'crystal_system': 'Monoclinic',
 'space_group': 11,
 'coordination': {('O', 6), ('Rb', 6)},
 'bonds': {('Rb', 'O')}}

In [9]:
crystalnets_options = jl.CrystalNets.Options(structure=jl.StructureType.Auto, clusterings=[jl.Clustering.EachVertex], bonding=jl.Bonding.Input, split_O_vertex=False)

def analyse_topology(material_id):
    res = jl.determine_topology(f"cif/{material_id}.cif", crystalnets_options)
    topologies = [(mult, jl.ndims(net[jl.Clustering.EachVertex].genome), str(net[jl.Clustering.EachVertex])) for net, mult in res]
    return topologies

In [10]:
analyse_topology("mp-1206677")

[(1, 3, 'pcu')]

In [11]:
jl.CrystalNets.toggle_export(False)
jl.CrystalNets.toggle_warning(False)

for material_id in tqdm(data.keys(), smoothing=0.01, mininterval=1):
    topo = analyse_topology(material_id)
    data[material_id]["topology"] = topo

  0%|          | 0/155345 [00:00<?, ?it/s]

spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).
spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).
spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).
spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).
spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).
spglib: Indicated max size(=384) is less than number spglib: of symmetry operations(=768).


In [12]:
data["mp-1206677"]

{'material_id': 'mp-1206677',
 'formula_pretty': 'Rb2O2',
 'nelements': 2,
 'theoretical': False,
 'is_stable': None,
 'crystal_system': 'Monoclinic',
 'space_group': 11,
 'coordination': {('O', 6), ('Rb', 6)},
 'bonds': {('Rb', 'O')},
 'topology': [(1, 3, 'pcu')]}

In [13]:
class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return list(obj)
        return json.JSONEncoder.default(self, obj)

with open("topo_data.json", "w") as file:
    json.dump(data, file, cls=SetEncoder)