# Install/import packages

In [1]:
# install necessary packages
!pip install pymatgen
!pip install mp_api

Collecting pymatgen
  Downloading pymatgen-2025.3.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting monty>=2025.1.9 (from pymatgen)
  Downloading monty-2025.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting palettable>=3.3.3 (from pymatgen)
  Downloading palettable-3.3.3-py2.py3-none-any.whl.metadata (3.3 kB)
Collecting pybtex>=0.24.0 (from pymatgen)
  Downloading pybtex-0.24.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting ruamel.yaml>=0.17.0 (from pymatgen)
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting spglib>=2.5 (from pymatgen)
  Downloading spglib-2.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting uncertainties>=3.1.4 (from pymatgen)
  Downloading uncertainties-3.2.2-py3-none-any.whl.metadata (6.9 kB)
Collecting latexcodec>=1.0.4 (from pybtex>=0.24.0->pymatgen)
  Downloading latexcodec-3.0.0-py3-none-any.whl.metadata (4.9 kB)
Collecting ruamel.yaml.clib>=0.2.7 (fr

In [2]:
# import necessary packages
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymatgen
from mp_api.client import MPRester
from pymatgen.analysis.local_env import CrystalNN
from pymatgen.core import Structure
from pymatgen.core.periodic_table import Element
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Define API key and structural descriptors

In [3]:
# Define API key for data extraction from the MPD.
# The key is read from the MP_API_KEY environment variable when it is set, so
# a real key never has to be saved inside the notebook; the placeholder below
# is used only when the variable is unset or empty.
API_KEY = os.environ.get("MP_API_KEY") or "INSERT YOUR API KEY HERE"

In [4]:
def _anion_symbols(structure):
    """Guess which element symbols act as anions in ``structure``.

    The reduced composition is passed to ``Composition.oxi_state_guesses`` and
    every element whose guessed (average) oxidation state is negative is
    returned. When no guess is available, the single most electronegative
    non-Li element is used instead.

    This is a heuristic: pass ``anion_elements`` to ``compute_descriptors``
    to override it for compounds where the guess is wrong.

    Returns:
        set[str]: element symbols treated as anions (possibly empty).
    """
    composition = structure.composition.reduced_composition
    guesses = composition.oxi_state_guesses()
    if guesses:
        # The first guess is used; elements with a negative state are anions.
        negative = {str(symbol) for symbol, oxi in guesses[0].items() if oxi < 0}
        if negative:
            return negative

    candidates = {el.symbol for el in composition.elements} - {"Li"}
    if not candidates:
        return set()

    def electronegativity(symbol):
        x = Element(symbol).X
        # Elements without a tabulated electronegativity compare as NaN; rank
        # them last so they are never picked as the anion.
        return x if x == x else float("-inf")

    return {max(candidates, key=electronegativity)}


def compute_descriptors(structure, anion_elements=None):
    """Compute 5 structural descriptors for a Li-containing structure.

    The descriptors are:
    1. Lithium-Lithium Bonding (LLB): number of Li neighbours found by
       CrystalNN around Li sites, averaged over the Li sites.
    2. Sublattice Bond Ionicity (SBI): sum of |X(Li) - X(neighbour)| over all
       CrystalNN neighbours of Li sites, divided by the number of sites.
    3. Anion Framework Coordination (AFC): mean CrystalNN coordination number
       of the anion sites.
    4. Li-Anion Separation Distance (LASD): mean of ``site.distance`` over all
       Li-anion site pairs.
    5. Li-Li Separation Distance (LLSD): mean of ``site.distance`` over all
       distinct Li-Li site pairs.

    Args:
        structure: ordered pymatgen ``Structure``.
        anion_elements: optional iterable of element symbols to treat as
            anions. When omitted, anions are guessed with ``_anion_symbols``.

    Returns:
        dict with keys "LLB", "SBI", "AFC", "LASD" and "LLSD". A descriptor is
        0 when the sites it needs are absent.

    Raises:
        ValueError: if the structure contains disordered sites.
    """
    results = {}

    if not structure.is_ordered:
        raise ValueError("compute_descriptors requires an ordered structure")

    # Element symbols without any oxidation-state decoration ("Li+" -> "Li").
    symbols = [site.specie.symbol for site in structure]
    li_indices = [i for i, symbol in enumerate(symbols) if symbol == "Li"]
    li_sites = [structure[i] for i in li_indices]

    if anion_elements is None:
        anion_set = _anion_symbols(structure)
    else:
        anion_set = set(anion_elements)
    # Li is never counted as an anion, so Li-Li and Li-self distances cannot
    # leak into the anion-based descriptors.
    anion_indices = [
        i for i, symbol in enumerate(symbols)
        if symbol in anion_set and symbol != "Li"
    ]
    anion_sites = [structure[i] for i in anion_indices]

    # Initialize neighbor analysis tool. The neighbour search is the expensive
    # step, so each site's result is computed once and reused by LLB, SBI, AFC.
    cnn = CrystalNN()
    nn_cache = {}

    def neighbors_of(site_index):
        if site_index not in nn_cache:
            nn_cache[site_index] = cnn.get_nn_info(structure, site_index)
        return nn_cache[site_index]

    # 1. Li-Li Bond Count (LLB)
    li_bonds = sum(
        1
        for i in li_indices
        for neighbor in neighbors_of(i)
        if neighbor["site"].specie.symbol == "Li"
    )
    results["LLB"] = li_bonds / len(li_indices) if li_indices else 0

    # 2. Sublattice Bond Ionicity (SBI)
    li_electronegativity = Element("Li").X
    ionicity_sum = 0
    for i in li_indices:
        for neighbor in neighbors_of(i):
            neighbor_x = Element(neighbor["site"].specie.symbol).X
            ionicity_sum += abs(li_electronegativity - neighbor_x)
    # NOTE(review): the sum is normalised by the total number of sites, not by
    # the number of bonds or Li sites; kept as in the original - confirm intent.
    results["SBI"] = ionicity_sum / len(structure) if len(structure) > 0 else 0

    # 3. Anion Framework Coordination (AFC)
    total_coord = sum(len(neighbors_of(i)) for i in anion_indices)
    results["AFC"] = total_coord / len(anion_indices) if anion_indices else 0

    # 4. Li-Anion Separation Distance (LASD)
    if li_sites and anion_sites:
        distances = [li.distance(anion) for li in li_sites for anion in anion_sites]
        results["LASD"] = np.mean(distances)
    else:
        results["LASD"] = 0

    # 5. Li-Li Separation Distance (LLSD)
    if len(li_sites) >= 2:
        distances = [
            li_sites[i].distance(li_sites[j])
            for i in range(len(li_sites))
            for j in range(i + 1, len(li_sites))
        ]
        results["LLSD"] = np.mean(distances)
    else:
        results["LLSD"] = 0

    return results

# Retrieve parameters of all Li-based compounds from the Materials Project Database (MPD) for full analysis of potential new superionic materials

In [6]:
# Properties requested from the Materials Project summary endpoint for every
# Li-containing entry:
#   1. material_id               - MPD ID
#   2. structure                 - crystal structure
#   3. nelements                 - number of elements in compound
#   4. band_gap                  - band gap in eV
#   5. energy_above_hull         - energy above the convex hull in eV/atom
#   6. formation_energy_per_atom - formation energy in eV/atom
# The chemical formula is not requested as a field of its own.
SUMMARY_FIELDS = [
    "material_id",
    "structure",
    "nelements",
    "band_gap",
    "energy_above_hull",
    "formation_energy_per_atom",
]

# Query Materials Project database for all Li-based compounds
with MPRester(API_KEY) as mpr:
    query = mpr.materials.summary.search(
        elements=["Li"],
        fields=SUMMARY_FIELDS,
    )

ValueError: Please use a new API key from https://materialsproject.org/api Keys for the new API are 32 characters, whereas keys for the legacy API are 16 characters.

In [None]:
# Build one record per material: the structural descriptors merged with the
# MPD metadata. A material that raises is reported and skipped so a single
# failure does not abort the whole run.
data = []
for doc in tqdm(query, total=len(query)):
    try:
        struct = doc.structure
        record = {
            **compute_descriptors(struct),
            "material_id": doc.material_id,
            "formula": struct.formula,
            "num_elements": doc.nelements,
            "bandgap": doc.band_gap,
            "energy_above_hull": doc.energy_above_hull,
            "formation_energy_per_atom": doc.formation_energy_per_atom,
        }
    except Exception as e:
        print(f"Error processing {doc.material_id}: {str(e)}")
    else:
        data.append(record)

In [None]:
# Create DataFrame from the per-material records
Li_materials_df = pd.DataFrame(data)
# Optional: uncomment to save the unfiltered descriptors to a .csv file
# Li_materials_df.to_csv("li_structural_descriptors.csv", index=False)

print(f"Successfully processed {len(data)}/{len(query)} materials")
# Preview the frame that was just built (the name `df` is never defined).
print(Li_materials_df.head())

**Pre-filtering MPD Materials**

Many of the 20,000+ Li-based materials in the Materials Project Database can be pre-filtered based on the following parameters:

1. **Band gap**: Materials with a band gap below 1 eV conduct electrons too easily to be effective ion conductors, so only materials with a band gap of at least 1 eV are kept.
2. **Energy above hull**: Materials need to be thermodynamically stable to operate as an effective ion conductor, so only materials on the convex hull ($E_{hull} = 0$ eV/atom) are kept.
3. **Contains transition metals**: Previous studies have shown that Li-based materials without transition metals typically do not operate as effective ion conductors, so only materials containing at least one transition metal are kept.

This filtering process decreases the size of our dataset from 21,756 to 452 Li-based materials.

In [None]:
# define list of transition metals
transition_metals = [
    "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",  # 4th period
    "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",  # 5th period
    "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",        # 6th period
    "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn"        # 7th period
]

# Materials with a band gap below this value (in eV) are removed.
MIN_BAND_GAP_EV = 1

# filter dataframe of Li-based materials based on parameters listed above
if not Li_materials_df.empty:
    _transition_metal_set = set(transition_metals)

    # Split each formula into whole element symbols before matching, so that a
    # one-letter symbol such as "Y" cannot match inside another one ("Yb").
    _formula_symbols = Li_materials_df["formula"].str.findall(r"[A-Z][a-z]?")
    _has_transition_metal = _formula_symbols.map(
        lambda symbols: not _transition_metal_set.isdisjoint(symbols)
    ).astype(bool)

    # `~(bandgap < threshold)` rather than `bandgap >= threshold` keeps rows
    # with a missing band gap, exactly as the row-by-row comparison did.
    _keep = (
        ~(Li_materials_df["bandgap"] < MIN_BAND_GAP_EV)
        & (Li_materials_df["energy_above_hull"] == 0)
        & _has_transition_metal
    )

    # A boolean mask keeps the original index labels and gives the same result
    # when the cell is run again on the already-filtered frame.
    Li_materials_df = Li_materials_df[_keep].copy()

In [None]:
# Export values to .csv file
# (to_csv returns None when given a path, so its result is not stored)
Li_materials_df.to_csv("li_structural_descriptors_filtered.csv", index=False)

# Offer the file as a browser download when running in Google Colab; in any
# other environment the file is simply left in the working directory.
try:
    from google.colab import files
except ImportError:
    print("Saved li_structural_descriptors_filtered.csv")
else:
    files.download('li_structural_descriptors_filtered.csv')