In [None]:
# Sublattice Generator: generates sublattices from the 2DMatpedia.json database. Each structure is split into one sublattice per element.
# Each sublattice is saved as an extended xyz structure file containing the number of atoms, the lattice vectors, the PBC flags, and the atom types with their positions.
# Naming: each file is saved in the form 2dm-{ID}_{Element}.xyz


import json
import os
from pymatgen.core import Structure

# Path of the material database file to draw lattices from.
# The file is read line by line; every non-empty line is parsed as one JSON record.
INPUT_FILE = "Materials/2DMatpedia.json"
# Folder where the per-element sublattice .xyz files are placed (created if it does not exist)
OUTPUT_FOLDER = "Materials/2DMatpedia Sublattices xyz"

# 1. Function that splits a lattice into a sublattice of a specific element
def truncate_structure(struct, element_to_keep):
    """Return a copy of ``struct`` that holds only the sites of one element.

    The structure is serialised with ``as_dict()``, its ``"sites"`` list is
    filtered, and the result is rebuilt through ``from_dict`` on the same
    class as ``struct``.  A site is kept when the ``"element"`` of the first
    entry of its ``"species"`` list equals ``element_to_keep``; sites whose
    ``"species"`` list is missing or empty are dropped.
    """
    payload = struct.as_dict()
    payload["sites"] = [
        entry
        for entry in payload["sites"]
        if "species" in entry
        and len(entry["species"]) > 0
        and entry["species"][0]["element"] == element_to_keep
    ]
    rebuild = struct.__class__.from_dict
    return rebuild(payload)

# 2. Function that creates an extended .xyz structure file for a lattice
def write_extended_xyz(struct, filename, pbc="T T F"):
    """Write ``struct`` to ``filename`` as an extended xyz file.

    File layout:
      line 1  - number of sites (``len(struct)``)
      line 2  - ``Lattice="..."`` holding the nine entries of
                ``struct.lattice.matrix`` row by row, followed by the
                ``Properties`` descriptor and the ``pbc`` string as given
      line 3+ - one line per site: species label, then the three values of
                the site's ``coords``

    Every number is written with eight decimals.
    """
    cell = struct.lattice.matrix
    cell_text = " ".join(
        f"{cell[row][col]:.8f}" for row in range(3) for col in range(3)
    )
    header = f'Lattice="{cell_text}" Properties=species:S:1:pos:R:3 pbc="{pbc}"'

    # Assemble the complete file content first so nothing is written if a
    # site cannot be formatted.
    rows = [f"{len(struct)}", header]
    for atom in struct.sites:
        x, y, z = atom.coords
        rows.append(f"{atom.species_string:<3} {x:15.8f} {y:15.8f} {z:15.8f}")

    with open(filename, "w") as out:
        out.writelines(row + "\n" for row in rows)

# 3. Generates the sublattice structure files
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# The database is read as JSON Lines: one material record per non-empty line.
# The encoding is stated explicitly because JSON is exchanged as UTF-8, while
# open() would otherwise fall back to the platform's locale encoding.
materials_data = []
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            material_entry = json.loads(line)
            materials_data.append(material_entry)
        except json.JSONDecodeError as e:
            # A malformed line is reported and skipped so that a single bad
            # record does not abort the whole run.
            print(f"Skipping line due to JSON error: {e}")

for mat_data in materials_data:
    # Skip records that are not JSON objects (a bare number or list would make
    # the key lookup below fail) as well as objects without a structure.
    if not isinstance(mat_data, dict) or "structure" not in mat_data:
        continue
    structure_dict = mat_data["structure"]
    try:
        full_struct = Structure.from_dict(structure_dict)
    except Exception as e:
        # Best effort: report the unreadable structure and move on.
        print(f"Error reading structure: {e}")
        continue

    mat_id = mat_data.get("material_id", "no_id")

    # Sorted so that files are written (and logged) in a reproducible order;
    # iterating a bare set gives no ordering guarantee.
    # NOTE(review): truncate_structure matches on the "element" entry of the
    # serialised site; presumably str(sp) is that same bare symbol for every
    # record in this database - confirm no species carry oxidation states.
    all_elems = sorted({str(sp) for sp in full_struct.species})
    for elem in all_elems:
        truncated = truncate_structure(full_struct, elem)
        if len(truncated) == 0:
            continue
        fname = f"{mat_id}_{elem}.xyz"
        out_path = os.path.join(OUTPUT_FOLDER, fname)
        write_extended_xyz(truncated, out_path)
        print(f"Saved sublattice {elem} for {mat_id} => {fname}")

In [None]:
# Alternate Sublattice Generator: generates sublattices from C2DB

import os
from ase.db import connect
from ase import Atoms

# Path to your local c2db.db file (opened with ase.db.connect)
INPUT_FILE = "Materials/c2db.db"
# Output folder for the per-element sublattice .xyz files (created if it does not exist)
OUTPUT_FOLDER = "Materials/c2db Sublattices xyz"

# 1. Function that splits a lattice into a sublattice of a specific element
def truncate_structure(atoms: Atoms, element: str) -> Atoms:
    """
    Extract the sublattice of a single element from an ASE Atoms object.

    Every atom whose chemical symbol equals ``element`` is selected and
    returned as a new Atoms object, onto which the cell and PBC flags of
    ``atoms`` are explicitly re-applied.

    Returns None (not an empty Atoms object) when no atom matches.
    """
    matching = [
        position
        for position, symbol in enumerate(atoms.get_chemical_symbols())
        if symbol == element
    ]
    if len(matching) == 0:
        # Nothing of this element in the structure
        return None

    subset = atoms[matching]
    # Carry the periodic cell and boundary flags over to the selection
    subset.set_cell(atoms.cell)
    subset.set_pbc(atoms.pbc)
    return subset


# 2. Function that creates an extended .xyz structure file for a lattice
def write_extended_xyz(atoms: Atoms, filename: str, pbc: str = "T T F"):
    """
    Save an ASE Atoms object as an extended .xyz file.

    File layout:
      line 1  : number of atoms
      line 2  : Lattice="..." with the nine cell components row by row,
                the Properties descriptor, and the pbc string
      line 3+ : chemical symbol followed by the x, y, z position of each atom

    The ``pbc`` argument is written verbatim; ``atoms.pbc`` is not consulted.
    All numbers are written with eight decimals.
    """
    cell = atoms.cell

    # Row-by-row flattening of the 3x3 cell into a single space-separated field
    components = (cell[i][j] for i in range(3) for j in range(3))
    lattice_field = " ".join(f"{value:.8f}" for value in components)
    header = f'Lattice="{lattice_field}" Properties=species:S:1:pos:R:3 pbc="{pbc}"'

    # Build the whole file in memory: count line, header line, one line per atom
    records = [f"{len(atoms)}\n", header + "\n"]
    for symbol, (x, y, z) in zip(atoms.get_chemical_symbols(), atoms.get_positions()):
        records.append(f"{symbol:<3} {x:15.8f} {y:15.8f} {z:15.8f}\n")

    with open(filename, "w") as handle:
        handle.writelines(records)

# 3. Generates the sublattice structure files
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

db = connect(INPUT_FILE)

# Loop over each row in the database
for row in db.select():
    # Convert the row to an ASE Atoms object
    atoms = row.toatoms()

    # Identifier used in the output filenames: prefer a non-empty `uid`, then
    # a non-empty `name`, and finally fall back to the row id so that a row
    # carrying neither attribute is still exported instead of raising
    # AttributeError.
    mat_id = getattr(row, "uid", None) or getattr(row, "name", None) or f"row-{row.id}"

    # Unique element symbols, sorted so that files are written (and logged)
    # in a reproducible order
    elements = sorted(set(atoms.get_chemical_symbols()))

    for elem in elements:
        sub_atoms = truncate_structure(atoms, elem)
        if sub_atoms is None or len(sub_atoms) == 0:
            # No atoms of this element, skip
            continue

        # Construct output filename
        fname = f"{mat_id}_{elem}.xyz"
        out_path = os.path.join(OUTPUT_FOLDER, fname)

        # Write out the sublattice in extended xyz
        write_extended_xyz(sub_atoms, out_path, pbc="T T F")

        print(f"Saved sublattice {elem} for {mat_id} => {fname}")