In [12]:
## To get individial CIF files

import re
from pathlib import Path


CIF_DIR = Path(".")   


def get_formula(text):
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("_chemical_formula_sum"):
            return (
                line.split(None, 1)[1]
                .replace("'", "")
                .replace('"', "")
                .replace(" ", "")
            )
    return None

def sanitize(name):
    return re.sub(r"[^\w\.\-]", "", name)

for cif_file in CIF_DIR.glob("*.cif"):
    text = cif_file.read_text(errors="ignore")

    # Split on real data_ blocks
    blocks = re.split(r"(?=^data_)", text, flags=re.MULTILINE)
    blocks = [b for b in blocks if b.strip().startswith("data_")]


    if len(blocks) <= 1:
        continue

    print(f"\nProcessing batch CIF: {cif_file.name}")
    print(f"  Found {len(blocks)} entries")

    for i, block in enumerate(blocks, 1):
        formula = get_formula(block)
        if not formula:
            formula = f"{cif_file.stem}_entry{i}"

        formula = sanitize(formula)
        out_path = cif_file.with_name(f"{formula}.cif")

        # Avoid overwrite
        counter = 1
        while out_path.exists():
            out_path = cif_file.with_name(f"{formula}_{counter}.cif")
            counter += 1

        out_path.write_text(block.strip() + "\n")
        print(f"    → wrote {out_path.name}")




Processing batch CIF: A10.cif
  Found 3 entries
    → wrote Al2.5Ga2.5Gd3O12_2.cif
    → wrote Al2Ga3Gd3O12_2.cif
    → wrote Al3Ga2Gd3O12_2.cif

Processing batch CIF: A12.cif
  Found 12 entries
    → wrote Al2.5Ga2.5O12Y3_6.cif
    → wrote Al2.5Ga2.5O12Y3_7.cif
    → wrote Al2.5Ga2.5O12Y3_8.cif
    → wrote Al4GaO12Y3_2.cif
    → wrote Al3Ga2O12Y3_2.cif
    → wrote Al2Ga3O12Y3_4.cif
    → wrote AlGa4O12Y3_2.cif
    → wrote Al3.97Ga1.03O12Y3_2.cif
    → wrote Al3.08Ga1.92O12Y3_2.cif
    → wrote Al2.10Ga2.90O12Y3_2.cif
    → wrote Al0.92Ga4.08O12Y3_2.cif
    → wrote Al2Ga3O12Y3_5.cif

Processing batch CIF: A13.cif
  Found 2 entries
    → wrote Al3Li9O29P8_2.cif
    → wrote AlLiO12P4_2.cif

Processing batch CIF: A14.cif
  Found 5 entries
    → wrote Al10.26Mg0.74O17Sr0.85_2.cif
    → wrote Al9.94Mg1.06O17Sr_4.cif
    → wrote Al9.94Mg1.06O17Sr_5.cif
    → wrote Al22MgO36Sr1.91_2.cif
    → wrote Al10MgO17Sr0.92_2.cif

Processing batch CIF: A15.cif
  Found 2 entries
    → wrote Al4O15Sc2Sr6