In [20]:
import os
import random
from random import sample

import pandas as pd
from ase import io
from ase.io import read, write
from pymatgen.io.ase import AseAtomsAdaptor

# === Config ===
# Source CIFs plus the two output folders (one CIF copy, one .traj copy per structure).
input_dir = "/home/phanim/harshitrawat/summer/md/mdcifs"
cif_out_dir = "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed_prime"
traj_out_dir = "/home/phanim/harshitrawat/summer/md/mdtraj_strained_perturbed_prime"
os.makedirs(cif_out_dir, exist_ok=True)
os.makedirs(traj_out_dir, exist_ok=True)

# Fractional isotropic strains: every cell vector is multiplied by (1 + eps).
strain_percents = [-0.015, -0.01, 0.01, 0.015]
# Distance argument handed to pymatgen's Structure.perturb.
perturb_amplitude = 0.005
sample_size = 400
# Fixed seed so a re-run selects the same files (and overwrites its own outputs
# instead of piling new ones next to the old ones).
sample_seed = 0

# Files already present are kept; warn because anything that later lists the
# output folder will pick them up together with the files written now.
existing_outputs = [f for f in os.listdir(cif_out_dir) if f.endswith(".cif")]
if existing_outputs:
    print(f"⚠️ {cif_out_dir} already contains {len(existing_outputs)} CIFs from earlier runs.")

# === Sample up to `sample_size` CIFs ===
# Sorted so the seeded sample does not depend on the OS directory ordering.
all_cifs = sorted(f for f in os.listdir(input_dir) if f.endswith(".cif"))
if not all_cifs:
    raise FileNotFoundError(f"No .cif files found in {input_dir}")

n_selected = min(sample_size, len(all_cifs))
if n_selected < sample_size:
    print(f"⚠️ Only {len(all_cifs)} CIFs available; using all of them instead of {sample_size}.")
selected = random.Random(sample_seed).sample(all_cifs, n_selected)
metadata = []

print(f"🚀 Applying strain + perturbation to {n_selected} CIFs...")

for i, fname in enumerate(selected, 1):
    atoms = read(os.path.join(input_dir, fname))
    base = os.path.splitext(fname)[0]

    for eps in strain_percents:
        # Scale the cell and carry the atoms along with it.
        strained = atoms.copy()
        strained.set_cell(strained.cell * (1 + eps), scale_atoms=True)

        # Convert to pymatgen and apply perturbation
        # NOTE(review): the perturbation itself is still random; only the file
        # selection above is seeded.
        struct = AseAtomsAdaptor.get_structure(strained)
        struct.perturb(perturb_amplitude)

        # Back to ASE
        perturbed = AseAtomsAdaptor.get_atoms(struct)

        # Build both names from one stem rather than string-replacing ".cif".
        stem = f"{base}_strain{eps:+.3f}_perturbed"
        cif_name = f"{stem}.cif"
        traj_name = f"{stem}.traj"

        # Save CIF
        io.write(os.path.join(cif_out_dir, cif_name), perturbed)

        # Save TRAJ
        io.write(os.path.join(traj_out_dir, traj_name), perturbed)

        metadata.append({
            "original_file": fname,
            "strained_file": cif_name,
            "traj_file": traj_name,
            "strain_percent": eps,
            "perturb_amplitude": perturb_amplitude
        })

    print(f"[{i}/{n_selected}] ✅ {fname}")

# === Save metadata
excel_path = "/home/phanim/harshitrawat/summer/md/strain_perturb_prime_info.xlsx"
pd.DataFrame(metadata).to_excel(excel_path, index=False)

print("\n✅ Done: All strained + perturbed structures saved.")
print(f"📁 CIFs  → {cif_out_dir}")
print(f"📁 TRAJs → {traj_out_dir}")
print(f"🧾 Info  → {excel_path}")


🚀 Applying strain + perturbation to 400 CIFs...
[1/400] ✅ cellrelaxed_LLZO_010_Li_order0_off__Li_100_slab_heavy_T300_0037.cif
[2/400] ✅ cellrelaxed_LLZO_010_Li_order4_off__Li_100_slab_heavy_T300_0175.cif
[3/400] ✅ cellrelaxed_LLZO_010_Li_order4_off__Li_110_slab_heavy_T450_0080.cif
[4/400] ✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_110_slab_heavy_T450_0018.cif
[5/400] ✅ cellrelaxed_LLZO_010_Li_order4_off__Li_111_slab_heavy_T450_0125.cif
[6/400] ✅ cellrelaxed_LLZO_010_Li_order0_off__Li_100_slab_heavy_T300_0001.cif
[7/400] ✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_111_slab_heavy_T450_0010.cif
[8/400] ✅ cellrelaxed_LLZO_010_La_order0_off__Li_110_slab_heavy_T450_0091.cif
[9/400] ✅ cellrelaxed_LLZO_010_La_order0_off__Li_110_slab_heavy_T300_0016.cif
[10/400] ✅ cellrelaxed_LLZO_010_Li_order0_off__Li_110_slab_heavy_T300_0083.cif
[11/400] ✅ cellrelaxed_LLZO_010_La_order0_off__Li_111_slab_heavy_T300_0010.cif
[12/400] ✅ cellrelaxed_LLZO_010_Li_order4_off__Li_110_slab_heavy_T450_0023.cif
[13/400] ✅ ce

In [21]:
import os
import json
from ase.io import read
from chgnet.model.model import CHGNet
from chgnet.model.dynamics import CHGNetCalculator
import torch

# === Setup ===
# Maps each folder of CIFs to label -> JSON file that receives its predictions.
folders = {
    "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed_prime": "/home/phanim/harshitrawat/summer/final_work/strain_perturb_primechgnet_predictions_forces.json"
}

# Use cuda:0 (or whichever MIG slice is active) when CUDA is usable; otherwise
# fall back to the CPU instead of crashing when the model is moved.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type != "cuda":
    print("⚠️ CUDA is not available — CHGNet labeling will run on the CPU (slow).")

# === Load CHGNet ===
# Weights are loaded on the CPU first, then moved to the chosen device.
model = CHGNet.load(use_device="cpu", verbose=True)
model = model.to(device)
calc = CHGNetCalculator(model=model, use_device=device)

def extract_info_from_cif(cif_path):
    """Label a single CIF with the module-level calculator ``calc``.

    Parameters
    ----------
    cif_path : str
        Path of the CIF file, read with ``ase.io.read``.

    Returns
    -------
    dict
        On success: ``file`` (base name of ``cif_path``), ``energy_eV``
        (built-in float), ``forces_per_atom_eV_per_A`` (nested list),
        ``stress_tensor`` (nested list from ``get_stress(voigt=False)``) and
        ``magmom_total``.
        On failure: ``file`` and ``error`` (text of the exception). Nothing is
        raised, so a batch loop can keep going and report the file later.
    """
    file_name = os.path.basename(cif_path)
    try:
        atoms = read(cif_path)
        atoms.calc = calc
        return {
            "file": file_name,
            # Cast to a built-in float: the calculator may return a NumPy
            # scalar, which json.dump cannot serialise for every dtype.
            "energy_eV": float(atoms.get_potential_energy()),
            "forces_per_atom_eV_per_A": atoms.get_forces().tolist(),
            "stress_tensor": atoms.get_stress(voigt=False).tolist(),
            # NOTE(review): a freshly read CIF is not expected to carry a
            # "magmom" array, so this presumably always yields None — confirm
            # whether per-site moments from the calculator were intended.
            "magmom_total": atoms.get_magnetic_moment() if "magmom" in atoms.arrays else None
        }
    except Exception as e:
        # Deliberate best-effort: record the failure instead of aborting the batch.
        return {
            "file": file_name,
            "error": str(e)
        }

# === Label and Save ===
# For every (input folder -> output JSON) pair: label each CIF in sorted order,
# report per-file success/failure, then dump all records in one JSON file.
for folder, out_json in folders.items():
    print(f"\n📂 Labeling: {folder}")
    cif_files = sorted(name for name in os.listdir(folder) if name.endswith(".cif"))
    n_files = len(cif_files)
    results = []

    for position, fname in enumerate(cif_files, start=1):
        result = extract_info_from_cif(os.path.join(folder, fname))
        results.append(result)
        if "error" not in result:
            print(f"✅ {position}/{n_files} — {fname}")
        else:
            print(f"❌ {fname} — {result['error']}")

    # Make sure the destination folder exists before writing.
    out_dir = os.path.dirname(out_json)
    os.makedirs(out_dir, exist_ok=True)
    with open(out_json, "w") as handle:
        json.dump(results, handle, indent=2)

    print(f"🧾 Saved {len(results)} entries to: {out_json}")


CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cpu
CHGNet will run on cuda:0

📂 Labeling: /home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed_prime
✅ 1/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0006_strain+0.010_perturbed.cif
✅ 2/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0006_strain+0.015_perturbed.cif
✅ 3/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0006_strain-0.010_perturbed.cif
✅ 4/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0006_strain-0.015_perturbed.cif
✅ 5/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0007_strain+0.010_perturbed.cif
✅ 6/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0007_strain+0.015_perturbed.cif
✅ 7/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0007_strain-0.010_perturbed.cif
✅ 8/1612 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0007_strain-0.015_perturbed.cif
✅ 9/

In [22]:
import os, json, numpy as np
import pandas as pd
from ase.io import read, write
from sklearn.model_selection import train_test_split

# === Paths ===
# Prediction files to merge, the two CIF folders, and the export folder.
json_paths = [
    "/home/phanim/harshitrawat/summer/final_work/strain_perturb_primechgnet_predictions_forces.json"
]
base_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs"
pert_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed_prime"
out_folder = "/home/phanim/harshitrawat/summer/final_work"

# === Load JSON and match with CIFs ===
# Concatenate the records of every prediction file ...
loaded_records = []
for json_path in json_paths:
    with open(json_path) as handle:
        loaded_records += json.load(handle)

# ... and keep only the ones that were labeled without an error.
entries = [record for record in loaded_records if "error" not in record]

def make_extxyz(entries, outfile):
    """Write labeled structures to a single extended-XYZ file.

    Each entry's CIF is re-read — from ``pert_cif_dir`` when the file name
    contains "perturbed", otherwise from ``base_cif_dir`` — and annotated with
    ``REF_energy`` and ``file`` in ``atoms.info`` and ``REF_forces`` as a
    per-atom array.

    Parameters
    ----------
    entries : iterable of dict
        Records with the keys ``file``, ``energy_eV`` and
        ``forces_per_atom_eV_per_A``.
    outfile : str
        Path of the extxyz file to write.

    Notes
    -----
    An entry whose CIF cannot be read, or whose forces do not have shape
    ``(n_atoms, 3)``, is reported and skipped so that one bad record cannot
    abort the final write.
    """
    print("extxyz process started")
    atoms_list = []
    for entry in entries:
        fname = entry["file"]
        cif_path = os.path.join(pert_cif_dir if "perturbed" in fname else base_cif_dir, fname)

        try:
            atoms = read(cif_path)

            # Direct assignment into atoms.arrays performs no validation, so
            # check the shape here, while the failure can still be attributed
            # to a specific file.
            forces = np.asarray(entry["forces_per_atom_eV_per_A"], dtype=float)
            expected_shape = (len(atoms), 3)
            if forces.shape != expected_shape:
                raise ValueError(
                    f"forces have shape {forces.shape}, expected {expected_shape}"
                )

            atoms.info["REF_energy"] = entry["energy_eV"]
            atoms.arrays["REF_forces"] = forces
            atoms.info["file"] = fname
            atoms_list.append(atoms)
        except Exception as e:
            print(f"❌ Failed on {fname}: {e}")

    write(outfile, atoms_list, format="extxyz", write_info=True)
    print(f"✅ Wrote {len(atoms_list)} to: {outfile}")



In [23]:
# Export every successfully labeled structure into one extxyz dataset.
labeled_extxyz_path = os.path.join(out_folder, "T3_chgnet_labeled.extxyz")
make_extxyz(entries, labeled_extxyz_path)


extxyz process started
✅ Wrote 1612 to: /home/phanim/harshitrawat/summer/final_work/T3_chgnet_labeled.extxyz
