In [1]:
import os
import json
import numpy as np
import pandas as pd
from ase.io import read, write
from mace.calculators import MACECalculator
from chgnet.model.model import CHGNet
from chgnet.model.dynamics import CHGNetCalculator
import torch


  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))


In [None]:
import os
import json
from ase.io import read
from chgnet.model.model import CHGNet
from chgnet.model.dynamics import CHGNetCalculator
import torch

# === Setup ===
# Maps each directory of CIF snapshots to the JSON file that will receive the
# CHGNet predictions computed for that directory.
folders = {
    "/home/phanim/harshitrawat/summer/md/mdcifs": "/home/phanim/harshitrawat/summer/final_work/mdinfo_chgnet_predictions_forces.json",
    "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed": "/home/phanim/harshitrawat/summer/final_work/strain_perturb_chgnet_predictions_forces.json"
}

# Use the first CUDA device (or whichever MIG slice is active) when one is
# visible; otherwise fall back to the CPU so the cell still runs on a host
# without a GPU instead of failing in `model.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# === Load CHGNet ===
# The model is first loaded on the CPU and then moved to the selected device;
# the same device is handed to the ASE calculator wrapper.
model = CHGNet.load(use_device="cpu", verbose=True)
model = model.to(device)
calc = CHGNetCalculator(model=model, use_device=device)

def extract_info_from_cif(cif_path):
    """Label one CIF structure with the shared calculator.

    The structure is read from ``cif_path``, the module-level ``calc`` is
    attached to it, and the predicted quantities are collected into a plain
    dict intended to be dumped to JSON.

    Args:
        cif_path: Path of the CIF file to label.

    Returns:
        On success, a dict with the keys ``"file"`` (base name of the CIF),
        ``"energy_eV"`` (Python float), ``"forces_per_atom_eV_per_A"`` and
        ``"stress_tensor"`` (nested lists) and ``"magmom_total"``.
        On any failure, a dict with ``"file"`` and ``"error"`` (the exception
        message) so that one bad structure does not abort a whole batch.
    """
    file_name = os.path.basename(cif_path)
    try:
        atoms = read(cif_path)
        atoms.calc = calc
        return {
            "file": file_name,
            # Coerce to a built-in float: a NumPy scalar / 0-d array here would
            # make the later json.dump fail after all the labeling work is done.
            "energy_eV": float(atoms.get_potential_energy()),
            "forces_per_atom_eV_per_A": atoms.get_forces().tolist(),
            "stress_tensor": atoms.get_stress(voigt=False).tolist(),
            # NOTE(review): this guard looks for a "magmom" per-atom array on the
            # structure; if CIF-derived structures never carry one, this field is
            # always None — confirm whether that is intended.
            "magmom_total": atoms.get_magnetic_moment() if "magmom" in atoms.arrays else None,
        }
    except Exception as e:
        # Best-effort labeling: report the failure in the record instead of raising.
        return {
            "file": file_name,
            "error": str(e),
        }

# === Label and Save ===
# For every (input folder, output JSON) pair: label each CIF in sorted order,
# collect the per-file records (successes and failures alike) and dump them
# to the output JSON file.
for folder, out_json in folders.items():
    print(f"\n📂 Labeling: {folder}")

    # Create the destination directory *before* the expensive labeling pass so
    # an unusable output path fails immediately rather than after every
    # structure has been evaluated. A bare file name has no directory part,
    # and os.makedirs("") would raise, hence the guard.
    out_dir = os.path.dirname(out_json)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    results = []
    cif_files = sorted(name for name in os.listdir(folder) if name.endswith(".cif"))

    for i, fname in enumerate(cif_files):
        result = extract_info_from_cif(os.path.join(folder, fname))
        results.append(result)
        if "error" in result:
            print(f"❌ {fname} — {result['error']}")
        else:
            print(f"✅ {i+1}/{len(cif_files)} — {fname}")

    with open(out_json, "w") as f:
        json.dump(results, f, indent=2)

    print(f"🧾 Saved {len(results)} entries to: {out_json}")


  state = torch.load(path, map_location=torch.device("cpu"))


CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cpu
CHGNet will run on cuda:0

📂 Labeling: /home/phanim/harshitrawat/summer/md/mdcifs
✅ 1/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0000.cif
✅ 2/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0001.cif
✅ 3/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0002.cif
✅ 4/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0003.cif
✅ 5/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0004.cif
✅ 6/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0005.cif
✅ 7/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0006.cif
✅ 8/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0007.cif
✅ 9/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0008.cif
✅ 10/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0009.cif
✅ 11/6030 — cellrelaxed_LLZO_001_Zr_code93_sto__Li_100

In [None]:
import os, json, numpy as np
import pandas as pd
from ase.io import read, write
from sklearn.model_selection import train_test_split

# === Paths ===
# Prediction files to read, the two CIF source directories, and the folder
# that receives everything produced from here on.
json_paths = [
    "/home/phanim/harshitrawat/summer/final_work/mdinfo_chgnet_predictions_forces.json",
    "/home/phanim/harshitrawat/summer/final_work/strain_perturb_chgnet_predictions_forces.json"
]
base_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs"
pert_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed"
out_folder = "/home/phanim/harshitrawat/summer/final_work"

# === Load JSON and match with CIFs ===
# Concatenate the records of all prediction files, dropping every record that
# carries an "error" key.
entries = []
for json_path in json_paths:
    with open(json_path) as handle:
        entries += [record for record in json.load(handle) if "error" not in record]

# === Split into T1 and T2 ===
# Seeded 90/10 random split: T1 (train_entries) and T2 (val_entries).
train_entries, val_entries = train_test_split(
    entries,
    random_state=42,
    test_size=0.1,
)

def make_extxyz(entries, outfile):
    """Write labeled structures to a single extended-XYZ file.

    For each entry the matching CIF is re-read (from ``pert_cif_dir`` when the
    file name contains "perturbed", otherwise from ``base_cif_dir``) and the
    stored energy and forces are attached as ``REF_energy`` / ``REF_forces``.
    Entries that cannot be read, or whose force array does not have one
    3-vector per atom, are reported and skipped.

    Args:
        entries: Iterable of dicts with the keys ``"file"``, ``"energy_eV"``
            and ``"forces_per_atom_eV_per_A"``.
        outfile: Path of the extxyz file to write.

    Returns:
        None. The structures are written to ``outfile`` and a summary line is
        printed.
    """
    atoms_list = []
    for entry in entries:
        fname = entry["file"]
        # NOTE(review): the source folder is inferred from the file name alone;
        # confirm every perturbed CIF really has "perturbed" in its name.
        cif_dir = pert_cif_dir if "perturbed" in fname else base_cif_dir
        cif_path = os.path.join(cif_dir, fname)

        try:
            atoms = read(cif_path)
            forces = np.asarray(entry["forces_per_atom_eV_per_A"], dtype=float)
            # Assigning into atoms.arrays directly performs no validation, so a
            # mismatched label would only blow up later inside write() (outside
            # this try) or end up as a corrupt frame. Reject it here instead.
            if forces.shape != (len(atoms), 3):
                raise ValueError(
                    f"forces shape {forces.shape} does not match {len(atoms)} atoms"
                )
            atoms.info["REF_energy"] = entry["energy_eV"]
            atoms.arrays["REF_forces"] = forces
            atoms.info["file"] = fname
            atoms_list.append(atoms)
        except Exception as e:
            print(f"❌ Failed on {fname}: {e}")

    write(outfile, atoms_list, format="extxyz", write_info=True)
    print(f"✅ Wrote {len(atoms_list)} to: {outfile}")



In [None]:

# === Write EXTXYZs ===
# One extxyz file per split: T1 from the training entries, T2 from the
# validation entries, both placed in out_folder.
for split_entries, split_file in (
    (train_entries, "T1_chgnet_labeled.extxyz"),
    (val_entries, "T2_chgnet_labeled.extxyz"),
):
    make_extxyz(split_entries, os.path.join(out_folder, split_file))



In [None]:
# === Save splits as Excel ===
# Each split is turned into a DataFrame (one row per entry) and stored as a
# spreadsheet in out_folder, without the index column.
# NOTE(review): the entries are the full prediction records; if they include
# per-atom nested lists, confirm the spreadsheet is meant to hold them.
for split_records, xlsx_name in (
    (train_entries, "T1_split.xlsx"),
    (val_entries, "T2_split.xlsx"),
):
    split_table = pd.DataFrame(split_records)
    split_table.to_excel(os.path.join(out_folder, xlsx_name), index=False)

In [None]:
# Launch a MACE fine-tuning run named "mace_T1_finetune" from the shell.
# - Training data: the T1 extxyz file; no separate validation file is passed,
#   so validation data comes from --valid_fraction 0.1 of the training file.
# - Starting point: the foundation model checkpoint given by --foundation_model.
# - Loss weights: --forces_weight 100.0 against --energy_weight 1.0.
# - --E0s pins the isolated-atom energies to 0.0 for Z = 3, 8, 40, 57
#   (Li, O, Zr, La).
# NOTE(review): the T2 extxyz file is not referenced by this command — confirm
# it is meant to be used only for a later, separate evaluation.
# NOTE(review): --foundation_model_readout may be a store_false switch in some
# MACE releases (passing it would then turn the foundation readout OFF) —
# confirm against `mace_run_train --help` for the installed version.
!mace_run_train \
  --name mace_T1_finetune \
  --model MACE \
  --train_file /home/phanim/harshitrawat/summer/final_work/T1_chgnet_labeled.extxyz \
  --foundation_model /home/phanim/harshitrawat/summer/mace_models/universal/2024-01-07-mace-128-L2_epoch-199.model \
  --foundation_model_readout \
  --device cuda \
  --batch_size 4 \
  --valid_batch_size 4 \
  --r_max 5.0 \
  --default_dtype float64 \
  --max_num_epochs 300 \
  --forces_weight 100.0 \
  --energy_weight 1.0 \
  --valid_fraction 0.1 \
  --E0s "{3: 0.0, 8: 0.0, 40: 0.0, 57: 0.0}"
