In [1]:
import os
import pandas as pd
import numpy as np
import torch
import json
from ase.io import read
from chgnet.model.model import CHGNet
from chgnet.model.dynamics import CHGNetCalculator

# === Config ===
folders = {
    "/home/phanim/harshitrawat/summer/md/mdcifs": "/home/phanim/harshitrawat/summer/temp_1/mdinfo_chgnet_predictions_forces.json",
    "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed": "/home/phanim/harshitrawat/summer/temp_1/strain_perturb_chgnet_predictions_forces.json"
}

device = torch.device("cuda:0")

# Load model manually on CPU
model = CHGNet.load(use_device="cpu", verbose=True)

# Force move to device
model = model.to(device)

# Build calculator manually
calc = CHGNetCalculator(model=model, use_device=device)

def extract_info_from_cif(cif_path):
    try:
        atoms = read(cif_path)
        atoms.calc = calc
        return {
            "file": os.path.basename(cif_path),
            "energy_eV": atoms.get_potential_energy(),
            "forces_per_atom_eV_per_A": atoms.get_forces().tolist(),
            "stress_tensor": atoms.get_stress(voigt=False).tolist(),
            "magmom_total": atoms.get_magnetic_moment() if "magmom" in atoms.arrays else None
        }
    except Exception as e:
        return {"file": os.path.basename(cif_path), "error": str(e)}

# === Run and Save subset ===
for folder, output_json in folders.items():
    print(f"\n📂 Processing folder: {folder}")
    results = []

    cif_files = sorted([f for f in os.listdir(folder) if f.endswith(".cif")])[:5]  # only 5

    for fname in cif_files:
        path = os.path.join(folder, fname)
        result = extract_info_from_cif(path)
        results.append(result)
        print(f"✅ {fname}" if "error" not in result else f"❌ {fname} — {result['error']}")

    os.makedirs(os.path.dirname(output_json), exist_ok=True)
    with open(output_json, "w") as f:
        json.dump(results, f, indent=2)

    print(f"🧾 Saved {len(results)} entries to: {output_json}")


  state = torch.load(path, map_location=torch.device("cpu"))


CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cpu
CHGNet will run on cuda:0

📂 Processing folder: /home/phanim/harshitrawat/summer/md/mdcifs
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0000.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0001.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0002.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0003.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0004.cif
🧾 Saved 5 entries to: /home/phanim/harshitrawat/summer/temp_1/mdinfo_chgnet_predictions_forces.json

📂 Processing folder: /home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0022_strain+2_perturbed.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0022_strain+3_perturbed.cif
✅ cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0022_strain-2_perturbed.cif
✅ cellrelaxed_LLZO_001_Zr_code93_st

In [5]:
import os
import json
from ase.io import read, write
from ase import Atoms

# === Paths ===
json_path = "/home/phanim/harshitrawat/summer/temp_1/mdinfo_chgnet_predictions_forces.json"
base_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs"
pert_cif_dir = "/home/phanim/harshitrawat/summer/md/mdcifs_strained_perturbed"
out_path = "/home/phanim/harshitrawat/summer/temp_1/mace_train_sample.extxyz"

# === Load JSON ===
with open(json_path) as f:
    data = json.load(f)

atoms_list = []

for entry in data:
    if "error" in entry:
        continue

    fname = entry["file"]
    cif_path = os.path.join(pert_cif_dir if "perturbed" in fname else base_cif_dir, fname)

    try:
        atoms = read(cif_path)
        forces = entry["forces_per_atom_eV_per_A"]
        if len(forces) != len(atoms):
            print(f"❌ force mismatch in {fname}")
            continue
        import numpy as np
        atoms.arrays["REF_forces"] = np.array(forces)
        atoms.info["REF_energy"] = entry["energy_eV"]
        atoms_list.append(atoms)
    except Exception as e:
        print(f"❌ Failed on {fname}: {e}")

# === Write EXTXYZ ===
if atoms_list:
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    write(out_path, atoms_list, format="extxyz")
    print(f"✅ Wrote {len(atoms_list)} structures to: {out_path}")
else:
    print("⚠️ No valid structures found")


✅ Wrote 5 structures to: /home/phanim/harshitrawat/summer/temp_1/mace_train_sample.extxyz


In [4]:
!mace_run_train \
  --name mace_T1_finetune \
  --model MACE \
  --train_file /home/phanim/harshitrawat/summer/final_work/T1_chgnet_labeled.extxyz \
  --foundation_model /home/phanim/harshitrawat/summer/mace_models/universal/2024-01-07-mace-128-L2_epoch-199.model \
  --foundation_model_readout \
  --device cuda \
  --batch_size 4 \
  --valid_batch_size 4 \
  --r_max 5.0 \
  --default_dtype float64 \
  --max_num_epochs 300 \
  --forces_weight 100.0 \
  --energy_weight 1.0 \
  --valid_fraction 0.1 \
  --E0s "{3: 0.0, 8: 0.0, 40: 0.0, 57: 0.0}"


  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))
2025-07-22 23:14:13.683 INFO: MACE version: 0.3.13
2025-07-22 23:14:14.163 INFO: CUDA version: 12.6, CUDA device: 0
  model_foundation = torch.load(
2025-07-22 23:14:14.525 INFO: Using foundation model /home/phanim/harshitrawat/summer/mace_models/universal/2024-01-07-mace-128-L2_epoch-199.model as initial checkpoint.
2025-07-22 23:14:14.526 INFO: Using heads: ['Default']
2025-07-22 23:14:14.526 INFO: Using the key specifications to parse data:
2025-07-22 23:14:14.526 INFO: Default: KeySpecification(info_keys={'energy': 'REF_energy', 'stress': 'REF_stress', 'virials': 'REF_virials', 'dipole': 'dipole', 'head': 'head'}, arrays_keys={'forces': 'REF_forces', 'charges': 'REF_charges'})
2025-07-22 23:14:14.559 INFO: Training set 1/1 [energy: 5, stress: 0, virials: 0, dipole components: 0, head: 5, forces: 5, charges: 0]
2025-07-22 23:14:14.559 INFO: Total Training set [energy: 5, stress: 0, v

In [12]:
import os
from ase.io import read
from mace.calculators import MACECalculator

# === Config ===
extxyz_path = "/home/phanim/harshitrawat/summer/temp_1/mace_train_sample.extxyz"  # Update if needed
model_path = "/home/phanim/harshitrawat/summer/test_temp_1_compiled.model"  # Update if needed

# === Load model ===
calculator = MACECalculator(model_paths=[model_path], device="cuda")

# === Read and predict ===
atoms_list = read(extxyz_path, ":")

print(f"📦 Found {len(atoms_list)} structures")

for i, atoms in enumerate(atoms_list):
    try:
        atoms.calc = calculator
        energy = atoms.get_potential_energy()
        forces = atoms.get_forces()

        print(f"\n✅ Structure {i}")
        print(f"Total Energy (eV): {energy:.6f}")
        print(f"Forces (eV/Å):\n{forces}")

    except Exception as e:
        print(f"❌ Structure {i} failed: {e}")


  torch.load(f=model_path, map_location=device)


Using head Default out of ['Default']
No dtype selected, switching to float64 to match model dtype.
📦 Found 5 structures

✅ Structure 0
Total Energy (eV): -2818.687335
Forces (eV/Å):
[[-0.07087808  0.05581734  0.1615765 ]
 [-0.14862033  0.21109401  0.41290478]
 [ 0.26797158  0.03796886  0.13902917]
 ...
 [-0.04536701 -0.11729197  0.41627157]
 [-0.02077799 -0.02988945  0.0052728 ]
 [-0.00712466 -0.18346801  0.01606639]]

✅ Structure 1
Total Energy (eV): -2817.274109
Forces (eV/Å):
[[ 0.11475117  0.16171285  0.08611563]
 [-0.24241879  0.2127288   0.45683261]
 [ 0.19567496 -0.03610636  0.06928247]
 ...
 [-0.15334908 -0.1383471   0.50806271]
 [-0.01979897 -0.12550196 -0.14312563]
 [-0.03887371 -0.49446553 -0.10857664]]

✅ Structure 2
Total Energy (eV): -2813.713681
Forces (eV/Å):
[[ 0.27427625  0.27146415  0.03127643]
 [-0.32018665  0.21810729  0.49394328]
 [ 0.1302428  -0.10046142  0.00706694]
 ...
 [-0.3024747  -0.17931495  0.56872505]
 [-0.00724553 -0.26390154 -0.35058655]
 [-0.05626525

In [15]:
import json
from ase.io import read
from mace.calculators import MACECalculator
import os

# === Config ===
model_path = "/home/phanim/harshitrawat/summer/test_temp_1_compiled.model"
input_extxyz = "/home/phanim/harshitrawat/summer/temp_1/mace_train_sample.extxyz"
output_jsonl = "/home/phanim/harshitrawat/summer/temp_1/mace_predictions.jsonl"
device = "cuda"

# === Load model ===
calc = MACECalculator(model_paths=model_path, device=device)

# === Predict ===
results = []

for atoms in read(input_extxyz, index=":"):
    atoms.calc = calc

    # Extract filename info if available
    fname = atoms.info.get("file", None)
    if fname is None:
        # Try to infer it from atoms.info["filename"], atoms.info["source"], or use a fallback
        fname = atoms.info.get("source", "unknown.cif")
    fname = os.path.basename(fname)

    try:
        energy = atoms.get_potential_energy()
        forces = atoms.get_forces()

        results.append({
            "file": fname,
            "energy_eV_mace": energy,
            "forces_per_atom_eV_per_A_mace": forces.tolist(),
        })

    except Exception as e:
        results.append({
            "file": fname,
            "error": str(e),
        })

# === Save as JSONL ===
with open(output_jsonl, "w") as f:
    for entry in results:
        f.write(json.dumps(entry) + "\n")

print(f"✅ Wrote {len(results)} entries to: {output_jsonl}")

  torch.load(f=model_path, map_location=device)


Using head Default out of ['Default']
No dtype selected, switching to float64 to match model dtype.
✅ Wrote 5 entries to: /home/phanim/harshitrawat/summer/temp_1/mace_predictions.jsonl


In [16]:
import json
import pandas as pd
import numpy as np

# === Paths ===
chgnet_json = "/home/phanim/harshitrawat/summer/temp_1/mdinfo_chgnet_predictions_forces.json"
mace_jsonl = "/home/phanim/harshitrawat/summer/temp_1/mace_predictions.jsonl"

# === Load CHGNet (standard JSON) ===
with open(chgnet_json, "r") as f:
    chgnet_data = json.load(f)
chgnet_dict = {entry["file"]: entry for entry in chgnet_data}

# === Load MACE (JSONL) ===
mace_dict = {}
with open(mace_jsonl, "r") as f:
    for line in f:
        entry = json.loads(line)
        mace_dict[entry["file"]] = entry

# === Compare ===
records = []
for fname in sorted(set(chgnet_dict) & set(mace_dict)):
    try:
        chg = chgnet_dict[fname]
        mace = mace_dict[fname]

        # Compare energy
        e_chg = chg["energy_eV"]
        e_mace = mace["energy_eV"]
        e_diff = e_mace - e_chg

        # Compare forces (L2 norm per atom)
        f_chg = np.array(json.loads(chg["forces_per_atom_eV_per_A"]))
        f_mace = np.array(mace["forces_per_atom_eV_per_A"])

        if f_chg.shape != f_mace.shape:
            continue

        f_diff = np.linalg.norm(f_mace - f_chg, axis=1).mean()

        records.append({
            "file": fname,
            "energy_diff_eV": e_diff,
            "mean_force_diff_eV_per_A": f_diff
        })
    except Exception as e:
        print(f"Skipping {fname}: {e}")

# === Output DataFrame ===
df_diff = pd.DataFrame(records)
if not df_diff.empty:
    df_diff = df_diff.sort_values("energy_diff_eV", key=lambda x: np.abs(x))
else:
    print("⚠️ No matching structures found or records is empty.")

# Display if records present
import ace_tools as tools; tools.display_dataframe_to_user(name="CHGNet vs MACE Comparison", dataframe=df_diff)


⚠️ No matching structures found or records is empty.


ModuleNotFoundError: No module named 'ace_tools'

In [18]:
print("CHGNet entries:", len(chgnet_dict))
print("MACE entries:", len(mace_dict))
print("Common files:", len(set(chgnet_dict) & set(mace_dict)))
print("Sample mismatch:")
print(set(chgnet_dict).difference(mace_dict))


CHGNet entries: 5
MACE entries: 1
Common files: 0
Sample mismatch:
{'cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0003.cif', 'cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0002.cif', 'cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0000.cif', 'cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0001.cif', 'cellrelaxed_LLZO_001_Zr_code93_sto__Li_100_slab_heavy_T300_0004.cif'}


In [22]:
with open("/home/phanim/harshitrawat/summer/temp_1/mdinfo_chgnet_predictions_forces.json") as f:
    for line in f:
        entry = json.loads(line)
        print(entry["file"])


JSONDecodeError: Expecting value: line 2 column 1 (char 2)

In [21]:
import json, glob, os, pandas as pd

jsonl_path = "/home/phanim/harshitrawat/summer/temp_1/mace_predictions.jsonl"
preds = [json.loads(l) for l in open(jsonl_path)]

# If the JSONL order is the same as the original CIF list --------------------
cif_list = sorted(glob.glob("/home/phanim/harshitrawat/summer/temp_1/*.cif"))
for entry, cif in zip(preds, cif_list):
    entry["file"] = os.path.basename(cif)

pred_df = pd.DataFrame(preds)               # <— now has correct “file” column
print(pred_df)

          file  energy_eV_mace  \
0  unknown.cif    -2818.687335   
1  unknown.cif    -2817.274109   
2  unknown.cif    -2813.713681   
3  unknown.cif    -2809.430981   
4  unknown.cif    -2805.959857   

                       forces_per_atom_eV_per_A_mace  
0  [[-0.07087807630609763, 0.055817341744374124, ...  
1  [[0.11475117245601918, 0.16171285451060896, 0....  
2  [[0.2742762513175856, 0.2714641523806318, 0.03...  
3  [[0.3573762841545923, 0.36315809898278406, 0.0...  
4  [[0.35334470923256306, 0.4212216192706316, 0.0...  
