In [1]:
# ============================================================================
# MATTERSIM STRUCTURE RELAXATION PIPELINE
# ============================================================================
# This notebook uses MatterSim to:
# 1. Compute single-point energies for generated crystal structures
# 2. Perform geometry relaxation to find energy minima
# 3. Export results for further analysis
#
# Requirements: Google Colab with GPU runtime (Runtime > Change runtime type > GPU)
# ============================================================================

In [2]:
# ============================================================================
# SECTION 1: ENVIRONMENT SETUP
# ============================================================================
# Install system dependencies and clone the MatterSim repository
# Note: We rename the repo to avoid package name conflicts
# ============================================================================

# Install git-lfs for downloading large model files
!apt-get -y -qq install git-lfs > /dev/null 2>&1

# Remove any existing installation to ensure clean state
!rm -rf mattersim_repo

# Clone MatterSim repo with a distinct name to avoid import conflicts
!git clone https://github.com/microsoft/mattersim.git mattersim_repo

%cd /content/mattersim_repo

# Initialize git-lfs and download pretrained model weights
!git lfs install
!git lfs pull -I "pretrained_models/*" --exclude=""

# Install the package in editable mode along with required dependencies
# -q flag suppresses verbose output
!pip install -q -e .
!pip install -q ase tqdm

print("✓ Environment setup complete")

Cloning into 'mattersim_repo'...
remote: Enumerating objects: 1515, done.[K
remote: Counting objects: 100% (317/317), done.[K
remote: Compressing objects: 100% (152/152), done.[K
remote: Total 1515 (delta 243), reused 165 (delta 165), pack-reused 1198 (from 2)[K
Receiving objects: 100% (1515/1515), 132.83 MiB | 17.82 MiB/s, done.
Resolving deltas: 100% (729/729), done.
/content/mattersim_repo
Updated git hooks.
Git LFS initialized.
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.5/88.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [3]:
# ============================================================================
# SECTION 2: VERIFY INSTALLATION AND IMPORTS
# ============================================================================
# Import required packages and verify MatterSim is accessible
# If import fails, we explicitly add the source directory to Python's path
# ============================================================================

import sys
from pathlib import Path
import torch

# Define paths
REPO_ROOT = Path("/content/mattersim_repo")
SRC_DIR = REPO_ROOT / "src"

# Verify repo exists
if not REPO_ROOT.exists():
    raise RuntimeError(
        f"Repository not found at {REPO_ROOT}. "
        "Please run Section 1 first."
    )

# Add source directory to Python path to ensure imports work
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# Import MatterSim components
try:
    import mattersim
    from mattersim.forcefield import MatterSimCalculator
    print(f"✓ MatterSim imported from: {mattersim.__file__}")
except ImportError as e:
    raise ImportError(
        "Failed to import mattersim. Check that Section 1 completed successfully."
    ) from e

# Import other required packages
from ase.io import read, write
from ase.optimize import FIRE
import pandas as pd
from tqdm import tqdm

# Verify pretrained models exist
PRETRAINED_DIR = REPO_ROOT / "pretrained_models"
model_files = sorted(PRETRAINED_DIR.glob("*.pth"))

if not model_files:
    raise RuntimeError(
        f"No pretrained model files found in {PRETRAINED_DIR}. "
        "Check that git-lfs pull succeeded."
    )

print("\nAvailable models:")
for model in model_files:
    print(f"  - {model.name}")

# Select model (prefer 1M for speed, fall back to first available)
MODEL_PATH = None
for model in model_files:
    if "1M" in model.name:
        MODEL_PATH = model
        break
if MODEL_PATH is None:
    MODEL_PATH = model_files[0]

print(f"\n✓ Using model: {MODEL_PATH.name}")

# Check GPU availability
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✓ Computing device: {DEVICE}")
if DEVICE == "cpu":
    print("  ⚠ Warning: GPU not detected. Calculations will be slow.")
    print("  Consider: Runtime > Change runtime type > Hardware accelerator > GPU")

✓ MatterSim imported from: /content/mattersim_repo/src/mattersim/__init__.py

Available models:
  - mattersim-v1.0.0-1M.pth
  - mattersim-v1.0.0-5M.pth

✓ Using model: mattersim-v1.0.0-1M.pth
✓ Computing device: cuda


In [4]:
# ============================================================================
# SECTION 3: UPLOAD AND EXTRACT STRUCTURES
# ============================================================================
# Upload the combined_cif.zip file containing your generated structures
# Expected structure: combined_cif.zip contains a folder combined_cif/
#                     which contains struct_0001.cif, struct_0002.cif, etc.
# ============================================================================

from google.colab import files
uploaded = files.upload()

# Extract Files
!unzip combined_cif.zip -d combined_cif
!ls combined_cif | head

Saving combined_cif.zip to combined_cif.zip
Archive:  combined_cif.zip
   creating: combined_cif/combined_cif/
  inflating: combined_cif/combined_cif/struct_0001.cif  
  inflating: combined_cif/combined_cif/struct_0002.cif  
  inflating: combined_cif/combined_cif/struct_0003.cif  
  inflating: combined_cif/combined_cif/struct_0004.cif  
  inflating: combined_cif/combined_cif/struct_0005.cif  
  inflating: combined_cif/combined_cif/struct_0006.cif  
  inflating: combined_cif/combined_cif/struct_0007.cif  
  inflating: combined_cif/combined_cif/struct_0008.cif  
  inflating: combined_cif/combined_cif/struct_0009.cif  
  inflating: combined_cif/combined_cif/struct_0010.cif  
  inflating: combined_cif/combined_cif/struct_0011.cif  
  inflating: combined_cif/combined_cif/struct_0012.cif  
  inflating: combined_cif/combined_cif/struct_0013.cif  
  inflating: combined_cif/combined_cif/struct_0014.cif  
  inflating: combined_cif/combined_cif/struct_0015.cif  
  inflating: combined_cif/combined

In [5]:
# Define CIF directory path
# Adjust this if your zip has a different internal structure
CIF_DIR = Path("/content/mattersim_repo/combined_cif/combined_cif")

if not CIF_DIR.exists():
    raise RuntimeError(
        f"Expected CIF directory at {CIF_DIR} but it doesn't exist. "
        f"Check the internal structure of your zip file."
    )

# Count and display sample files
cif_files = sorted(CIF_DIR.glob("*.cif"))
print(f"\n✓ Found {len(cif_files)} CIF files")
print("\nFirst 5 files:")
for cif in cif_files[:5]:
    print(f"  - {cif.name}")


✓ Found 800 CIF files

First 5 files:
  - struct_0001.cif
  - struct_0002.cif
  - struct_0003.cif
  - struct_0004.cif
  - struct_0005.cif


In [6]:
# ============================================================================
# SECTION 4: INITIALIZE CALCULATOR
# ============================================================================
# Create a single MatterSimCalculator instance that will be reused for all
# structures. This is much more efficient than creating a new calculator
# for each structure.
# ============================================================================

print("Initializing MatterSim calculator...")
print(f"  Model: {MODEL_PATH.name}")
print(f"  Device: {DEVICE}")

calc = MatterSimCalculator(
    load_path=str(MODEL_PATH),
    device=DEVICE
)

print("✓ Calculator initialized")

Initializing MatterSim calculator...
  Model: mattersim-v1.0.0-1M.pth
  Device: cuda
[32m2025-11-19 18:45:10.704[0m | [1mINFO    [0m | [36mmattersim.forcefield.potential[0m:[36mfrom_checkpoint[0m:[36m893[0m - [1mLoading the model from /content/mattersim_repo/pretrained_models/mattersim-v1.0.0-1M.pth[0m
✓ Calculator initialized


In [7]:
# ============================================================================
# SECTION 5: SINGLE-STRUCTURE TEST
# ============================================================================
# Before processing all structures, test the pipeline on a single structure
# to verify everything works correctly
# ============================================================================

print("Testing pipeline with struct_0001.cif...")

test_cif = CIF_DIR / "struct_0001.cif"
if not test_cif.exists():
    raise RuntimeError(f"Test file {test_cif} not found")

# Read structure
atoms = read(str(test_cif))
print(f"  Atoms: {len(atoms)}")
print(f"  Formula: {atoms.get_chemical_formula()}")

# Attach calculator and compute energy
atoms.calc = calc
energy = atoms.get_potential_energy()
forces = atoms.get_forces()

print(f"  Energy: {energy:.6f} eV")
print(f"  Forces shape: {forces.shape}")
print("✓ Single-structure test passed")

Testing pipeline with struct_0001.cif...
  Atoms: 4
  Formula: BaO2Ti
  Energy: -28.487114 eV
  Forces shape: (4, 3)
✓ Single-structure test passed


In [8]:
# ============================================================================
# SECTION 6: COMPUTE SINGLE-POINT ENERGIES
# ============================================================================
# Calculate the energy of each structure in its as-generated geometry
# This provides a baseline before relaxation
# ============================================================================

# Create output directory
ENERGY_OUT_DIR = Path("/content/mattersim_results")
ENERGY_OUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Computing single-point energies for {len(cif_files)} structures...")

energy_records = []

for cif_file in tqdm(cif_files, desc="Energy evaluation"):
    try:
        # Read structure
        atoms = read(str(cif_file))

        # Attach calculator
        atoms.calc = calc

        # Compute energy
        energy = atoms.get_potential_energy()

        # Store results
        energy_records.append({
            "file": cif_file.name,
            "energy_eV": energy,
            "energy_per_atom_eV": energy / len(atoms),
            "natoms": len(atoms),
            "formula": atoms.get_chemical_formula(),
        })

    except Exception as e:
        print(f"  ✗ Failed: {cif_file.name} - {e}")
        energy_records.append({
            "file": cif_file.name,
            "error": str(e),
        })

# Save results
energy_df = pd.DataFrame(energy_records)
energy_csv = ENERGY_OUT_DIR / "energy_summary.csv"
energy_df.to_csv(energy_csv, index=False)

print(f"\n✓ Results saved to: {energy_csv}")
print(f"✓ Successfully processed: {energy_df['energy_eV'].notna().sum()}/{len(cif_files)}")

# Display summary statistics
if not energy_df.empty and 'energy_per_atom_eV' in energy_df.columns:
    valid_energies = energy_df['energy_per_atom_eV'].dropna()
    if len(valid_energies) > 0:
        print("\nEnergy statistics (eV/atom):")
        print(f"  Mean:   {valid_energies.mean():.4f}")
        print(f"  Median: {valid_energies.median():.4f}")
        print(f"  Std:    {valid_energies.std():.4f}")
        print(f"  Range:  [{valid_energies.min():.4f}, {valid_energies.max():.4f}]")

energy_df.head(10)

Computing single-point energies for 800 structures...


Energy evaluation: 100%|██████████| 800/800 [00:35<00:00, 22.71it/s]



✓ Results saved to: /content/mattersim_results/energy_summary.csv
✓ Successfully processed: 800/800

Energy statistics (eV/atom):
  Mean:   -7.3290
  Median: -7.4120
  Std:    0.6417
  Range:  [-8.8189, -3.7729]


Unnamed: 0,file,energy_eV,energy_per_atom_eV,natoms,formula
0,struct_0001.cif,-28.487114,-7.121778,4,BaO2Ti
1,struct_0002.cif,-111.921272,-7.461418,15,Ba2O10Ti3
2,struct_0003.cif,-42.632149,-7.105358,6,BaO4Ti
3,struct_0004.cif,-41.544533,-6.924089,6,Ba2O3Ti
4,struct_0005.cif,-69.632904,-7.736989,9,Ba2O5Ti2
5,struct_0006.cif,-130.295517,-8.14347,16,Ba2O10Ti4
6,struct_0007.cif,-41.582397,-6.930399,6,Ba2O3Ti
7,struct_0008.cif,-79.956802,-7.99568,10,Ba2O6Ti2
8,struct_0009.cif,-40.294598,-6.715766,6,BaOTi4
9,struct_0010.cif,-83.25975,-6.938313,12,Ba4O6Ti2


In [9]:
# ============================================================================
# SECTION 7: GEOMETRY RELAXATION
# ============================================================================
# Use the FIRE optimizer to find the nearest local energy minimum for each
# structure. This improves the geometry and provides more realistic energies.
#
# Parameters:
#   fmax: Maximum force threshold (eV/Å) for convergence
#   steps: Maximum number of optimization steps
# ============================================================================

# Create output directory
RELAX_OUT_DIR = Path("/content/mattersim_results_relaxed")
RELAX_OUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Relaxing {len(cif_files)} structures...")
print("  Convergence criterion: fmax = 0.02 eV/Å")
print("  Maximum steps: 200\n")

relax_records = []

for cif_file in tqdm(cif_files, desc="Relaxation"):
    try:
        # Read structure
        atoms = read(str(cif_file))
        atoms.calc = calc

        # Energy before relaxation
        e_before = atoms.get_potential_energy()

        # Run FIRE optimization
        # logfile=None suppresses per-step output
        optimizer = FIRE(atoms, logfile=None)
        optimizer.run(fmax=0.02, steps=200)

        # Energy after relaxation
        e_after = atoms.get_potential_energy()

        # Save relaxed structure
        relaxed_name = cif_file.stem + "_relaxed.cif"
        relaxed_path = RELAX_OUT_DIR / relaxed_name
        write(str(relaxed_path), atoms)

        # Record results
        relax_records.append({
            "file": cif_file.name,
            "energy_before_eV": e_before,
            "energy_after_eV": e_after,
            "delta_energy_eV": e_after - e_before,
            "energy_per_atom_eV": e_after / len(atoms),
            "natoms": len(atoms),
            "formula": atoms.get_chemical_formula(),
            "relaxed_file": relaxed_name,
        })

    except Exception as e:
        print(f"  ✗ Failed: {cif_file.name} - {e}")
        relax_records.append({
            "file": cif_file.name,
            "error": str(e),
        })

# Save results
relax_df = pd.DataFrame(relax_records)
relax_csv = RELAX_OUT_DIR / "relaxation_summary.csv"
relax_df.to_csv(relax_csv, index=False)

print(f"\n✓ Results saved to: {relax_csv}")
print(f"✓ Successfully relaxed: {relax_df['energy_after_eV'].notna().sum()}/{len(cif_files)}")

# Display summary statistics
if not relax_df.empty and 'delta_energy_eV' in relax_df.columns:
    valid_deltas = relax_df['delta_energy_eV'].dropna()
    if len(valid_deltas) > 0:
        print("\nRelaxation energy change (eV):")
        print(f"  Mean:   {valid_deltas.mean():.4f}")
        print(f"  Median: {valid_deltas.median():.4f}")
        print(f"  Std:    {valid_deltas.std():.4f}")
        print(f"  Range:  [{valid_deltas.min():.4f}, {valid_deltas.max():.4f}]")

relax_df.head(10)

Relaxing 800 structures...
  Convergence criterion: fmax = 0.02 eV/Å
  Maximum steps: 200



Relaxation: 100%|██████████| 800/800 [16:17<00:00,  1.22s/it]


✓ Results saved to: /content/mattersim_results_relaxed/relaxation_summary.csv
✓ Successfully relaxed: 800/800

Relaxation energy change (eV):
  Mean:   -0.1494
  Median: -0.0276
  Std:    0.5002
  Range:  [-7.4287, 0.0000]





Unnamed: 0,file,energy_before_eV,energy_after_eV,delta_energy_eV,energy_per_atom_eV,natoms,formula,relaxed_file
0,struct_0001.cif,-28.487114,-28.489346,-0.002232,-7.122336,4,BaO2Ti,struct_0001_relaxed.cif
1,struct_0002.cif,-111.921272,-112.093506,-0.172234,-7.4729,15,Ba2O10Ti3,struct_0002_relaxed.cif
2,struct_0003.cif,-42.632149,-42.685551,-0.053402,-7.114258,6,BaO4Ti,struct_0003_relaxed.cif
3,struct_0004.cif,-41.544529,-41.705463,-0.160934,-6.950911,6,Ba2O3Ti,struct_0004_relaxed.cif
4,struct_0005.cif,-69.632904,-69.656021,-0.023117,-7.739558,9,Ba2O5Ti2,struct_0005_relaxed.cif
5,struct_0006.cif,-130.295517,-130.694244,-0.398727,-8.16839,16,Ba2O10Ti4,struct_0006_relaxed.cif
6,struct_0007.cif,-41.582397,-41.718246,-0.135849,-6.953041,6,Ba2O3Ti,struct_0007_relaxed.cif
7,struct_0008.cif,-79.956802,-79.956848,-4.6e-05,-7.995685,10,Ba2O6Ti2,struct_0008_relaxed.cif
8,struct_0009.cif,-40.294598,-40.296642,-0.002045,-6.716107,6,BaOTi4,struct_0009_relaxed.cif
9,struct_0010.cif,-83.25975,-83.282677,-0.022926,-6.940223,12,Ba4O6Ti2,struct_0010_relaxed.cif


In [10]:
# ============================================================================
# SECTION 8: PACKAGE RESULTS
# ============================================================================
# Create zip archives of results for download
# ============================================================================

from google.colab import files

print("Creating zip archives...")

# Zip relaxed structures and summary
!zip -r -q /content/mattersim_results_relaxed.zip /content/mattersim_results_relaxed

# Zip both CSV summaries
!zip -j -q /content/mattersim_csv_summaries.zip \
    /content/mattersim_results/energy_summary.csv \
    /content/mattersim_results_relaxed/relaxation_summary.csv

print("✓ Archives created")
print("\nDownloading files...")

# Download relaxed structures
print("  1. Downloading mattersim_results_relaxed.zip...")
files.download("/content/mattersim_results_relaxed.zip")

# Download CSV summaries
print("  2. Downloading mattersim_csv_summaries.zip...")
files.download("/content/mattersim_csv_summaries.zip")

print("\n✓ Downloads complete")

Creating zip archives...
✓ Archives created

Downloading files...
  1. Downloading mattersim_results_relaxed.zip...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

  2. Downloading mattersim_csv_summaries.zip...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✓ Downloads complete
