In [None]:
# ============================================================================
# MATTERGEN CRYSTAL STRUCTURE GENERATION - Ba-Ti-O SYSTEM
# ============================================================================
# This notebook generates stable crystal structures for the Ba-Ti-O chemical
# system using Microsoft's MatterGen diffusion model.
#
# Project: MS508 Group Project
# Chemical System: Ba-Ti-O (Barium-Titanium-Oxygen)
# Target: Structures with energy_above_hull ≤ 0.05 eV/atom (thermodynamically stable)
# ============================================================================

In [None]:
# ============================================================================
# NOTES:
# ============================================================================
#
# DEPENDENCY MANAGEMENT:
# - MatterGen officially requires PyTorch 2.2.1+cu118, but we use 2.5.1+cu121
#   for better compatibility with Colab's GPU environment
# - Some version conflicts are expected but don't affect core functionality
# - numpy was downgraded to <2.0 as required by MatterGen
# - monty kept at 2024.7.30 as specified by MatterGen (pymatgen conflict OK)
#
# GENERATION PARAMETERS:
# 1. Energy above hull: Measures thermodynamic stability relative to competing
#    phases. Values ≤0.05 eV/atom indicate structures likely to be synthesizable
#    under normal conditions.
#
# 2. Diffusion guidance factor: Strength of classifier-free guidance, i.e. how
#    strongly sampling is pushed toward the conditioned property values. Higher
#    values (e.g. 3.0) make the samples adhere more closely to the requested
#    targets, at the expense of diversity and realism in the generated ensemble.
#
# 3. Ba-Ti-O system: This chemical space includes barium titanate (BaTiO3) and
#    related perovskite structures known for ferroelectric and piezoelectric
#    properties. These materials are widely used in capacitors, sensors, and
#    actuators.
#
# OUTPUT FORMAT:
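# - MatterGen writes its results to the specified output directory. Per the
#   MatterGen README these are generated_crystals_cif.zip (one CIF file per
#   structure) and generated_crystals.extxyz; loose .cif files appear only
#   after the ZIP is extracted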
# - Each CIF contains atomic positions, lattice parameters, and space group info
# - Structures can be analyzed using:
#   * ASE (Atomic Simulation Environment) - Python-based analysis
#   * pymatgen - Materials analysis and property prediction
#   * VESTA - 3D visualization software
#   * Materials Project database - Compare with known structures
#
# TROUBLESHOOTING:
# - If generation fails, check GPU memory with: !nvidia-smi
# - Reduce batch_size if CUDA out-of-memory errors occur
# - Dependency warnings are normal and usually don't affect functionality
# ============================================================================

In [None]:
# ----------------------------------------------------------------------------
# SECTION 1: Install Git LFS and Clone MatterGen
# ----------------------------------------------------------------------------
# Set up Git LFS for downloading large model files first
!apt-get -y -qq install git-lfs
!git lfs install

# Clone MatterGen repository
!git clone https://github.com/microsoft/mattergen.git
%cd mattergen

In [None]:
# ----------------------------------------------------------------------------
# SECTION 2: Install PyTorch with CUDA Support
# ----------------------------------------------------------------------------
# Install PyTorch 2.5.1 with CUDA 12.1 support for GPU acceleration
# Note: MatterGen officially requires 2.2.1+cu118, but 2.5.1+cu121 works
print("Installing PyTorch 2.5.1 with CUDA 12.1 support...")

!pip install -q --index-url https://download.pytorch.org/whl/cu121 \
  torch==2.5.1+cu121 torchvision torchaudio

# Verify PyTorch installation and CUDA availability
import torch
import platform

print("\n=== Environment Check ===")
print("Python version:", platform.python_version())
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("========================\n")

In [None]:
# ----------------------------------------------------------------------------
# SECTION 3: Install MatterGen Core Dependencies
# ----------------------------------------------------------------------------
print("Installing MatterGen core dependencies...")

# Install configuration management tools
!pip install -q fire hydra-core==1.3.1 hydra-joblib-launcher==1.1.5

# Install deep learning and scientific computing libraries
!pip install -q einops transformers accelerate safetensors tqdm requests contextlib2 lmdb SMACT

# Install materials science packages with MatterGen-compatible versions
!pip install -q "ase==3.25.0" "matplotlib==3.8.4" "monty==2024.7.30"

# Downgrade numpy to satisfy MatterGen requirements
!pip install -q "numpy<2.0"

# Install PyTorch Lightning
!pip install -q pytorch-lightning==2.0.6

In [None]:
# ----------------------------------------------------------------------------
# SECTION 4: Verify MatterGen Core Dependency Versions
# ----------------------------------------------------------------------------
# Checks that the versions pinned in Section 3 are the ones actually installed.
# Only package metadata is read -- nothing is imported -- so this cell does not
# load numpy (or any other package) into the kernel before later reinstalls.
from importlib.metadata import PackageNotFoundError, version

# Exact pins requested by the Section 3 install commands.
EXPECTED_VERSIONS = {
    "hydra-core": "1.3.1",
    "hydra-joblib-launcher": "1.1.5",
    "ase": "3.25.0",
    "matplotlib": "3.8.4",
    "monty": "2024.7.30",
    "pytorch-lightning": "2.0.6",
}


def installed_versions(package_names):
    """Look up the installed version of each distribution.

    Args:
        package_names: iterable of distribution names as passed to pip.

    Returns:
        dict mapping every requested name to its installed version string,
        or to None when the distribution is not installed.
    """
    found = {}
    for name in package_names:
        try:
            found[name] = version(name)
        except PackageNotFoundError:
            found[name] = None
    return found


print("Verifying MatterGen core dependencies...")
current_versions = installed_versions([*EXPECTED_VERSIONS, "numpy"])

for package, wanted in EXPECTED_VERSIONS.items():
    have = current_versions[package]
    status = "OK" if have == wanted else "MISMATCH"
    print(f"  {package:<24} expected {wanted:<10} installed {have}  [{status}]")

# numpy is constrained by a range (<2.0) rather than an exact pin.
numpy_version = current_versions["numpy"]
numpy_ok = numpy_version is not None and int(numpy_version.split(".")[0]) < 2
numpy_status = "OK" if numpy_ok else "MISMATCH"
print(f"  {'numpy':<24} expected {'<2.0':<10} installed {numpy_version}  [{numpy_status}]")

In [None]:
# ----------------------------------------------------------------------------
# SECTION 5: Install Additional MatterGen Dependencies
# ----------------------------------------------------------------------------
print("Installing additional MatterGen requirements...")

# Install materials science and development tools
!pip install -q matscipy mattersim autopep8 pylint emmet-core

In [None]:
# ----------------------------------------------------------------------------
# SECTION 6: Install MatterGen Package
# ----------------------------------------------------------------------------
print("Installing MatterGen package...")

# Install MatterGen without strict dependency checking
# (we've already installed compatible versions manually)
!pip install -q --no-deps -e .

In [None]:
# ----------------------------------------------------------------------------
# SECTION 7: Download Pre-trained Model Checkpoint
# ----------------------------------------------------------------------------
print("Downloading pre-trained model checkpoint...")

!git lfs pull -I checkpoints/mattergen_base --exclude=""

# Verify checkpoint exists
!ls -lh checkpoints/mattergen_base/ 2>/dev/null || echo "Checkpoint directory not found"

print("\n=== Installation Complete ===\n")

In [None]:
# Quick fix: Force numpy downgrade
!pip install --force-reinstall -q "numpy<2.0"
print("Numpy downgraded successfully")

In [None]:
# ----------------------------------------------------------------------------
# SECTION 8: Generate Ba-Ti-O Crystal Structures
# ----------------------------------------------------------------------------
import os

# Use a simple local directory (change this path if needed)
output_path = './ba_ti_o_results'

# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

print(f"Output directory: {output_path}")
print("\n=== Starting Structure Generation ===")
print("Chemical system: Ba-Ti-O (note: MatterGen sorts to Ba-O-Ti internally)")
print("Energy above hull: ≤0.05 eV/atom")
print("Batch size: 100 structures")
print("Diffusion guidance factor: 3.0")
print("=====================================\n")

# Generate crystal structures with specified conditions:
# - Chemical system: Ba-O-Ti (sorted alphabetically as MatterGen requires)
# - Energy above hull: ≤0.05 eV/atom (ensures thermodynamic stability)
# - Diffusion guidance factor: 3.0 (controls generation quality vs diversity)
# - Batch size: 100 structures per batch
# - Number of batches: 1 (total of 100 structures)

!mattergen-generate ./ba_ti_o_results \
  --pretrained-name chemical_system_energy_above_hull \
  --properties_to_condition_on "{'chemical_system': 'Ba-O-Ti', 'energy_above_hull': 0.05}" \
  --diffusion_guidance_factor 3.0 \
  --batch_size 3 \
  --num_batches 1

print("\n=== Generation Complete ===")
print(f"Results saved to: {output_path}")

# List generated files
print("\n=== Generated Files ===")
!ls -lh ./ba_ti_o_results/

# Show first few lines of a sample CIF file if available
print("\n=== Sample Structure (first 20 lines) ===")
!find ./ba_ti_o_results -name "*.cif" | head -1 | xargs head -20

print("\n=== Generation Complete ===")
print(f"Results saved to: {output_path}")
print("Check the output directory for generated CIF files.")