# Example 04: PyTorch Featurization

This notebook demonstrates how to featurize larger data sets efficiently with the AUC method.

In [1]:
import torch
import numpy as np
from aenet.torch_featurize import ChebyshevDescriptor, BatchedFeaturizer

## 1. Basic Featurization: Water Molecule

Featurize a simple water molecule.

In [6]:
# Create descriptor
descriptor = ChebyshevDescriptor(
    species=['O', 'H'],
    rad_order=10,      # Radial polynomial order
    rad_cutoff=4.0,    # Radial cutoff (Angstroms)
    ang_order=3,       # Angular polynomial order
    ang_cutoff=1.5     # Angular cutoff (Angstroms)
)

# Water molecule positions
positions = np.array([
    [0.000, 0.000,  0.118],  # O
    [0.000, 0.755, -0.471],  # H
    [0.000, -0.755, -0.471]  # H
])
species = ['O', 'H', 'H']

# Featurize
features = descriptor.featurize_structure(positions, species)

print(f"Feature shape: {features.shape}")
print(f"Number of features per atom: {descriptor.get_n_features()}")
print(f"\nOxygen features (first 10):")
print(features[0, :10])
print(f"\nHydrogen features (first 10):")
print(features[1, :10])

Feature shape: (3, 30)
Number of features per atom: 30

Oxygen features (first 10):
[ 1.73027217 -0.90184124 -0.79016849  1.72553423 -1.00857502 -0.67416837
  1.71134635 -1.10978532 -0.55447614  1.68778623]

Hydrogen features (first 10):
[ 1.55284387 -0.61940903 -1.00023271  1.32777827 -0.12699451 -0.98696891
  0.79682369  0.12382831 -0.55096186  0.2992968 ]


## 2. Understanding Feature Dimensions

Features are organized as: [radial_unwt, angular_unwt, radial_wt, angular_wt]

In [3]:
# For 2 species with rad_order=10, ang_order=3:
# Radial features: 2 × (10+1) = 22
# Angular features: 2 × (3+1) = 8
# Total: 30 features per atom

rad_unwt = features[:, :11]      # Radial unweighted
ang_unwt = features[:, 11:15]    # Angular unweighted
rad_wt = features[:, 15:26]      # Radial weighted
ang_wt = features[:, 26:30]      # Angular weighted

print("Feature organization:")
print(f"  Radial unweighted:  indices 0-10   (11 features)")
print(f"  Angular unweighted: indices 11-14  (4 features)")
print(f"  Radial weighted:    indices 15-25  (11 features)")
print(f"  Angular weighted:   indices 26-29  (4 features)")

Feature organization:
  Radial unweighted:  indices 0-10   (11 features)
  Angular unweighted: indices 11-14  (4 features)
  Radial weighted:    indices 15-25  (11 features)
  Angular weighted:   indices 26-29  (4 features)


## 3. Periodic System: Crystal Structure

Featurize a crystal with periodic boundary conditions.

In [5]:
# AuCu crystal structure
positions_pbc = np.array([
    [0.0, 0.0, 0.0],
    [0.0, 0.5, 0.5],
    [0.5, 0.0, 0.5],
    [0.5, 0.5, 0.0]
])
species_pbc = ['Cu', 'Cu', 'Au', 'Au']

# Unit cell
cell = np.array([
    [4.0, 0.0, 0.0],
    [0.0, 4.0, 0.0],
    [0.0, 0.0, 4.0]
])
pbc = np.array([True, True, True])

# Create descriptor for Au-Cu system
descriptor_aucu = ChebyshevDescriptor(
    species=['Au', 'Cu'],
    rad_order=8,
    rad_cutoff=3.5,
    ang_order=5,
    ang_cutoff=3.5
)

# Featurize with PBC
features_pbc = descriptor_aucu.featurize_structure(
    positions_pbc, species_pbc, cell=cell, pbc=pbc
)

print(f"Crystal feature shape: {features_pbc.shape}")
print(f"Cu atom 0 features (first 10): {features_pbc[0, :10]}")
print(f"Au atom 2 features (first 10): {features_pbc[2, :10]}")

Crystal feature shape: (4, 30)
Cu atom 0 features (first 10): [ 2.70787659 -1.61372922 -0.78450829  2.54876737 -2.25331137  0.1369048
  2.09013755 -2.62809369  1.0422294   2.44419854]
Au atom 2 features (first 10): [ 2.70787659 -1.61372922 -0.78450829  2.54876737 -2.25331137  0.1369048
  2.09013755 -2.62809369  1.0422294   2.44419854]


## 4. Batch Processing

Efficiently featurize multiple structures.

In [7]:
# Create batch featurizer
batch_fzer = BatchedFeaturizer(descriptor)

# Multiple water molecules with slight perturbations
batch_positions = [
    torch.tensor(positions, dtype=torch.float64),
    torch.tensor(positions + 0.1 * np.random.randn(*positions.shape),
                 dtype=torch.float64),
    torch.tensor(positions + 0.1 * np.random.randn(*positions.shape),
                 dtype=torch.float64),
]
batch_species = [species, species, species]

# Featurize batch
features_batch, batch_indices = batch_fzer(
    batch_positions, batch_species
)

print(f"Batch features shape: {features_batch.shape}")
print(f"Batch indices shape: {batch_indices.shape}")
print(f"Structure 0 atoms: {(batch_indices == 0).sum().item()}")
print(f"Structure 1 atoms: {(batch_indices == 1).sum().item()}")
print(f"Structure 2 atoms: {(batch_indices == 2).sum().item()}")

Batch features shape: torch.Size([9, 30])
Batch indices shape: torch.Size([9])
Structure 0 atoms: 3
Structure 1 atoms: 3
Structure 2 atoms: 3


## 5. GPU Acceleration

Use GPU for faster featurization (if available).

In [8]:
if torch.cuda.is_available():
    print("CUDA available - creating GPU descriptor")
    
    # Create descriptor on GPU
    descriptor_gpu = ChebyshevDescriptor(
        species=['O', 'H'],
        rad_order=10,
        rad_cutoff=4.0,
        ang_order=3,
        ang_cutoff=1.5,
        device='cuda'
    )
    
    # Featurize on GPU (input automatically moved)
    features_gpu = descriptor_gpu.featurize_structure(positions, species)
    
    print(f"GPU features shape: {features_gpu.shape}")
    print(f"Features computed on GPU")
else:
    print("CUDA not available - using CPU")

CUDA not available - using CPU


## 6. Gradient Computation

Compute feature gradients for force calculations.

In [9]:
# Enable gradient tracking
positions_torch = torch.tensor(
    positions, dtype=torch.float64, requires_grad=True
)

# Compute features with gradients
features_torch = descriptor(positions_torch, species)

# Compute gradient via backpropagation
loss = features_torch.sum()
loss.backward()

print("Position gradients:")
print(f"Shape: {positions_torch.grad.shape}")
print(f"Oxygen gradient: {positions_torch.grad[0]}")

Position gradients:
Shape: torch.Size([3, 3])
Oxygen gradient: tensor([0.0000, 0.0000, 5.9964], dtype=torch.float64)
