# Deep Reaction Model Training


## 1. Import Required Libraries

In [1]:
import torch
import os
import sys
from pathlib import Path

# Put the notebook's working directory first on sys.path so that the local
# `deepreaction` package imported below is found.
sys.path.insert(0, str(Path.cwd()))

from deepreaction import Config, ReactionDataset, ReactionTrainer

## 2. Configuration Parameters

Define all hyperparameters and settings for the training process.

In [2]:
# Master parameter dictionary. Later cells merge the model, training and
# system settings into it before it is handed to Config.from_params.
params = dict(
    # --- Dataset configuration ---
    dataset='XTB',
    readout='mean',
    dataset_root='./dataset/DATASET_DA_F',
    dataset_csv='./dataset/DATASET_DA_F/dataset_xtb_final.csv',
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    target_fields=['DG_act', 'DrG'],
    target_weights=[1.0, 1.0],
    input_features=['DG_act_xtb', 'DrG_xtb'],
    file_patterns=['*_reactant.xyz', '*_ts.xyz', '*_product.xyz'],
    file_dir_pattern='reaction_*',
    id_field='ID',
    dir_field='R_dir',
    reaction_field='smiles',

    # --- Cross-validation and related split settings ---
    cv_folds=0,
    use_scaler=True,
    val_csv=None,
    test_csv=None,
    cv_test_fold=-1,
    cv_stratify=False,
    cv_grouped=True,
    file_suffixes=['_reactant.xyz', '_ts.xyz', '_product.xyz'],
)

In [3]:
# Model architecture settings, kept in their own dictionary so they can be
# inspected separately before being merged into `params`.
model_params = dict(
    # --- General model settings ---
    model_type='dimenet++',
    node_dim=128,
    dropout=0.1,
    prediction_hidden_layers=3,
    prediction_hidden_dim=512,
    use_layer_norm=False,
    activation='silu',
    use_xtb_features=True,
    max_num_atoms=100,

    # --- DimeNet++ specific parameters ---
    hidden_channels=128,
    num_blocks=5,
    int_emb_size=64,
    basis_emb_size=8,
    out_emb_channels=256,
    num_spherical=7,
    num_radial=6,
    cutoff=5.0,
    envelope_exponent=5,
    num_before_skip=1,
    num_after_skip=2,
    num_output_layers=3,
    max_num_neighbors=32,

    # --- Readout layer parameters ---
    readout_hidden_dim=128,
    readout_num_heads=4,
    readout_num_sabs=2,
)

# Merge the model settings into the master parameter dictionary.
params.update(model_params)

In [4]:
# Optimisation and schedule settings for the training run.
#
# NOTE(review): warmup_epochs (10) and early_stopping_patience (40) are both
# larger than max_epochs (4). Presumably this is a shortened demo run — confirm
# that the scheduler and early stopping behave as intended with these values.
training_params = dict(
    batch_size=16,
    eval_batch_size=32,
    lr=0.0005,
    finetune_lr=None,
    max_epochs=4,
    min_epochs=0,
    early_stopping_patience=40,
    early_stopping_min_delta=0.0001,
    optimizer='adamw',
    scheduler='warmup_cosine',
    warmup_epochs=10,
    min_lr=1e-7,
    weight_decay=0.0001,
    random_seed=42234,
    loss_function='mse',
    gradient_clip_val=0.0,
    gradient_accumulation_steps=1,
    precision='32',
)

# Merge the training settings into the master parameter dictionary.
params.update(training_params)

In [5]:
# Output locations, run mode and hardware/logging settings.
system_params = dict(
    # --- Output and checkpointing ---
    out_dir='./results/reaction_model',
    save_best_model=True,
    save_last_model=False,
    save_predictions=True,
    save_interval=0,
    checkpoint_path=None,
    mode='train',
    freeze_base_model=False,

    # --- Hardware configuration and logging ---
    cuda=True,
    gpu_id=0,
    num_workers=4,
    strategy='auto',
    num_nodes=1,
    devices=1,
    log_level='info',
    log_to_file=False,
)

# Merge the system settings into the master parameter dictionary.
params.update(system_params)

# Report how many settings the merged dictionary now holds.
print("Configuration parameters loaded successfully!")
print(f"Total parameters: {len(params)}")

Configuration parameters loaded successfully!
Total parameters: 82


## 3. Device Setup

Configure GPU/CPU usage and check available hardware.

In [6]:
# Setup device (GPU/CPU)
#
# Selects the torch device from params['cuda'] / params['gpu_id'] and prints the
# chosen hardware. When CUDA is not requested or not available the cell falls
# back to the CPU and records that by setting params['cuda'] = False.
if params['cuda'] and torch.cuda.is_available():
    # Fail early with a readable message when the configured index does not
    # correspond to a visible device, instead of erroring inside a later CUDA call.
    gpu_id = int(params['gpu_id'])
    num_visible_gpus = torch.cuda.device_count()
    if not 0 <= gpu_id < num_visible_gpus:
        raise ValueError(
            f"params['gpu_id']={gpu_id} is out of range: "
            f"{num_visible_gpus} CUDA device(s) visible"
        )

    # NOTE(review): CUDA_VISIBLE_DEVICES is exported after CUDA has already been
    # queried above, while the device below still uses the unmapped index.
    # Presumably the variable is intended for code started later — confirm it
    # has the intended effect when gpu_id != 0.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    device = torch.device(f"cuda:{gpu_id}")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(device).total_memory / 1e9:.1f} GB")
else:
    # Hide all GPUs and make the configuration reflect the CPU fallback.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    device = torch.device("cpu")
    print("Using CPU")
    params['cuda'] = False

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Using GPU: NVIDIA GeForce RTX 3080 Ti
GPU Memory: 12.6 GB
PyTorch version: 2.3.0+cu121
CUDA available: True


## 4. Create Configuration Object

Initialize the configuration object from parameters and display key settings.

In [7]:
print("Creating configuration...")
config = Config.from_params(params)

# Show a short summary of the key settings, or the full configuration when the
# log level is 'debug'.
if params['log_level'] != 'debug':
    for summary_line in (
        f"Dataset: {config.dataset.dataset}",
        f"Model: {config.model.model_type}",
        f"Target fields: {config.dataset.target_fields}",
        f"Input features: {config.dataset.input_features}",
        f"Batch size: {config.training.batch_size}",
        f"Learning rate: {config.training.lr}",
        f"Max epochs: {config.training.max_epochs}",
        f"Output directory: {config.training.out_dir}",
    ):
        print(summary_line)
else:
    config.print_config()

print("\nConfiguration created successfully!")

Creating configuration...
Dataset: XTB
Model: dimenet++
Target fields: ['DG_act', 'DrG']
Input features: ['DG_act_xtb', 'DrG_xtb']
Batch size: 16
Learning rate: 0.0005
Max epochs: 4
Output directory: ./results/reaction_model

Configuration created successfully!


## 5. Load Dataset

Load and prepare the reaction dataset for training.

In [8]:
print("Loading dataset...")
dataset = ReactionDataset(config=config)

# A positive fold count means cross-validation; otherwise a single
# train/validation/test split is fetched and its sizes are reported.
cross_validation_enabled = config.reaction.cv_folds > 0
if not cross_validation_enabled:
    train_data, val_data, test_data, scalers = dataset.get_data_splits()
    print(f"Dataset loaded: train={len(train_data)}, val={len(val_data)}, test={len(test_data)}")
else:
    print(f"Cross-validation enabled with {dataset.get_num_folds()} folds.")

print("Dataset loaded successfully!")

2025-05-23 13:31:26,344 - deepreaction - INFO - Loading reaction dataset...
2025-05-23 13:31:26,346 - deepreaction - INFO - Loading single dataset with automatic train/val/test split


Loading dataset...
Target fields changed from None to ['DG_act', 'DrG']
Removing old processed file: dataset/DATASET_DA_F/processed/data_f74edeff.pt
Using target fields: ['DG_act', 'DrG']
Using input features: ['DG_act_xtb', 'DrG_xtb']
Using file suffixes: reactant='_reactant.xyz', ts='_ts.xyz', product='_product.xyz'


Processing reactions:  76%|███████▌  | 1202/1582 [00:00<00:00, 1997.64it/s]



Processing reactions: 100%|██████████| 1582/1582 [00:00<00:00, 1986.28it/s]


Saved metadata to dataset/DATASET_DA_F/processed/metadata.json
Processed 1580 reactions, saved to dataset/DATASET_DA_F/processed/data_f74edeff.pt
Dataset split: train 1269, validation 162, test 149 samples


2025-05-23 13:31:27,793 - deepreaction - INFO - Data splits: train=1269, val=162, test=149
2025-05-23 13:31:27,808 - deepreaction - INFO - Scaler 0: mean=37.6035, std=8.8419
2025-05-23 13:31:27,822 - deepreaction - INFO - Scaler 1: mean=-4.6943, std=16.5769
2025-05-23 13:31:27,822 - deepreaction - INFO - Trained 2 scalers
2025-05-23 13:31:28,994 - deepreaction - INFO - Loaded train: 1269, val: 162, test: 149


Dataset loaded: train=1269, val=162, test=149
Dataset loaded successfully!


## 6. Initialize Trainer

Set up the training environment and model.

In [9]:
print("Initializing trainer...")
# Build the trainer from the same configuration object used for the dataset.
trainer = ReactionTrainer(config=config)

# Confirm initialisation and announce the configured number of epochs.
print(
    "Trainer initialized successfully",
    f"Starting training with {config.training.max_epochs} epochs",
    sep="\n",
)

2025-05-23 13:31:29,000 - deepreaction - INFO - Using GPU: NVIDIA GeForce RTX 3080 Ti
2025-05-23 13:31:29,002 - deepreaction - INFO - Validating configuration...
2025-05-23 13:31:29,003 - deepreaction - INFO - Configuration validation completed


Initializing trainer...
Trainer initialized successfully
Starting training with 4 epochs


## 7. Train the Model

Execute the training process and monitor progress.

In [10]:
# Run training. Any exception is reported with its traceback and recorded in
# `training_success` / `train_metrics` so the results cell can show the outcome.
try:
    # Fetch the train/validation/test splits and the fitted scalers.
    train_data, val_data, test_data, scalers = dataset.get_data_splits()

    fit_kwargs = dict(
        train_dataset=train_data,
        val_dataset=val_data,
        test_dataset=test_data,
        scalers=scalers,
        checkpoint_path=config.training.checkpoint_path,
        mode=config.training.mode,
    )
    train_metrics = trainer.fit(**fit_kwargs)
except Exception as err:
    import traceback

    print(f"\nTraining failed with error: {err}")
    traceback.print_exc()
    training_success, train_metrics = False, None
else:
    training_success = True

Seed set to 42234
2025-05-23 13:31:29,011 - deepreaction - INFO - Set random seed to 42234
2025-05-23 13:31:29,016 - deepreaction - INFO - Configuration saved to ./results/reaction_model/config.json
2025-05-23 13:31:29,016 - deepreaction - INFO - Creating dataloader: batch_size=16, num_workers=4, eval_mode=False
2025-05-23 13:31:29,017 - deepreaction - INFO - Creating dataloader: batch_size=32, num_workers=2, eval_mode=True
2025-05-23 13:31:29,017 - deepreaction - INFO - Created dataloaders: train=80, val=6 batches
2025-05-23 13:31:29,018 - deepreaction - INFO - Creating model with 2 targets and 2 input features
2025-05-23 13:31:29,018 - deepreaction - INFO - Model configuration:
2025-05-23 13:31:29,019 - deepreaction - INFO -   model_type: dimenet++
2025-05-23 13:31:29,019 - deepreaction - INFO -   readout: mean
2025-05-23 13:31:29,020 - deepreaction - INFO -   batch_size: 16
2025-05-23 13:31:29,020 - deepreaction - INFO -   lr: 0.0005
2025-05-23 13:31:29,020 - deepreaction - INFO -  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_total_loss improved. New best score: 0.492
Epoch 0, global step 80: 'val_total_loss' reached 0.49227 (best 0.49227), saving model to '/root/autodl-tmp/new/original8/results/reaction_model/checkpoints/best-epoch=0000-val_total_loss=0.4923.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_total_loss improved by 0.322 >= min_delta = 0.0001. New best score: 0.171
Epoch 1, global step 160: 'val_total_loss' reached 0.17062 (best 0.17062), saving model to '/root/autodl-tmp/new/original8/results/reaction_model/checkpoints/best-epoch=0001-val_total_loss=0.1706.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_total_loss improved by 0.057 >= min_delta = 0.0001. New best score: 0.113
Epoch 2, global step 240: 'val_total_loss' reached 0.11343 (best 0.11343), saving model to '/root/autodl-tmp/new/original8/results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 320: 'val_total_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=4` reached.
2025-05-23 13:33:56,825 - deepreaction - INFO - Best model saved to: /root/autodl-tmp/new/original8/results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt
2025-05-23 13:33:56,826 - deepreaction - INFO - Creating dataloader: batch_size=32, num_workers=2, eval_mode=True
2025-05-23 13:33:56,827 - deepreaction - INFO - Running test evaluation
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

2025-05-23 13:33:59,582 - deepreaction - INFO - Test results: {'test_total_loss': 0.17179837822914124, 'Test MAE DG_act': 2.1112043857574463, 'Test RMSE DG_act': 2.7726213932037354, 'Test R2 DG_act': 0.8459211587905884, 'Test MAE DrG': 3.1726670265197754, 'Test RMSE DrG': 4.215632915496826, 'Test R2 DrG': 0.8971480131149292, 'Test Avg MAE': 2.6419358253479004, 'Test Avg RMSE': 3.494127035140991, 'Test Avg R2': 0.8715345859527588}
2025-05-23 13:33:59,584 - deepreaction - INFO - Training completed in 147.44 seconds
2025-05-23 13:33:59,585 - deepreaction - INFO - Training metrics saved to ./results/reaction_model/metrics.json


────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Test Avg MAE          2.6419358253479004
       Test Avg R2          0.8715345859527588
      Test Avg RMSE          3.494127035140991
     Test MAE DG_act        2.1112043857574463
      Test MAE DrG          3.1726670265197754
     Test R2 DG_act         0.8459211587905884
       Test R2 DrG          0.8971480131149292
    Test RMSE DG_act        2.7726213932037354
      Test RMSE DrG          4.215632915496826
     test_total_loss        0.17179837822914124
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


## 8. Training Results

Display training results and save locations.

In [11]:
# Print a summary of the training outcome recorded by the training cell.
banner = "="*50

if not training_success or train_metrics is None:
    print("\n" + banner)
    print("TRAINING FAILED")
    print(banner)
    print("Please check the error messages above for details.")
else:
    print("\n" + banner)
    print("TRAINING COMPLETED SUCCESSFULLY")
    print(banner)

    print(f"Training time: {train_metrics.get('training_time', 0):.2f} seconds")
    print(f"Epochs completed: {train_metrics.get('epochs_completed', 0)}")

    # Report the best checkpoint when one was recorded; otherwise the last
    # checkpoint, if saving it is enabled and its path is present.
    if train_metrics.get('best_model_path'):
        print(f"Best model saved to: {train_metrics['best_model_path']}")
    elif config.training.save_last_model and 'last_model_path' in train_metrics:
        print(f"Last model saved to: {train_metrics['last_model_path']}")

    # Test metrics are shown only when present and non-empty.
    if train_metrics.get('test_results'):
        print(f"Test results: {train_metrics['test_results']}")

    print(f"All outputs saved in: {config.training.out_dir}")
    print(banner)


TRAINING COMPLETED SUCCESSFULLY
Training time: 147.44 seconds
Epochs completed: 4
Best model saved to: /root/autodl-tmp/new/original8/results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt
Test results: {'test_total_loss': 0.17179837822914124, 'Test MAE DG_act': 2.1112043857574463, 'Test RMSE DG_act': 2.7726213932037354, 'Test R2 DG_act': 0.8459211587905884, 'Test MAE DrG': 3.1726670265197754, 'Test RMSE DrG': 4.215632915496826, 'Test R2 DrG': 0.8971480131149292, 'Test Avg MAE': 2.6419358253479004, 'Test Avg RMSE': 3.494127035140991, 'Test Avg R2': 0.8715345859527588}
All outputs saved in: ./results/reaction_model


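## 9. Summary

This notebook built a configuration for a `dimenet++` model on the `XTB` dataset setting, loaded the reaction dataset, trained for 4 epochs, and evaluated the best checkpoint on the test split.

In the run recorded above:

- The data were split into 1269 training, 162 validation and 149 test samples.
- The best checkpoint came from epoch 2, with `val_total_loss` = 0.1134.
- On the test split, averaged over `DG_act` and `DrG`, the model reached MAE ≈ 2.64, RMSE ≈ 3.49 and R² ≈ 0.87.
- The configuration, checkpoints and `metrics.json` were written under `./results/reaction_model`.

These numbers belong to this specific run and will change if the notebook is re-executed with different settings.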

