# Deep Reaction Model Inference


## 1. Import Required Libraries

In [1]:
import os
import sys
import traceback
from pathlib import Path

import torch

# Add current directory to path for local imports
sys.path.insert(0, str(Path('.').absolute()))

from deepreaction import Config, ReactionPredictor

## 2. Configuration Parameters

Set up the configuration for inference. These parameters should match those used during training.

In [2]:
# Inference configuration, written one section at a time and merged below.
# The merge keeps the sections (and the keys inside them) in the order listed.

# Dataset configuration
dataset_settings = dict(
    dataset='XTB',
    readout='mean',
    dataset_root='./dataset/DATASET_DA_F',
    input_features=['DG_act_xtb', 'DrG_xtb'],  # XTB-computed features
    file_patterns=['*_reactant.xyz', '*_ts.xyz', '*_product.xyz'],
    file_dir_pattern='reaction_*',
    id_field='ID',
    dir_field='R_dir',
    reaction_field='smiles',  # SMILES representation field
    use_scaler=True,  # Use the same scaler as training
)

# Inference settings
inference_settings = dict(
    batch_size=32,  # Can be larger for inference
    random_seed=42234,
)

# Hardware configuration
hardware_settings = dict(
    cuda=True,
    gpu_id=0,
    num_workers=4,
    log_level='info',
)

params = {**dataset_settings, **inference_settings, **hardware_settings}

print("Configuration parameters loaded successfully!")
print(f"Features to use: {params['input_features']}")
print(f"Batch size: {params['batch_size']}")

Configuration parameters loaded successfully!
Features to use: ['DG_act_xtb', 'DrG_xtb']
Batch size: 32


## 3. File Paths Configuration

Specify the paths for the trained model, input data, and output directory.

In [3]:
# Locations of the trained checkpoint, the CSV to run inference on,
# and the folder that receives the outputs.
checkpoint_path = "./results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt"
inference_csv = "./dataset/DATASET_DA_F/dataset_xtb_final.csv"
output_dir = "./predictions"

for label, location in (
    ("Model checkpoint", checkpoint_path),
    ("Input data", inference_csv),
    ("Output directory", output_dir),
):
    print(f"{label}: {location}")

# Make sure the output folder is present; an already existing folder is fine.
Path(output_dir).mkdir(parents=True, exist_ok=True)
print(f"Output directory created/verified: {output_dir}")

Model checkpoint: ./results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt
Input data: ./dataset/DATASET_DA_F/dataset_xtb_final.csv
Output directory: ./predictions
Output directory created/verified: ./predictions


## 4. Device Setup

Configure GPU/CPU usage for inference.

In [4]:
# Select the device used for inference.
#
# The GPU is addressed by its ordinal only. CUDA_VISIBLE_DEVICES is deliberately
# not set in the GPU branch: that variable renumbers the visible devices from 0,
# so pairing it with the same ordinal in torch.device() would point at a
# non-existent device for any gpu_id other than 0.
use_gpu = params['cuda'] and torch.cuda.is_available()

if use_gpu:
    gpu_count = torch.cuda.device_count()
    # Fail early with a readable message instead of an opaque CUDA error below.
    if not 0 <= params['gpu_id'] < gpu_count:
        raise ValueError(
            f"gpu_id={params['gpu_id']} is out of range: "
            f"{gpu_count} CUDA device(s) visible"
        )
    device = torch.device(f"cuda:{params['gpu_id']}")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(device).total_memory / 1e9:.1f} GB")
else:
    # CPU fallback: either CUDA was not requested or no usable GPU was found.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    device = torch.device("cpu")
    print("Using CPU")
    # Keep the config in sync with the device actually selected, since
    # `params` is what later cells turn into the Config object.
    params['cuda'] = False

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Using GPU: NVIDIA GeForce RTX 3080 Ti
GPU Memory: 12.6 GB
PyTorch version: 2.3.0+cu121
CUDA available: True


## 5. Verify Input Files

Check that all required files exist before proceeding.

In [5]:
# Check every required input up front so a wrong path is reported here
# rather than part-way through model loading or prediction.
files_to_check = {
    "Model checkpoint": checkpoint_path,
    "Inference CSV": inference_csv
}

missing_files = []
for label, location in files_to_check.items():
    if not os.path.exists(location):
        print(f"✗ {label}: {location} - FILE NOT FOUND")
        missing_files.append(label)
        continue
    size_mb = os.path.getsize(location) / (1024*1024)  # bytes -> MB
    print(f"✓ {label}: {location} ({size_mb:.1f} MB)")

all_files_exist = not missing_files

# Every path is reported above before the cell aborts.
if not all_files_exist:
    raise FileNotFoundError("One or more required files are missing. Please check the paths above.")

print("\n✓ All required files found!")

✓ Model checkpoint: ./results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt (39.9 MB)
✓ Inference CSV: ./dataset/DATASET_DA_F/dataset_xtb_final.csv (0.8 MB)

✓ All required files found!


## 6. Initialize Configuration and Predictor

Create the configuration object and load the trained model.

In [6]:
# Build the Config object from the parameter dictionary.
print("Creating configuration...")
config = Config.from_params(params)
print("Configuration created successfully!")

# Instantiate the predictor from the checkpoint.
print(f"\nLoading model from: {checkpoint_path}")
predictor = ReactionPredictor(config=config, checkpoint_path=checkpoint_path)
print("Model loaded successfully!")

# Summarise the settings that ended up on the Config object.
# Each entry is (label, config section, attribute); attributes are read one at
# a time, in this order, as each line is printed.
summary_fields = (
    ("Dataset type", "dataset", "dataset"),
    ("Input features", "dataset", "input_features"),
    ("Batch size", "training", "batch_size"),
    ("Using scaler", "dataset", "use_scaler"),
)

print("\nModel Configuration:")
for label, section, attribute in summary_fields:
    print(f"- {label}: {getattr(getattr(config, section), attribute)}")

2025-05-23 13:49:19,861 - deepreaction - INFO - Loading model from checkpoint: ./results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt


Creating configuration...
Configuration created successfully!

Loading model from: ./results/reaction_model/checkpoints/best-epoch=0002-val_total_loss=0.1134.ckpt


2025-05-23 13:49:20,466 - deepreaction - INFO - Model target fields: ['DG_act', 'DrG']
2025-05-23 13:49:20,467 - deepreaction - INFO - Model has 2 scalers


Model loaded successfully!

Model Configuration:
- Dataset type: XTB
- Input features: ['DG_act_xtb', 'DrG_xtb']
- Batch size: 32
- Using scaler: True


## 7. Run Predictions

Execute the prediction process on the input data.

In [7]:
print("Starting prediction...")
print(f"Processing data from: {inference_csv}")
print(f"Output will be saved to: {output_dir}")

# Reset the outcome before running. If this cell is interrupted
# (KeyboardInterrupt is not an Exception subclass, so the handler below does
# not see it), the following cells must not pick up `results` or
# `prediction_success` left over from an earlier run.
results = None
prediction_success = False

try:
    # Run predictions
    results = predictor.predict_from_csv(inference_csv, output_dir=output_dir)
except Exception as e:
    # Deliberately broad: the failure is reported here with its full traceback,
    # and the next cell reads `prediction_success` to decide what to show.
    print(f"\n✗ Prediction failed with error: {e}")
    traceback.print_exc()
else:
    prediction_success = True
    print("\n✓ Prediction completed successfully!")

2025-05-23 13:49:20,473 - deepreaction - INFO - Loading inference data from: ./dataset/DATASET_DA_F/dataset_xtb_final.csv
2025-05-23 13:49:20,475 - deepreaction - INFO - Loading inference dataset from ./dataset/DATASET_DA_F/dataset_xtb_final.csv
Processing...


Starting prediction...
Processing data from: ./dataset/DATASET_DA_F/dataset_xtb_final.csv
Output will be saved to: ./predictions
Inference mode: Using dummy target field
Using target fields: ['target']
Using input features: ['DG_act_xtb', 'DrG_xtb']
Using file suffixes: reactant='_reactant.xyz', ts='_ts.xyz', product='_product.xyz'


Processing reactions:  80%|███████▉  | 1258/1582 [00:00<00:00, 2028.46it/s]



Processing reactions: 100%|██████████| 1582/1582 [00:00<00:00, 2049.52it/s]
Done!


Saved metadata to dataset/DATASET_DA_F/processed/metadata.json
Processed 1580 reactions, saved to dataset/DATASET_DA_F/processed/data_7577d801.pt
Null data at index 0
Removing old processed file: dataset/DATASET_DA_F/processed/data_7577d801.pt
Inference mode: Using dummy target field
Using target fields: ['target']
Using input features: ['DG_act_xtb', 'DrG_xtb']
Using file suffixes: reactant='_reactant.xyz', ts='_ts.xyz', product='_product.xyz'


Processing reactions:  78%|███████▊  | 1240/1582 [00:00<00:00, 1924.27it/s]



Processing reactions: 100%|██████████| 1582/1582 [00:00<00:00, 1592.31it/s]


Saved metadata to dataset/DATASET_DA_F/processed/metadata.json
Processed 1580 reactions, saved to dataset/DATASET_DA_F/processed/data_7577d801.pt


2025-05-23 13:49:23,278 - deepreaction - INFO - Loaded 1580 samples for inference
2025-05-23 13:49:23,282 - deepreaction - INFO - Running predictions on 1580 samples...
2025-05-23 13:49:29,754 - deepreaction - INFO - Predictions shape: (1580, 2)
2025-05-23 13:49:29,755 - deepreaction - INFO - Mapping input features ['DG_act_xtb', 'DrG_xtb'] to model targets ['DG_act', 'DrG']
2025-05-23 13:49:29,756 - deepreaction - INFO - Mapped feature 'DG_act_xtb' (key: 'DG_act') to scaler index 0
2025-05-23 13:49:29,757 - deepreaction - INFO - Mapped feature 'DrG_xtb' (key: 'DrG') to scaler index 1
2025-05-23 13:49:29,758 - deepreaction - INFO - Output field names: ['DG_act', 'DrG']
2025-05-23 13:49:29,760 - deepreaction - INFO - Applied inverse scaling (scaler 0) for output field 'DG_act'
2025-05-23 13:49:29,762 - deepreaction - INFO - Applied inverse scaling (scaler 1) for output field 'DrG'
2025-05-23 13:49:29,826 - deepreaction - INFO - Predictions saved to ./predictions/predictions.csv
2025-05-


✓ Prediction completed successfully!


## 8. Analyze Results

Display and analyze the prediction results.

In [8]:
# Report on the prediction run: a failure notice, or the shape and columns of
# the returned results together with the location of the saved CSV.
banner = "=" * 50

if not prediction_success or results is None:
    print(f"\n{banner}")
    print("PREDICTION FAILED")
    print(banner)
    print("Please check the error messages above for details.")
else:
    print(f"\n{banner}")
    print("PREDICTION RESULTS")
    print(banner)

    print(f"Results shape: {results.shape}")
    print(f"Number of predictions: {len(results)}")
    print(f"Number of features: {results.shape[1]}")

    # List the columns, numbered from 1
    print("\nColumns in results:")
    for position, column in enumerate(results.columns, start=1):
        print(f"  {position}. {column}")

    print(f"\nResults saved to: {output_dir}/predictions.csv")
    print(banner)


PREDICTION RESULTS
Results shape: (1580, 5)
Number of predictions: 1580
Number of features: 5

Columns in results:
  1. ID
  2. id
  3. smiles
  4. DG_act_predicted
  5. DrG_predicted

Results saved to: ./predictions/predictions.csv


## 9. Display Sample Results

Show the first few prediction results for inspection.

## 10. Summary and Next Steps

