In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolAlign
# Correct file loading
mol1 = Chem.MolFromMol2File('ref.mol2', sanitize=False)
mol2 = Chem.MolFromMol2File('tar.mol2', sanitize=False)

# Check load success
if mol1 is None or mol2 is None:
    raise ValueError("Failed to load one or both mol2 files")

# # Check atom count
# if mol1.GetNumAtoms() != mol2.GetNumAtoms():
#     raise ValueError("Molecule atom count mismatch")

# Calculate RMSD
rmsd = rdMolAlign.GetBestRMS(mol1, mol2)
print(f"RMSD: {rmsd:.3f} Å")


In [24]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, rdMolAlign
from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment

def load_mol_no_sanitize(path):
    mol = Chem.MolFromMol2File(path, sanitize=False, removeHs=False)
    if mol is None:
        raise ValueError(f"Could not load: {path}")
    if mol.GetNumConformers() == 0:
        raise ValueError(f"No 3D coordinates found in: {path}")
    return mol

def get_atom_positions(mol):
    conf = mol.GetConformer()
    return np.array([list(conf.GetAtomPosition(i)) for i in range(mol.GetNumAtoms())])

# === Load molecules ===
ref = load_mol_no_sanitize("ref.mol2")
tar = load_mol_no_sanitize("tar.mol2")

# === Check atom counts ===
if ref.GetNumAtoms() != tar.GetNumAtoms():
    raise ValueError("Atom count mismatch!")

# === Get 3D coordinates ===
ref_coords = get_atom_positions(ref)
tar_coords = get_atom_positions(tar)

# === Compute distance matrix and optimal atom mapping ===
dist_matrix = cdist(tar_coords, ref_coords)  # probe rows, reference columns
tar_idx, ref_idx = linear_sum_assignment(dist_matrix)

# === Prepare atom map (ref_index, tar_index) for AlignMol ===
atom_map = [(int(r), int(t)) for t, r in zip(tar_idx, ref_idx)]  # AlignMol wants (ref, tar)

# === Align and compute RMSD ===
rmsd = rdMolAlign.AlignMol(tar, ref, atomMap=atom_map)
print("Best atom map (ref → tar):", atom_map)
print(f"Geometry-based RMSD: {rmsd:.3f} Å")

# === Optional: Save aligned tar as SDF ===
from rdkit.Chem import SDWriter
writer = SDWriter("aligned_tar.sdf")
writer.write(tar)
writer.close()



Best atom map (ref → tar): [(38, 0), (4, 1), (29, 2), (14, 3), (1, 4), (27, 5), (12, 6), (17, 7), (11, 8), (21, 9), (37, 10), (36, 11), (6, 12), (25, 13), (9, 14), (19, 15), (8, 16), (7, 17), (16, 18), (15, 19), (10, 20), (24, 21), (35, 22), (26, 23), (33, 24), (28, 25), (40, 26), (23, 27), (18, 28), (31, 29), (32, 30), (42, 31), (34, 32), (20, 33), (3, 34), (13, 35), (5, 36), (39, 37), (2, 38), (30, 39), (22, 40), (0, 41), (41, 42)]
Geometry-based RMSD: 4.778 Å






RuntimeError: No sub-structure match found between the reference and probe mol

In [25]:
from rdkit import Chem
from rdkit.Chem import AllChem, rdMolAlign

# Load mol2 files (without sanitization, to avoid hydrogen/charge issues)
mol1 = Chem.MolFromMol2File('ref.mol2', sanitize=False, removeHs=False)
mol2 = Chem.MolFromMol2File('tar.mol2', sanitize=False, removeHs=False)

# Check if molecules loaded correctly
if mol1 is None or mol2 is None:
    raise ValueError("Failed to load one or both mol2 files")

# Check atom count
n_atoms1 = mol1.GetNumAtoms()
n_atoms2 = mol2.GetNumAtoms()
if n_atoms1 != n_atoms2:
    raise ValueError(f"Atom count mismatch: ref={n_atoms1}, tar={n_atoms2}")

try:
    # Try best RMS with automatic atom matching
    rmsd = rdMolAlign.GetBestRMS(mol1, mol2)
    print(f"Best RMSD (with matching): {rmsd:.3f} Å")
except RuntimeError as e:
    print("Best RMSD matching failed:", e)
    # Attempt direct mapping if atom order is assumed to match
    atom_map = list(zip(range(n_atoms1), range(n_atoms2)))
    rmsd = rdMolAlign.AlignMol(mol2, mol1, atomMap=atom_map)
    print(f"Forced RMSD (atom order assumed to match): {rmsd:.3f} Å")


Best RMSD matching failed: No sub-structure match found between the reference and probe mol
Forced RMSD (atom order assumed to match): 3.291 Å


