# Hydration Free Energy Calculations Using GNNImplicitSolvent

This notebook requires the `feature-multiple-molecules` branch of GNNImplicitSolvent: <https://github.com/fjclark/GNNImplicitSolvent/tree/feature-multiple-molecules>

In [16]:
from rdkit import Chem
from openff.toolkit import ForceField, Molecule, Topology
from Simulation.helper_functions import get_gnn_sim, MODEL_PATH, SOLVENT_DICT
from openmm.app import HBonds
from rdkit import Chem
from openff.toolkit import ForceField, Molecule, Topology
from Simulation.helper_functions import get_gnn_sim, MODEL_PATH, SOLVENT_DICT, create_vac_sim, create_gnn_sim
from openmm.app import HBonds
from openbabel import pybel
from loguru import logger
from openff.units import unit

# Multiplicative factor for converting a value in kilojoules to kilocalories,
# derived from the unit registry rather than hard-coded.
KJ_TO_KCAL = (1 * unit.kilojoule).to(unit.kilocalorie).magnitude
# Name of the OpenFF force field file handed to get_gnn_sim (see make_sim).
FORCE_FIELD = "openff_no_water-3.0.0-alpha0.offxml"

## Load and process the receptor

Add topology information to the receptor PDB structure by reading it with Open Babel and writing it back out as an SDF file.

In [2]:
# Read the receptor PDB with Open Babel and write it back out as SDF.
pdb_structures = list(pybel.readfile("pdb", "3QTU/receptor.pdb"))
# The file is expected to hold exactly one structure.
assert len(pdb_structures) == 1
pdb = pdb_structures[0]
# Remember the atom count so a later cell can verify that no atoms were lost.
initial_num_atoms = len(pdb.atoms)
logger.info(f"Number of atoms in PDB: {initial_num_atoms}")
# overwrite=True replaces any existing receptor.sdf.
pdb.write("sdf", "3QTU/receptor.sdf", overwrite=True)


[32m2025-12-05 10:07:03.029[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mNumber of atoms in PDB: 1593[0m


In [3]:
def fix_charges(mol: Chem.Mol) -> None:
    """Repair missing formal charges on N and O atoms, then sanitize ``mol`` in place.

    The summed bond order of each nitrogen and oxygen atom is inspected:

    * a nitrogen whose bond orders sum to 4 and whose formal charge is 0 is set to +1;
    * an oxygen whose bond orders sum to 1 and whose formal charge is 0 is set to -1.

    Every change is logged. Afterwards the molecule is sanitized and its total
    formal charge is logged.

    Args:
        mol: Molecule to modify in place.

    Returns:
        None.
    """
    for atom in mol.GetAtoms():
        element = atom.GetSymbol()
        if element not in ('N', 'O'):
            continue

        # Sum of the bond orders over all bonds of this atom
        bond_order_sum = sum(bond.GetBondTypeAsDouble() for bond in atom.GetBonds())

        # Atoms that already carry a formal charge are left untouched
        if atom.GetFormalCharge() != 0:
            continue

        if element == 'N' and bond_order_sum == 4:
            # Nitrogen with a summed bond order of 4 should be positively charged
            atom.SetFormalCharge(1)
            logger.info(f"Set charge +1 on N atom {atom.GetIdx()}")
        elif element == 'O' and bond_order_sum == 1:
            # Oxygen with a single bond of order 1 should typically be negatively charged
            atom.SetFormalCharge(-1)
            logger.info(f"Set charge -1 on O atom {atom.GetIdx()}")

    Chem.SanitizeMol(mol)
    logger.info(f"\nTotal molecular charge: {Chem.GetFormalCharge(mol)}")

# Load the receptor without sanitization or hydrogen removal so that formal
# charges can be repaired before RDKit validates valences.
mol = Chem.SDMolSupplier('3QTU/receptor.sdf', sanitize=False, removeHs=False)[0]
# SDMolSupplier yields None for a record it cannot parse; fail here with a
# clear message instead of an opaque argument error inside GetMolFrags.
if mol is None:
    raise ValueError("RDKit could not parse the first record of '3QTU/receptor.sdf'")

# Split into disconnected fragments and fix the charges of each one separately.
frags = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)
for i, frag in enumerate(frags):
    logger.info(f"\n#### Processing fragment {i} with {frag.GetNumAtoms()} atoms")
    fix_charges(frag)

# Convert every repaired fragment into an OpenFF Molecule (hydrogens are explicit).
receptor_off_mols = [Molecule.from_rdkit(frag, allow_undefined_stereo=True, hydrogens_are_explicit=True) for frag in frags]

[32m2025-12-05 10:07:03.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m28[0m - [1m
#### Processing fragment 0 with 237 atoms[0m
[32m2025-12-05 10:07:03.209[0m | [1mINFO    [0m | [36m__main__[0m:[36mfix_charges[0m:[36m20[0m - [1mSet charge -1 on O atom 31[0m
[32m2025-12-05 10:07:03.209[0m | [1mINFO    [0m | [36m__main__[0m:[36mfix_charges[0m:[36m12[0m - [1mSet charge +1 on N atom 50[0m
[32m2025-12-05 10:07:03.210[0m | [1mINFO    [0m | [36m__main__[0m:[36mfix_charges[0m:[36m20[0m - [1mSet charge -1 on O atom 94[0m
[32m2025-12-05 10:07:03.210[0m | [1mINFO    [0m | [36m__main__[0m:[36mfix_charges[0m:[36m12[0m - [1mSet charge +1 on N atom 215[0m
[32m2025-12-05 10:07:03.210[0m | [1mINFO    [0m | [36m__main__[0m:[36mfix_charges[0m:[36m23[0m - [1m
Total molecular charge: 0[0m
[32m2025-12-05 10:07:03.211[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m28[0m - [1m
#### Processing fragment 

In [4]:
# Aggregate the net charge and the atom count over all receptor fragments
total_charge = sum(fragment.total_charge for fragment in receptor_off_mols)
total_atoms = sum(fragment.n_atoms for fragment in receptor_off_mols)
logger.info(f"\nTotal system charge: {total_charge}, Total number of atoms: {total_atoms}")
assert total_atoms == initial_num_atoms

[32m2025-12-05 10:07:10.993[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m
Total system charge: 2.0 elementary_charge, Total number of atoms: 1593[0m


## Load the ligand and make all topologies

In [9]:
# Load the ligand; undefined stereochemistry is tolerated.
ligand = Molecule.from_file("3QTU/ligand.sdf", allow_undefined_stereo=True)

# Build one topology per state: receptor only, ligand only, and the complex
# (all receptor fragments followed by the ligand).
receptor_top = Topology.from_molecules(receptor_off_mols)
ligand_top = Topology.from_molecules([ligand])
complex_top = Topology.from_molecules(receptor_off_mols + [ligand])

## Calculate solvation free energies

In [10]:
def make_sim(off_topology, solvent, num_confs=1):
    """Build a simulation object for ``off_topology`` via ``get_gnn_sim``.

    Only the topology, solvent name and conformer count vary; every other
    option is fixed: the module-level model path, solvent dictionary and
    force field are used, HBonds constraints are applied, and no cache,
    save name or partial charges are supplied.

    Args:
        off_topology: Topology forwarded to ``get_gnn_sim``.
        solvent: Solvent identifier forwarded to ``get_gnn_sim``
            (this notebook uses "tip3p" and "vac").
        num_confs: Number of conformers forwarded to ``get_gnn_sim``.

    Returns:
        Whatever ``get_gnn_sim`` returns for these settings.
    """
    fixed_options = dict(
        model_path=MODEL_PATH,
        solvent_dict=SOLVENT_DICT,
        cache=None,
        save_name=None,
        partial_charges=None,
        forcefield=FORCE_FIELD,
        constraints=HBonds,
    )
    return get_gnn_sim(
        off_topology=off_topology,
        solvent=solvent,
        num_confs=num_confs,
        **fixed_options,
    )

def single_energy(sim):
    """Return the raw numeric value of the energy of ``sim``'s reference system.

    The energy is obtained from ``sim._ref_system.calculate_energy()`` and its
    ``_value`` attribute is returned as is, with no unit conversion.
    NOTE(review): callers in this notebook treat the result as kJ/mol; confirm
    against ``calculate_energy``.
    """
    energy = sim._ref_system.calculate_energy()
    return energy._value

In [11]:
# Solvent (e.g., TIP3P): single-point energies for complex, ligand and receptor
complex_solv, ligand_solv, receptor_solv = [
    single_energy(make_sim(top, "tip3p", num_confs=1))
    for top in (complex_top, ligand_top, receptor_top)
]

# Vacuum: the same three states, evaluated in the same order
complex_vac, ligand_vac, receptor_vac = [
    single_energy(make_sim(top, "vac", num_confs=1))
    for top in (complex_top, ligand_top, receptor_top)
]

# Solvation contribution to binding:
# (solvent - vacuum) of the complex minus that of the ligand and of the receptor
ddG_solv = (
    (complex_solv - complex_vac)
    - (ligand_solv - ligand_vac)
    - (receptor_solv - receptor_vac)
)

  model_dict = torch.load(model_path, map_location="cpu")["model"]


Using OpenFF forcefield: openff_no_water-3.0.0-alpha0.offxml
setting charges based on AM1BCC
Topologically unique molecules: ['[H]c1c(c(c(c(c1C([H])([H])[C@@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])[H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+]([H])([H])[H])N([H])C(=O)[C@]([H])(C([H])(C([H])([H])[H])C([H])([H])[H])N([H])C(=O)[C@]([H])(C([H])(C([H])([H])[H])C([H])([H])[H])N([H])C(=O)C([H])([H])N([H])C(=O)[C@]([H])(C([H])([H])c2c(c(c(c(c2[H])[H])O[H])[H])[H])N([H])C(=O)[C@]([H])([C@@]([H])(C([H])([H])[H])O[H])N([H])C(=O)C([H])([H])N([H])C(=O)[C@]([H])(C([H])([H])C([H])([H])C(=O)[O-])N([H])C(=O)C([H])([H])N([H])C(=O)[C@]([H])([C@@]([H])(C([H])([H])[H])C([H])([H])C([H])([H])[H])N([H])C(=O)[C@]([H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+]([H])([H])[H])N([H])C(=O)[C@]([H])(C([H])([H])C([H])([H])C(=O)[O-])N([H])C(=O)[C@]([H])(C([H])(C([H])([H])[H])C([H])([H])[H])N([H])C(=O)[H])[H])[H])O[H])[H]', '[H]C(=O)N([H])[C@]([H])(C(=O)N([H])[C

In [17]:
# Report every single-point energy after scaling by KJ_TO_KCAL.
# NOTE(review): this assumes single_energy returns values in kJ/mol — confirm.
logger.info(f"Complex in Solvent: {complex_solv * KJ_TO_KCAL:.3f} kcal/mol")
logger.info(f"Ligand in Solvent: {ligand_solv * KJ_TO_KCAL:.3f} kcal/mol")
logger.info(f"Receptor in Solvent: {receptor_solv * KJ_TO_KCAL:.3f} kcal/mol")
logger.info(f"Complex in Vacuum : {complex_vac * KJ_TO_KCAL:.3f} kcal/mol")
logger.info(f"Ligand in Vacuum : {ligand_vac * KJ_TO_KCAL:.3f} kcal/mol")
logger.info(f"Receptor in Vacuum : {receptor_vac * KJ_TO_KCAL:.3f} kcal/mol")

# Solvation contribution to binding computed in the previous cell.
logger.info(f"Delta Delta G in Solvent = {ddG_solv * KJ_TO_KCAL:.3f} kcal/mol")

[32m2025-12-05 10:17:11.655[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mComplex in Solvent: -2160.276 kcal/mol[0m
[32m2025-12-05 10:17:11.657[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLigand in Solvent: -434.620 kcal/mol[0m
[32m2025-12-05 10:17:11.658[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mReceptor in Solvent: -1829.572 kcal/mol[0m
[32m2025-12-05 10:17:11.659[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mComplex in Vacuum : -885.515 kcal/mol[0m
[32m2025-12-05 10:17:11.660[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mLigand in Vacuum : -376.991 kcal/mol[0m
[32m2025-12-05 10:17:11.661[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mReceptor in Vacuum : -446.423 kcal/mol[0m
[32m2025-12-05 10:17:11.661[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - 