In [13]:
"""
prep_pipeline.py
Open-source replacement for Schrödinger + Amber protein–ligand preprocessing.
"""

import os
import subprocess
from pdbfixer import PDBFixer
from openmm.app import PDBFile, ForceField, Simulation
from openmm import LangevinIntegrator
from openmm.unit import kelvin, picoseconds
from rdkit import Chem
from rdkit.Chem import AllChem

# ---- Config ----
# Module-level settings shared by the pipeline steps below.
INPUT_PROTEIN = "1a0q_protein.pdb"   # raw protein PDB; input to fix_protein (step 1)
INPUT_LIGAND  = "1a0q_ligand.mol2"   # ligand in mol2; only referenced by the disabled step 2 call
PH            = 7.0                  # pH handed to PDBFixer hydrogen placement and to propka3

# ---- Step 1: Fix protein ----
def fix_protein(input_pdb, output_pdb, ph=None):
    """Repair a protein PDB with PDBFixer and write the result to ``output_pdb``.

    The PDBFixer calls run in the order the library expects: missing residues
    are located first, non-standard residues are found and replaced, and only
    then are missing heavy atoms located and built, followed by hydrogens.

    Args:
        input_pdb: Path of the PDB file to repair.
        output_pdb: Path the repaired structure is written to (overwritten).
        ph: pH used when adding hydrogens. ``None`` (the default) falls back
            to the module-level ``PH`` setting, matching the previous
            behaviour of this function.
    """
    # Resolve the default at call time so a later change to PH is honoured.
    target_ph = PH if ph is None else ph

    fixer = PDBFixer(filename=input_pdb)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    # Must run after the residue bookkeeping above; addMissingAtoms consumes
    # the results of both findMissingResidues and findMissingAtoms.
    fixer.findMissingAtoms()

    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH=target_ph)
    with open(output_pdb, "w") as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

# ---- Step 2: Protonate ligand (RDKit + Dimorphite-DL) ----
def protonate_ligand(smiles_file, output_sdf):
    """Build a 3D, explicit-hydrogen ligand from the first SMILES in a file.

    Only the first line of ``smiles_file`` is read. The molecule is parsed
    with RDKit, explicit hydrogens are added, one conformer is embedded with
    a fixed random seed, and the result is written as an MDL mol block to
    ``output_sdf``.

    NOTE(review): despite the step title, this function performs no
    pH-dependent protonation (Dimorphite-DL is never called); it only adds
    explicit hydrogens to the structure as written in the SMILES.

    Args:
        smiles_file: Path of a text file whose first line is a SMILES string.
        output_sdf: Path the embedded molecule is written to (overwritten).

    Raises:
        ValueError: If the first line is empty or RDKit cannot parse it.
        RuntimeError: If RDKit fails to embed a 3D conformer.
    """
    with open(smiles_file) as handle:
        smiles = handle.readline().strip()
    if not smiles:
        raise ValueError(f"No SMILES found on the first line of {smiles_file!r}")

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None, not by raising.
    if mol is None:
        raise ValueError(
            f"RDKit could not parse the SMILES in {smiles_file!r}: {smiles!r}"
        )

    mol = Chem.AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails;
    # without this check a molecule with no real coordinates would be written.
    conformer_id = AllChem.EmbedMolecule(mol, randomSeed=42)
    if conformer_id < 0:
        raise RuntimeError(
            f"RDKit could not embed a 3D conformer for the SMILES in {smiles_file!r}"
        )
    Chem.MolToMolFile(mol, output_sdf)

# ---- Step 3: PROPKA (via subprocess) ----
def run_propka(pdb_file):
    """Run the ``propka3`` executable on ``pdb_file`` at the configured pH.

    The pH is taken from the module-level ``PH`` setting. Because the
    subprocess is started with ``check=True``, a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    ph_option = "--pH={}".format(PH)
    command = ["propka3", pdb_file, ph_option]
    subprocess.run(command, check=True)

# ---- Step 4: Minimize protein in OpenMM ----
def minimize_openmm(pdb_in, pdb_out):
    """Energy-minimize the structure in ``pdb_in`` and write it to ``pdb_out``.

    A system is built from the ``amber19-all.xml`` and ``amber19/tip3pfb.xml``
    force-field files with no constraints, minimized for at most 500
    iterations, and the resulting coordinates are saved as a PDB file.

    ``createSystem`` raises ``ValueError`` when a residue in the input does
    not match any force-field template; nothing is written in that case.
    """
    structure = PDBFile(pdb_in)
    topology = structure.topology

    forcefield = ForceField("amber19-all.xml", "amber19/tip3pfb.xml")
    system = forcefield.createSystem(topology, constraints=None)

    # The integrator is only needed to construct a Simulation; no dynamics
    # steps are taken here.
    simulation = Simulation(
        topology,
        system,
        LangevinIntegrator(300 * kelvin, 1 / picoseconds, 0.002 * picoseconds),
    )
    simulation.context.setPositions(structure.positions)
    simulation.minimizeEnergy(maxIterations=500)

    final_state = simulation.context.getState(getPositions=True)
    with open(pdb_out, "w") as out_handle:
        PDBFile.writeFile(simulation.topology, final_state.getPositions(), out_handle)

# ---- Step 5: Ligand parametrization with antechamber ----
def run_antechamber(ligand_sdf, prefix="ligand", net_charge=None):
    """Run ``antechamber`` on an SDF ligand and write ``<prefix>.mol2``.

    The command requests the ``bcc`` charge method (``-c bcc``) and status
    level 2 (``-s 2``). Because the subprocess is started with
    ``check=True``, a non-zero exit status raises
    ``subprocess.CalledProcessError``.

    Args:
        ligand_sdf: Path of the input ligand in SDF format.
        prefix: Output file stem; the result is written to ``<prefix>.mol2``.
            Choose a stem that does not collide with an existing mol2 file
            you want to keep, since the output is overwritten.
        net_charge: Optional integer net charge of the ligand, passed as
            ``-nc``. ``None`` (the default) omits the flag, which reproduces
            the previous command line exactly.
    """
    command = [
        "antechamber",
        "-i", ligand_sdf, "-fi", "sdf",
        "-o", f"{prefix}.mol2", "-fo", "mol2",
        "-c", "bcc", "-s", "2",
    ]
    if net_charge is not None:
        # antechamber expects an integer here; normalise values such as -1.0.
        command += ["-nc", str(int(net_charge))]
    subprocess.run(command, check=True)

# ---- Step 6: tleap system building ----
def run_tleap(protein_pdb, ligand_mol2, out_prefix="complex", ligand_frcmod=None):
    """Write a ``tleap.in`` script for the complex and run ``tleap`` on it.

    The script sources ``leaprc.protein.ff14SB`` and ``leaprc.gaff``, loads
    the ligand mol2 and the protein PDB, combines them, sets the default
    PBradii to ``mbondi2`` and saves ``<out_prefix>.prmtop`` and
    ``<out_prefix>.inpcrd``. ``tleap.in`` is written to the current working
    directory and overwritten on every call.

    Args:
        protein_pdb: Path of the protein PDB loaded with ``loadpdb``.
        ligand_mol2: Path of the ligand mol2 loaded with ``loadmol2``.
        out_prefix: Stem of the topology/coordinate files that are saved.
        ligand_frcmod: Optional path of a frcmod file with extra ligand
            parameters (for example one produced by ``parmchk2``); when given
            it is loaded with ``loadamberparams`` before the ligand. ``None``
            (the default) leaves the script's commands as they were before
            this parameter existed.

    Raises:
        subprocess.CalledProcessError: If ``tleap`` exits with a non-zero status.
    """
    script_lines = [
        "source leaprc.protein.ff14SB",
        "source leaprc.gaff",
    ]
    if ligand_frcmod is not None:
        # Extra parameters must be in place before the unit that needs them
        # is written out by saveamberparm.
        script_lines.append(f"loadamberparams {ligand_frcmod}")
    script_lines += [
        f"LIG = loadmol2 {ligand_mol2}",
        f"PROT = loadpdb {protein_pdb}",
        "complex = combine { PROT LIG }",
        "set default PBradii mbondi2",
        f"saveamberparm complex {out_prefix}.prmtop {out_prefix}.inpcrd",
        "quit",
    ]
    tleap_in = "\n" + "\n".join(script_lines) + "\n"

    with open("tleap.in", "w") as handle:
        handle.write(tleap_in)
    subprocess.run(["tleap", "-f", "tleap.in"], check=True)

# ---- Main workflow ----
def preprocess():
    """Run the enabled pipeline steps on the configured 1a0q inputs.

    Steps executed: protein repair (1), OpenMM minimization (4), antechamber
    ligand parametrization (5) and tleap system building (6). Steps 2 and 3
    are currently disabled. All files are read from and written to the
    current working directory.

    Raises:
        FileNotFoundError: If the ligand SDF needed by step 5 is missing.
    """
    # NOTE(review): this directory is created but nothing is written into it;
    # every output below lands in the current working directory.
    os.makedirs("prep_out", exist_ok=True)

    fixed_pdb = "protein_fixed.pdb"
    lig_sdf   = "1a0q_ligand.sdf"
    minimized = "protein_minimized.pdb"
    # Use a stem distinct from the raw input: antechamber writes
    # "<prefix>.mol2", and the previous prefix "1a0q_ligand" made it
    # overwrite INPUT_LIGAND ("1a0q_ligand.mol2").
    lig_prefix = "1a0q_ligand_gaff"
    lig_mol2   = f"{lig_prefix}.mol2"

    # Step 2 is disabled, so nothing in this pipeline creates lig_sdf; fail
    # before the expensive steps rather than at antechamber.
    if not os.path.exists(lig_sdf):
        raise FileNotFoundError(
            f"Ligand SDF {lig_sdf!r} not found; step 2 is disabled, "
            "so the file must already exist."
        )

    print("Step 1: Fix protein...")
    fix_protein(INPUT_PROTEIN, fixed_pdb)

    # Disabled: protonate_ligand reads a SMILES file, but INPUT_LIGAND is mol2.
    # print("Step 2: Protonate ligand...")
    # protonate_ligand(INPUT_LIGAND, lig_sdf)

    # print("Step 3: Run PROPKA...")
    # run_propka(fixed_pdb)

    print("Step 4: Minimize with OpenMM...")
    minimize_openmm(fixed_pdb, minimized)

    print("Step 5: Run antechamber...")
    run_antechamber(lig_sdf, prefix=lig_prefix)

    print("Step 6: Run tleap...")
    run_tleap(minimized, lig_mol2, out_prefix="complex")

    print("All done. Files in .")


In [14]:
# Run the whole pipeline; the recorded output below shows it stopping in step 4.
preprocess()

Step 1: Fix protein...
Step 4: Minimize with OpenMM...


ValueError: No template found for residue 311 (VAL).  The set of atoms matches NVAL, but the bonds are different.  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template

In [18]:
# Diagnostic cell: repeats the PDBFile -> ForceField -> createSystem sequence
# from minimize_openmm directly on the PDBFixer output, to isolate the
# template error without running the rest of the pipeline.
# NOTE(review): the two recorded failures name adjacent residues (311 VAL,
# then 310 TYR); presumably there is a chain break or a terminus inside one
# chain of the input. Confirm by inspecting chain IDs and TER records
# around those residues in the PDB file.
pdb_in = "protein_fixed.pdb"
pdb = PDBFile(pdb_in)
ff = ForceField("amber19-all.xml", "amber19/tip3pfb.xml")
system = ff.createSystem(pdb.topology, constraints=None)

ValueError: No template found for residue 310 (TYR).  The set of atoms matches TYR, but the bonds are different.  Perhaps the chain is missing a terminal group?  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template