In [1]:
# %% [markdown]
# # SDF preparation using OpenFF & Meeko for Ligand Export

# %%
# Essential Imports
import os
from rdkit import Chem

from meeko import PDBQTMolecule
from meeko import RDKitMolCreate

from openff.toolkit import Molecule as OFFMolecule # Aliased to avoid clash with any RDKit Mol
from openff.units import unit as offunit
from openff.toolkit.typing.engines.smirnoff import ForceField as OFFForceField

from openmm.app import PDBFile, Modeller, HBonds, PME
from openmm.app import Simulation as OpenMM_Simulation
from openmm.app import PDBReporter as OpenMM_PDBReporter
from openmm.app import StateDataReporter as OpenMM_StateDataReporter
from openmm import LangevinMiddleIntegrator, Platform
from openmm import unit as omm_unit

from openmmforcefields.generators import SystemGenerator

import parmed

# %% [markdown]
# ## I. Define Input Files and Parameters

# %%
# --- Input Data ---
receptor_pdb_file = "4EY7_fixed_by_pdbfixer.pdb"

# Docked (ligand, binding site) pairs selected for MD. Only the Vina output
# PDBQT path is needed per entry: Meeko is expected to read the SMILES and
# atom mapping from within these files.
# Paths follow the layout "<site_id>/vina_results/<ligand_id>_docked.pdbqt".
top_ligands_vina_pdbqts = [
    {
        "ligand_id": ligand_id,
        "site_id": site_id,
        "vina_pdbqt_path": f"{site_id}/vina_results/{ligand_id}_docked.pdbqt",
    }
    for ligand_id, site_id in (
        ("CHEMBL4214707", "A604"),
        ("CHEMBL4214707", "B605"),
        ("CHEMBL4218191", "A604"),
        ("CHEMBL4210316", "A604"),
    )
]

# --- Output Directories ---
# Root folder for everything this notebook writes, plus a sub-folder holding
# the Meeko-derived RDKit molecules exported as SDF for inspection.
# (No temporary ligand-prep directory is created; Meeko works in memory.)
base_output_dir = "MD_NVT_Simulations_OpenFF_MeekoExport"
md_ready_ligands_dir = os.path.join(base_output_dir, "md_ready_ligands_from_meeko")

os.makedirs(base_output_dir, exist_ok=True)
os.makedirs(md_ready_ligands_dir, exist_ok=True)



In [None]:
# %% [markdown]
# ## II. Ligand Preparation Loop (using Meeko)

# %%
# Collects one record per successfully converted (ligand, site) pair. Each
# record holds the ligand/site ids, the single-conformer RDKit Mol, and the
# output directory reserved for that pair.
prepared_md_inputs = []

for ligand_info in top_ligands_vina_pdbqts:
    ligand_id = ligand_info["ligand_id"]
    site_id = ligand_info["site_id"]
    vina_pdbqt_path = ligand_info["vina_pdbqt_path"]

    print(f"\n--- Processing Ligand with Meeko: {ligand_id} (Site: {site_id}) ---")
    print(f"  Input Vina PDBQT: {vina_pdbqt_path}")

    # Missing docking output is reported and skipped so the other entries still run.
    if not os.path.exists(vina_pdbqt_path):
        print(f"  ERROR: Vina PDBQT file not found: {vina_pdbqt_path}. Skipping.")
        continue

    # --- STEP 1: Convert Vina PDBQT Output to RDKit Molecule using Meeko ---
    # This assumes the Vina PDBQT contains Meeko's REMARK lines for SMILES & atom mapping.
    meeko_pdbqt_obj = PDBQTMolecule.from_file(vina_pdbqt_path, skip_typing=True)

    # RDKitMolCreate.from_pdbqt_mol can return a list of RDKit molecules.
    # If only one ligand was docked, this list should have one element.
    # Each RDKit molecule can have multiple conformers (poses).
    rdkit_mols_list_from_meeko = RDKitMolCreate.from_pdbqt_mol(meeko_pdbqt_obj)

    if not rdkit_mols_list_from_meeko or rdkit_mols_list_from_meeko[0] is None:
        print(f"  ERROR: Meeko's RDKitMolCreate failed to generate an RDKit molecule for {ligand_id} from {vina_pdbqt_path}.")
        print("    Ensure the PDBQT file was prepared by Meeko and contains 'REMARK MEKO ... SMILES ... MAP ...' lines.")
        continue

    # Assuming the first molecule in the list is our target ligand,
    # and its first conformer corresponds to Vina's best pose.
    rdkit_mol_from_meeko = rdkit_mols_list_from_meeko[0]

    if rdkit_mol_from_meeko.GetNumConformers() == 0:
        print(f"  ERROR: RDKit molecule from Meeko for {ligand_id} has no conformers. Docked pose coordinates not found/transferred.")
        continue

    # For MD we keep a single pose. The conformer is fetched from the ORIGINAL
    # molecule, so stripping conformers from the copy below does not affect it.
    conf = rdkit_mol_from_meeko.GetConformer(0)
    final_rdkit_mol_for_openff = Chem.Mol(rdkit_mol_from_meeko)  # Copy of the Meeko molecule
    final_rdkit_mol_for_openff.RemoveAllConformers()  # Drop every pose from the copy
    final_rdkit_mol_for_openff.AddConformer(conf, assignId=True)  # Re-add only the first pose

    print(f"  STEP 1: RDKit Mol object created by Meeko for {ligand_id}. NumAtoms: {final_rdkit_mol_for_openff.GetNumAtoms()}, NumConformers: {final_rdkit_mol_for_openff.GetNumConformers()}")
    print(f"    Sanity check SMILES from Meeko-RDKit mol: {Chem.MolToSmiles(final_rdkit_mol_for_openff, isomericSmiles=True)}")

    # Save the Meeko-generated RDKit molecule as an SDF for inspection.
    # The site id is part of the filename because the same ligand can be docked
    # at several sites; naming by ligand id alone made a later site silently
    # overwrite the pose exported for an earlier one.
    md_ready_sdf_path = os.path.join(md_ready_ligands_dir, f"{ligand_id}_{site_id}_meeko_MD_pose.sdf")
    with Chem.SDWriter(md_ready_sdf_path) as writer:
        writer.write(final_rdkit_mol_for_openff)
    print(f"    MD-ready ligand (from Meeko) saved for inspection: {md_ready_sdf_path}")

    prepared_md_inputs.append({
        "ligand_id": ligand_id,
        "site_id": site_id,
        "rdkit_mol": final_rdkit_mol_for_openff,  # Pass the RDKit Mol object directly
        "output_dir": os.path.join(base_output_dir, site_id, ligand_id)
    })


--- Processing Ligand with Meeko: CHEMBL4214707 (Site: A604) ---
  Input Vina PDBQT: A604/vina_results/CHEMBL4214707_docked.pdbqt
  STEP 1: RDKit Mol object created by Meeko for CHEMBL4214707. NumAtoms: 49, NumConformers: 1
    Sanity check SMILES from Meeko-RDKit mol: [H]c1c([H])c(Cl)c([H])c(C(=O)N2C(=O)C([H])([H])c3c([H])c([H])c(N([H])C(=O)c4c([H])c([H])c([H])c(C([H])([H])[H])c4[H])c([H])c3C2([H])[H])c1[H]
    MD-ready ligand (from Meeko) saved for inspection: MD_NVT_Simulations_OpenFF_MeekoExport/md_ready_ligands_from_meeko/CHEMBL4214707_meeko_MD_pose.sdf

--- Processing Ligand with Meeko: CHEMBL4214707 (Site: B605) ---
  Input Vina PDBQT: B605/vina_results/CHEMBL4214707_docked.pdbqt
  STEP 1: RDKit Mol object created by Meeko for CHEMBL4214707. NumAtoms: 49, NumConformers: 1
    Sanity check SMILES from Meeko-RDKit mol: [H]c1c([H])c(Cl)c([H])c(C(=O)N2C(=O)C([H])([H])c3c([H])c([H])c(N([H])C(=O)c4c([H])c([H])c([H])c(C([H])([H])[H])c4[H])c([H])c3C2([H])[H])c1[H]
    MD-ready ligand (