In [61]:
# %% [markdown]
# # NVT Simulation for Top 2 Ligands using OpenFF & Meeko for Ligand Export

# %%
# Essential Imports
import os
# import re # Not needed if Meeko handles SMILES from its remarks
# import subprocess # Not needed if Meeko handles PDBQT conversion
from rdkit import Chem # Still useful for inspection if needed
# from rdkit.Chem import AllChem # Meeko should handle conformer generation

# Meeko imports for PDBQT processing
from meeko import PDBQTMolecule
from meeko import RDKitMolCreate

from openff.toolkit import Molecule as OFFMolecule # Aliased to avoid clash with any RDKit Mol
from openff.units import unit as offunit
# from openff.interchange import Interchange # Not strictly needed if using SystemGenerator
from openff.toolkit.typing.engines.smirnoff import ForceField as OFFForceField

from openmm.app import PDBFile, Modeller, HBonds, PME
from openmm.app import Simulation as OpenMM_Simulation
from openmm.app import PDBReporter as OpenMM_PDBReporter
from openmm.app import StateDataReporter as OpenMM_StateDataReporter
from openmm import LangevinMiddleIntegrator, Platform
from openmm import unit as omm_unit

from openmmforcefields.generators import SystemGenerator

import parmed

# %% [markdown]
# ## I. Define Input Files and Parameters

# %%
# --- Input Data ---
receptor_pdb_file = "4EY7_fixed_by_pdbfixer.pdb" 

# Top 2 ligands - We only need their Vina output PDBQT paths.
# Meeko will read the SMILES and atom mapping from within these files.
top_ligands_vina_pdbqts = [
    {
        "ligand_id": "CHEMBL4214707",
        "site_id": "A604",
        "vina_pdbqt_path": "A604/vina_results/CHEMBL4214707_docked.pdbqt",
    },
    {
        "ligand_id": "CHEMBL4214707",
        "site_id": "B605",
        "vina_pdbqt_path": "B605/vina_results/CHEMBL4214707_docked.pdbqt",
    },
]

# --- MD Parameters (Same as before) ---
simulation_time = 1.0 * omm_unit.nanoseconds
dt = 2.0 * omm_unit.femtoseconds
temperature = 300.0 * omm_unit.kelvin
friction_coeff = 1.0 / omm_unit.picosecond
reporting_interval_trajectory_ps = 50.0
reporting_interval_data_ps = 10.0
equilibration_time_ps = 100.0

# --- Output Directories ---
base_output_dir = "MD_NVT_Simulations_OpenFF_MeekoExport" # New output dir name
os.makedirs(base_output_dir, exist_ok=True)
# No need for temp_ligand_prep_dir if Meeko handles it cleanly
# md_ready_ligands_dir might be useful if we decide to save the RDKit mol from Meeko as SDF
md_ready_ligands_dir = os.path.join(base_output_dir, "md_ready_ligands_from_meeko")
os.makedirs(md_ready_ligands_dir, exist_ok=True)

In [62]:
# %% [markdown]
# ## II. Ligand Preparation Loop (using Meeko)

# %%
prepared_md_inputs = [] # To store RDKit Mol objects and other info

for ligand_info in top_ligands_vina_pdbqts:
    ligand_id = ligand_info["ligand_id"]
    site_id = ligand_info["site_id"]
    vina_pdbqt_path = ligand_info["vina_pdbqt_path"]

    print(f"\n--- Processing Ligand with Meeko: {ligand_id} (Site: {site_id}) ---")
    print(f"  Input Vina PDBQT: {vina_pdbqt_path}")

    if not os.path.exists(vina_pdbqt_path):
        print(f"  ERROR: Vina PDBQT file not found: {vina_pdbqt_path}. Skipping.")
        continue

    # --- STEP 1: Convert Vina PDBQT Output to RDKit Molecule using Meeko ---
    # This assumes the Vina PDBQT contains Meeko's REMARK lines for SMILES & atom mapping.
    meeko_pdbqt_obj = PDBQTMolecule.from_file(vina_pdbqt_path, skip_typing=True)
    
    # RDKitMolCreate.from_pdbqt_mol can return a list of RDKit molecules.
    # If only one ligand was docked, this list should have one element.
    # Each RDKit molecule can have multiple conformers (poses).
    rdkit_mols_list_from_meeko = RDKitMolCreate.from_pdbqt_mol(meeko_pdbqt_obj)

    if not rdkit_mols_list_from_meeko or rdkit_mols_list_from_meeko[0] is None:
        print(f"  ERROR: Meeko's RDKitMolCreate failed to generate an RDKit molecule for {ligand_id} from {vina_pdbqt_path}.")
        print(f"    Ensure the PDBQT file was prepared by Meeko and contains 'REMARK MEKO ... SMILES ... MAP ...' lines.")
        continue
    
    # Assuming the first molecule in the list is our target ligand,
    # and its first conformer corresponds to Vina's best pose.
    rdkit_mol_from_meeko = rdkit_mols_list_from_meeko[0]
    
    if rdkit_mol_from_meeko.GetNumConformers() == 0:
        print(f"  ERROR: RDKit molecule from Meeko for {ligand_id} has no conformers. Docked pose coordinates not found/transferred.")
        continue

    # For MD, we typically use only one pose (conformer).
    # Let's create a new molecule with only the first conformer to ensure clarity.
    # (Though OpenFF from_rdkit would likely use the first one by default if multiple exist)
    conf = rdkit_mol_from_meeko.GetConformer(0)
    final_rdkit_mol_for_openff = Chem.Mol(rdkit_mol_from_meeko) # Create a new Mol object from the template
    final_rdkit_mol_for_openff.RemoveAllConformers() # Remove all conformers from the copy
    final_rdkit_mol_for_openff.AddConformer(conf, assignId=True) # Add only the best pose conformer

    print(f"  STEP 1: RDKit Mol object created by Meeko for {ligand_id}. NumAtoms: {final_rdkit_mol_for_openff.GetNumAtoms()}, NumConformers: {final_rdkit_mol_for_openff.GetNumConformers()}")
    print(f"    Sanity check SMILES from Meeko-RDKit mol: {Chem.MolToSmiles(final_rdkit_mol_for_openff, isomericSmiles=True)}")
    
    # Optionally, save this Meeko-generated RDKit molecule as an SDF for inspection
    md_ready_sdf_path = os.path.join(md_ready_ligands_dir, f"{ligand_id}_meeko_MD_pose.sdf")
    with Chem.SDWriter(md_ready_sdf_path) as writer:
        writer.write(final_rdkit_mol_for_openff)
    print(f"    MD-ready ligand (from Meeko) saved for inspection: {md_ready_sdf_path}")

    prepared_md_inputs.append({
        "ligand_id": ligand_id,
        "site_id": site_id,
        "rdkit_mol": final_rdkit_mol_for_openff, # Pass the RDKit Mol object directly
        "output_dir": os.path.join(base_output_dir, site_id, ligand_id)
    })
#rmsd rmsf ruchliwosc lexzy siedzxi i jak jest ru chliwe to szuka swojego iejsca i trzeba go znalezc, scanning alaninowy sprawdzic jak poszczxegolne wsplkwaja na wsiazania, jak bardz ligsand jest zahgniezdzony do wody mdtray alasys dynamika, krok po kroku co jest juicy w trajektoraich zeby byl pytry biotoite, (najbadziejprzyjemny jesli chodzi op oblsuge, i warto robuc cos przejrzystego) pytrryu ma wiecej pcji, 


--- Processing Ligand with Meeko: CHEMBL4214707 (Site: A604) ---
  Input Vina PDBQT: A604/vina_results/CHEMBL4214707_docked.pdbqt
  STEP 1: RDKit Mol object created by Meeko for CHEMBL4214707. NumAtoms: 49, NumConformers: 1
    Sanity check SMILES from Meeko-RDKit mol: [H]c1c([H])c(Cl)c([H])c(C(=O)N2C(=O)C([H])([H])c3c([H])c([H])c(N([H])C(=O)c4c([H])c([H])c([H])c(C([H])([H])[H])c4[H])c([H])c3C2([H])[H])c1[H]
    MD-ready ligand (from Meeko) saved for inspection: MD_NVT_Simulations_OpenFF_MeekoExport/md_ready_ligands_from_meeko/CHEMBL4214707_meeko_MD_pose.sdf

--- Processing Ligand with Meeko: CHEMBL4214707 (Site: B605) ---
  Input Vina PDBQT: AB/vina_results/CHEMBL4214707_docked.pdbqt
  ERROR: Vina PDBQT file not found: AB/vina_results/CHEMBL4214707_docked.pdbqt. Skipping.


In [63]:
protein_ff_name = 'amber14/protein.ff14SB.xml'
water_ff_name = 'amber14/tip3p.xml'
openff_ligand_ff_name = 'openff_unconstrained-2.1.0.offxml' # Sage 2.1.0

In [None]:
# %%
# %% [markdown]
# ## III. MD Simulation Loop (Using RDKit Mol object for OpenFF)

# %%
# Ensure essential OpenFF Toolkit components are imported for this cell
import os 
import sys # <--- IMPORT SYS HERE
import openff.toolkit 
from openff.toolkit import Molecule as OFFMolecule 
from openff.toolkit.typing.engines.smirnoff import ForceField as OFFForceField
from openff.interchange import Interchange 
from openff.units import unit as offunit


# Ensure OpenMM components are imported
from openmm.app import PDBFile, Modeller, HBonds, PME 
from openmm.app import Simulation as OpenMM_Simulation
from openmm.app import PDBReporter as OpenMM_PDBReporter
from openmm.app import StateDataReporter as OpenMM_StateDataReporter
from openmm import LangevinMiddleIntegrator, Platform
from openmm import unit as omm_unit

# Ensure SystemGenerator is imported
from openmmforcefields.generators import SystemGenerator

# Print the OpenFF Toolkit version being used by this kernel for confirmation
print(f"OpenFF Toolkit Version in use by this kernel: {openff.toolkit.__version__}")

# --- These variables are assumed to be defined in your Section I or earlier cells ---
# receptor_pdb_file = "your_protein_fixed.pdb" 
# protein_ff_name = 'amber14/protein.ff14SB.xml'
# water_ff_name = 'amber14/tip3p.xml'
# openff_ligand_ff_name = 'openff_unconstrained-2.1.0.offxml'
# dt, temperature, friction_coeff, reporting_interval_trajectory_ps, etc.
# prepared_md_inputs = [] # This should be populated by your Section II
# --- End of assumed definitions ---

# Calculate reporting intervals in steps
report_steps_traj = int(reporting_interval_trajectory_ps * omm_unit.picoseconds / dt)
report_steps_data = int(reporting_interval_data_ps * omm_unit.picoseconds / dt)
equilibration_steps = int(equilibration_time_ps * omm_unit.picoseconds / dt)
total_production_steps = int(simulation_time / dt)

# Load OpenFF force field for ligand
openff_ligand_ff = OFFForceField(openff_ligand_ff_name, allow_cosmetic_attributes=True)


for md_run_info in prepared_md_inputs:
    ligand_id = md_run_info["ligand_id"]
    site_id = md_run_info["site_id"]
    rdkit_mol_for_ligand = md_run_info["rdkit_mol"] 
    sim_output_dir = md_run_info["output_dir"]
    os.makedirs(sim_output_dir, exist_ok=True)

    print(f"\n--- Starting MD Simulation for: {ligand_id} (Site: {site_id}) ---")
    print(f"  Output Directory: {sim_output_dir}")

    # Load receptor PDB
    print(f"  Loading receptor: {receptor_pdb_file}") 
    protein_pdb = PDBFile(receptor_pdb_file)

    # Convert RDKit Mol to OpenFF Molecule
    print(f"  Converting RDKit Mol for {ligand_id} to OpenFF Molecule...")
    ligand_off = OFFMolecule.from_rdkit(rdkit_mol_for_ligand, allow_undefined_stereo=True)
    print(f"    OpenFF Molecule created. SMILES: {ligand_off.to_smiles(isomeric=True, explicit_hydrogens=False)}")

    # Prepare OpenMM Modeller with protein
    modeller = Modeller(protein_pdb.topology, protein_pdb.positions)
    
    # Get OpenMM topology and positions for the ligand
    ligand_positions_nm = ligand_off.conformers[0].m_as(offunit.nanometer) * omm_unit.nanometer
    print(f"    Attempting to get OpenMM topology for ligand: {ligand_id}")
    openmm_ligand_topology = None
    off_topology_for_ligand = ligand_off.to_topology()

    # Attempt 1: Direct method (OpenFF Toolkit >= 0.10)
    if hasattr(off_topology_for_ligand, 'to_openmm_topology') and callable(getattr(off_topology_for_ligand, 'to_openmm_topology', None)):
        print("    Trying direct 'Topology.to_openmm_topology()' method...")
        try:
            openmm_ligand_topology = off_topology_for_ligand.to_openmm_topology(ensure_unique_atom_names='residues') 
            print("    Successfully obtained OpenMM topology via direct Topology.to_openmm_topology().")
        except Exception as e_direct_method:
            print(f"    ERROR during direct Topology.to_openmm_topology() call: {e_direct_method}")
            openmm_ligand_topology = None
    else:
        print("    Direct 'Topology.to_openmm_topology()' method not found or not callable on the object. Likely older OpenFF Toolkit.")

    # Attempt 2: Via Interchange (Fallback or primary for older versions / specific needs)
    if openmm_ligand_topology is None:
        print("    Trying to get OpenMM topology via Interchange...")
        try:
            ligand_interchange = Interchange.from_smirnoff(force_field=openff_ligand_ff, topology=[ligand_off])
            openmm_ligand_topology = ligand_interchange.to_openmm_topology(ensure_unique_atom_names='residues')
            print("    Successfully obtained OpenMM topology via Interchange.")
        except Exception as e_interchange:
            print(f"    ERROR: Failed to get OpenMM topology via Interchange for {ligand_id}: {e_interchange}")
            import traceback
            traceback.print_exc()
            
    if openmm_ligand_topology is None:
        raise ValueError(f"CRITICAL: Could not obtain OpenMM topology for ligand {ligand_id}.")

    # Add ligand to Modeller
    modeller.add(openmm_ligand_topology, ligand_positions_nm)
    print(f"  Protein and ligand added to Modeller. Total atoms: {modeller.topology.getNumAtoms()}")

    # Initialize SystemGenerator
    print("  Initializing SystemGenerator...")
    periodic_ff_kwargs = {
        'nonbondedMethod': PME,
        'nonbondedCutoff': 1.0 * omm_unit.nanometer,
        'constraints': HBonds
    }
    system_generator = SystemGenerator(
        forcefields=[protein_ff_name, water_ff_name],
        small_molecule_forcefield=openff_ligand_ff_name,
        molecules=[ligand_off], 
        cache=os.path.join(sim_output_dir, 'ff_cache.json'),
        periodic_forcefield_kwargs=periodic_ff_kwargs
    )
    
    # Add solvent using Modeller.
    print("  Adding solvent and ions with Modeller...")
    modeller.addSolvent(system_generator.forcefield, model='tip3p', padding=1.0*omm_unit.nanometer, ionicStrength=0.15*omm_unit.molar)
    print(f"  System solvated. Total atoms: {modeller.topology.getNumAtoms()}")

    print(f"  Final topology before system creation: NumResidues={modeller.topology.getNumResidues()}, NumAtoms={modeller.topology.getNumAtoms()}")

    # Create the OpenMM System
    print("  Creating final OpenMM system...")
    system = system_generator.create_system(modeller.topology)

    # Set up and run simulation
    integrator = LangevinMiddleIntegrator(temperature, friction_coeff, dt)
    
    # Platform selection
    platform = None
    platform_properties = {}
    selected_platform_name = "Unknown"

    try:
        platform = Platform.getPlatformByName('CUDA')
        platform_properties = {'Precision': 'mixed'}
        selected_platform_name = 'CUDA'
        print(f"  Successfully selected {selected_platform_name} platform.")
    except Exception as e_cuda:
        print(f"  CUDA platform not available or failed: {e_cuda}")
        # If CUDA fails, check operating system
        if sys.platform == "darwin": # 'darwin' is the system identifier for macOS
            print("  On macOS and CUDA not available. Defaulting to CPU platform to avoid potential OpenCL issues.")
            try:
                platform = Platform.getPlatformByName('CPU')
                platform_properties = {} # No specific properties needed for CPU
                selected_platform_name = 'CPU (macOS default)'
            except Exception as e_cpu_mac:
                # This should be very rare, as CPU platform is almost always available
                print(f"  FATAL: CPU platform not found on macOS: {e_cpu_mac}. This is highly unusual.")
                raise 
        else: # Not on macOS, attempt OpenCL then CPU
            try:
                print("  Attempting to use OpenCL platform.")
                platform = Platform.getPlatformByName('OpenCL')
                platform_properties = {'Precision': 'mixed'}
                selected_platform_name = 'OpenCL'
            except Exception as e_opencl:
                print(f"  OpenCL platform not available or failed: {e_opencl}")
                print("  Defaulting to CPU platform.")
                try:
                    platform = Platform.getPlatformByName('CPU')
                    platform_properties = {}
                    selected_platform_name = 'CPU (OpenCL fallback)'
                except Exception as e_cpu_fallback:
                    print(f"  FATAL: CPU platform also not available: {e_cpu_fallback}. This is highly unusual.")
                    raise 
    
    if platform is None:
        # This block is an ultimate fallback, should ideally not be reached.
        print("  No platform explicitly selected, attempting CPU as final resort.")
        try:
            platform = Platform.getPlatformByName('CPU')
            platform_properties = {}
            selected_platform_name = 'CPU (Ultimate fallback)'
        except Exception as e_final_cpu:
             print(f"  FATAL: CPU platform failed even as ultimate fallback: {e_final_cpu}")
             raise

    print(f"  Using Platform: {selected_platform_name} (Actual OpenMM Platform Name: {platform.getName() if platform else 'None'})")

    simulation = OpenMM_Simulation(modeller.topology, system, integrator, platform, platform_properties)
    simulation.context.setPositions(modeller.positions)

    print("  Minimizing energy...")
    simulation.minimizeEnergy() 
    min_state = simulation.context.getState(getEnergy=True, getPositions=True)
    min_pdb_path = os.path.join(sim_output_dir, f"{ligand_id}_minimized.pdb")
    with open(min_pdb_path, 'w') as f:
        PDBFile.writeFile(simulation.topology, min_state.getPositions(), f)
    print(f"    Minimized structure saved to: {min_pdb_path}")

    print(f"  Running NVT equilibration for {equilibration_time_ps} ps...")
    simulation.context.setVelocitiesToTemperature(temperature)
    simulation.step(equilibration_steps)
    
    print(f"  Running NVT production for {simulation_time.value_in_unit(omm_unit.nanoseconds)} ns...")
    traj_pdb_path = os.path.join(sim_output_dir, f"{ligand_id}_trajectory.pdb")
    data_csv_path = os.path.join(sim_output_dir, f"{ligand_id}_sim_data.csv")
    
    simulation.reporters.append(OpenMM_PDBReporter(traj_pdb_path, report_steps_traj))
    simulation.reporters.append(OpenMM_StateDataReporter(data_csv_path, report_steps_data,
                                                       step=True, time=True, potentialEnergy=True, temperature=True,
                                                       volume=True, density=True, speed=True, remainingTime=True,
                                                       totalSteps=total_production_steps, 
                                                       separator=','))
    simulation.currentStep = 0 
    simulation.step(total_production_steps)
    print(f"  Simulation for {ligand_id} finished. Trajectory: {traj_pdb_path}, Data: {data_csv_path}")

print("\nAll MD simulations complete.")

OpenFF Toolkit Version in use by this kernel: 0.16.4

--- Starting MD Simulation for: CHEMBL4214707 (Site: A604) ---
  Output Directory: MD_NVT_Simulations_OpenFF_MeekoExport/A604/CHEMBL4214707
  Loading receptor: 4EY7_fixed_by_pdbfixer.pdb
  Converting RDKit Mol for CHEMBL4214707 to OpenFF Molecule...
    OpenFF Molecule created. SMILES: Cc1cccc(C(=O)Nc2ccc3c(c2)CN(C(=O)c2cccc(Cl)c2)C(=O)C3)c1
    Attempting to get OpenMM topology for ligand: CHEMBL4214707
    Direct 'Topology.to_openmm_topology()' method not found or not callable on the object. Likely older OpenFF Toolkit.
    Trying to get OpenMM topology via Interchange...
    Successfully obtained OpenMM topology via Interchange.
  Protein and ligand added to Modeller. Total atoms: 16389
  Initializing SystemGenerator...
  Adding solvent and ions with Modeller...
  System solvated. Total atoms: 329489
  Final topology before system creation: NumResidues=105824, NumAtoms=329489
  Creating final OpenMM system...
  CUDA platform not 

In [68]:
# Temporary test
from openmm.app import ForceField
print("Testing protein parameterization alone...")
protein_topology = PDBFile(receptor_pdb_file).topology
# Use your absolute path for protein_ff_name, e.g., from amber14/protein.ff14SB.xml
# and water_ff_name e.g. from amber14/tip3pfb.xml
omm_ff = ForceField(protein_ff_name, water_ff_name) 
try:
    system = omm_ff.createSystem(protein_topology, nonbondedMethod=PME, constraints=HBonds)
    print("SUCCESS: Protein topology parameterized by Amber FFs directly.")
except ValueError as e:
    print(f"ERROR parameterizing protein directly: {e}")
    # This would likely give the "No template found for residue..." error if ALA 530 is still an issue for Amber FF

Testing protein parameterization alone...
SUCCESS: Protein topology parameterized by Amber FFs directly.


In [59]:
# Minimal test focusing on addSolvent for protein
from openmm.app import PDBFile, Modeller
from openmm import unit as omm_unit
from openmm.app import ForceField # Import OpenMM's ForceField directly

# Ideally, use the PDBFixer output. If not, use your original.
receptor_pdb_file_to_test = "4EY7_fixed_by_pdbfixer.pdb"
# receptor_pdb_file_to_test = "4EY7_prepped.pdb" # Your current file causing issues

protein_ff_path = 'amber14/protein.ff14SB.xml'
water_ff_path = 'amber14/tip3p.xml' # TIP3P water model

print(f"Loading protein from: {receptor_pdb_file_to_test}")
protein_pdb = PDBFile(receptor_pdb_file_to_test)
modeller = Modeller(protein_pdb.topology, protein_pdb.positions)

print("Initializing OpenMM ForceField for protein (amber14/protein.ff14SB.xml) and water (amber14/tip3p.xml)...")
# This mimics the 'forcefield' object that system_generator would pass to addSolvent
# This ForceField object will contain templates for protein and water.
ff_for_solvent_test = ForceField(protein_ff_path, water_ff_path)

print("Attempting to add solvent to protein-only Modeller object...")
try:
    # Add solvent using the explicitly loaded Amber protein and TIP3P water force fields
    modeller.addSolvent(ff_for_solvent_test, model='tip3p', padding=1.0*omm_unit.nanometer, ionicStrength=0.15*omm_unit.molar)
    print("Successfully added solvent to protein-only Modeller.")
except ValueError as e:
    print(f"ERROR during addSolvent for protein-only: {e}")
    print("This strongly indicates the issue is with the protein PDB structure's compatibility with ff14SB.")
    print("Please use PDBFixer on your original PDB file.")
except Exception as e_gen:
    print(f"An unexpected error occurred during the protein-only solvent test: {e_gen}")

Loading protein from: 4EY7_fixed_by_pdbfixer.pdb
Initializing OpenMM ForceField for protein (amber14/protein.ff14SB.xml) and water (amber14/tip3p.xml)...
Attempting to add solvent to protein-only Modeller object...
Successfully added solvent to protein-only Modeller.


In [57]:
# Add these imports to your script, typically near the top
from pdbfixer import PDBFixer
from openmm.app import PDBFile as OpenMMPDBFile # Use an alias to avoid potential PDBFile name clashes

# --- Define your input and output PDB filenames ---
original_receptor_pdb_file = "4EY7_prepped.pdb"  # Your current PDB file
fixed_receptor_pdb_file = "4EY7_fixed_by_pdbfixer.pdb" # Name for the cleaned PDB file

print(f"Attempting to fix '{original_receptor_pdb_file}' using PDBFixer...")

try:
    # Initialize PDBFixer with your PDB file
    fixer = PDBFixer(filename=original_receptor_pdb_file)

    # Apply common fixes:
    fixer.findMissingResidues()      # Identifies gaps in chains (does not build them by default)
    fixer.findNonstandardResidues()  # Identifies non-standard residue names
    fixer.replaceNonstandardResidues() # Attempts to convert common non-standard to standard
    fixer.findMissingAtoms()         # Finds residues with missing atoms
    fixer.addMissingAtoms()          # Adds missing heavy atoms and OXT for C-termini if needed

    # Add missing hydrogens. This is crucial as force fields expect them.
    # pH 7.0 is a common default.
    fixer.addMissingHydrogens(7.0)

    # Write the fixed PDB structure to a new file
    with open(fixed_receptor_pdb_file, 'w') as outfile:
        OpenMMPDBFile.writeFile(fixer.topology, fixer.positions, outfile, keepIds=True)
    
    print(f"PDBFixer processing complete. Cleaned PDB saved as: '{fixed_receptor_pdb_file}'")
    
    # !!! IMPORTANT !!!
    # You MUST now update your main script to use this new, fixed PDB file.
    # Change the line:
    # receptor_pdb_file = "4EY7_prepped.pdb"
    # TO:
    # receptor_pdb_file = "4EY7_fixed_by_pdbfixer.pdb"

except FileNotFoundError:
    print(f"ERROR: The PDB file '{original_receptor_pdb_file}' was not found. Please check the path.")
except Exception as e:
    print(f"An error occurred during PDBFixer processing: {e}")
    print("Please ensure your PDB file is valid and PDBFixer is installed correctly.")

Attempting to fix '4EY7_prepped.pdb' using PDBFixer...
PDBFixer processing complete. Cleaned PDB saved as: '4EY7_fixed_by_pdbfixer.pdb'


In [58]:
# (This is a snippet from the previous PDBFixer code)
from pdbfixer import PDBFixer
from openmm.app import PDBFile as OpenMMPDBFile 

original_receptor_pdb_file = "4EY7_prepped.pdb"
fixed_receptor_pdb_file = "4EY7_fixed_by_pdbfixer.pdb" 

print(f"Attempting to fix '{original_receptor_pdb_file}' using PDBFixer...")
try:
    fixer = PDBFixer(filename=original_receptor_pdb_file)
    fixer.findMissingResidues()      
    fixer.findNonstandardResidues()  
    fixer.replaceNonstandardResidues() 
    fixer.findMissingAtoms()         
    fixer.addMissingAtoms() # This is key for adding OXT etc.        
    fixer.addMissingHydrogens(7.0) # Also key for correct terminal chemistry
    with open(fixed_receptor_pdb_file, 'w') as outfile:
        OpenMMPDBFile.writeFile(fixer.topology, fixer.positions, outfile, keepIds=True)
    print(f"PDBFixer processing complete. Cleaned PDB saved as: '{fixed_receptor_pdb_file}'")
except Exception as e:
    print(f"An error occurred during PDBFixer processing: {e}")

Attempting to fix '4EY7_prepped.pdb' using PDBFixer...
PDBFixer processing complete. Cleaned PDB saved as: '4EY7_fixed_by_pdbfixer.pdb'
