In [8]:

# Prepare a protein using PDBFixer

# This notebook reads a PDB file from the PDB directory and prepares the protein using PDBFixer.

# All heteroatoms (water, ions, and ligands) are removed.

In [1]:
# Importing necessary libraries
from pathlib import Path
import os
import openmm.app as app
import pdbfixer

In [2]:
# Folder that will receive the prepared structure
new_directory = Path("protein_prepared")

# exist_ok=True makes this cell safe to re-run: an existing folder is reused
new_directory.mkdir(exist_ok=True)

print(f"Directory '{new_directory}' created successfully.")

Directory 'protein_prepared' created successfully.


In [3]:
# Create a directory named PDB and save the input PDB file in it (the next cell reads PDB/5vfi.pdb)

In [4]:
# Input structure to prepare; its stem ("5vfi") is reused for the output file name
pdb_file = Path("PDB/5vfi.pdb")

In [5]:
def prepare_protein(
    pdb_file, ignore_missing_residues=True, ignore_terminal_missing_residues=True, ph=7.0
):
    """
    Use pdbfixer to prepare the protein from a PDB file.

    Non-standard residues are replaced by their standard equivalents, hetero atoms
    (ligands, ions and crystal water) are removed, and missing heavy atoms and
    hydrogens are added to the existing residues. Missing residues are ignored by
    default, but can be built.

    Parameters
    ----------
    pdb_file: pathlib.Path or str
        PDB file containing the system to simulate.
    ignore_missing_residues: bool, optional
        If missing residues should be ignored or built.
    ignore_terminal_missing_residues: bool, optional
        If missing residues at the beginning and the end of a chain should be ignored or built.
    ph: float, optional
        pH value used to determine protonation state of residues

    Returns
    -------
    fixer: pdbfixer.pdbfixer.PDBFixer
        Prepared protein system.
    """
    fixer = pdbfixer.PDBFixer(str(pdb_file))

    # Identify missing residues first; the result is also needed later when
    # missing atoms are identified.
    fixer.findMissingResidues()

    if ignore_missing_residues:
        # Ignore every missing residue: nothing will be built.
        fixer.missingResidues = {}
    elif ignore_terminal_missing_residues:
        # Keys are (chain index, residue index). An index of 0 marks a gap before
        # the first resolved residue, an index equal to the chain length marks a
        # gap after the last one; both are terminal gaps and are dropped.
        chain_lengths = [len(list(chain.residues())) for chain in fixer.topology.chains()]
        fixer.missingResidues = {
            (chain_index, residue_index): residue_names
            for (chain_index, residue_index), residue_names in fixer.missingResidues.items()
            if residue_index not in (0, chain_lengths[chain_index])
        }

    # Replace non-standard residues BEFORE removing heterogens. Heterogen removal
    # deletes every residue that is not a standard amino acid or nucleotide, so
    # running it first would throw away modified residues (and their
    # experimental coordinates) instead of converting them.
    fixer.findNonstandardResidues()  # find non-standard residues
    fixer.replaceNonstandardResidues()  # replace them with their standard equivalent

    # keepWater=False also removes crystal water; co-crystallized ligands and ions
    # are unknown to PDBFixer and are removed as well.
    fixer.removeHeterogens(keepWater=False)

    fixer.findMissingAtoms()  # find missing heavy atoms
    fixer.addMissingAtoms()  # add missing atoms and (if requested) missing residues
    fixer.addMissingHydrogens(ph)  # add missing hydrogens for the given pH
    return fixer

In [6]:
# Run the preparation: internal gaps are rebuilt, while missing residues at the
# chain termini stay ignored (ignore_terminal_missing_residues keeps its default)
prepared_protein = prepare_protein(
    pdb_file,
    ignore_missing_residues=False,
)

In [7]:
# Write the prepared structure into the output folder, named after the input file
output_pdb_path = new_directory / f"prepared_{pdb_file.stem}.pdb"
with output_pdb_path.open('w') as pdb_handle:
    app.PDBFile.writeFile(
        prepared_protein.topology,
        prepared_protein.positions,
        pdb_handle,
    )
print(f"Prepared protein saved to '{output_pdb_path}'.")

Prepared protein saved to 'protein_prepared/prepared_5vfi.pdb'.
