In [None]:
import pdbfixer
import mdtraj as md
from simtk.openmm.app import PDBFile

In [None]:
# Build the PDBFixer instance from the renumbered 4jha structure file.
input_pdb_path = '../data/renumbered/4jha.pdb'
fixer = pdbfixer.PDBFixer(filename=input_pdb_path)

In [None]:
# Identify missing residues, then display what was found.
# missingResidues is treated below as a dict: the first element of each key is
# used as an index into the topology's chain list, and each value is a list of
# residue names (e.g. ['LYS', 'SER']).
fixer.findMissingResidues()
fixer.missingResidues

In [None]:
# Override which residues get rebuilt for gaps in the first chain.
# Every missingResidues entry whose chain has index 0 is replaced with the
# two-residue list ['LYS', 'SER']; entries belonging to other chains are left
# exactly as findMissingResidues() reported them.
# NOTE(review): no length filter (e.g. "> 10 residues") is applied here, and the
# override is not restricted to the C-terminus of the chain -- confirm that this
# matches the intended treatment of chain 0.
topology_chains = list(fixer.topology.chains())
# Iterate over a snapshot of the keys so the dict can be reassigned in the loop.
for gap_key in list(fixer.missingResidues.keys()):
    owning_chain = topology_chains[gap_key[0]]  # key[0] indexes the chain list
    if owning_chain.index != 0:
        continue  # only chain 0 is overridden
    fixer.missingResidues[gap_key] = ['LYS', 'SER']


In [None]:
# Re-display the missing-residue map to check its entries before atoms are added.
fixer.missingResidues

In [None]:
# Identify nonstandard residues and display what was found.
fixer.findNonstandardResidues()
fixer.nonstandardResidues
# Replacement is currently disabled: nonstandard residues are only reported.
# Uncomment the next line to replace them.
# fixer.replaceNonstandardResidues()

In [None]:
# Strip heterogens from the structure.
#
# The single argument says whether water molecules are kept:
#   - True keeps water molecules while removing all other heterogens;
#   - False removes all heterogens including water.
# False is used here, so water is removed as well.

fixer.removeHeterogens(keepWater=False)


In [None]:
# findMissingAtoms() identifies all missing heavy atoms and stores them in two
# fields, missingAtoms and missingTerminals. Each is a dictionary whose keys
# are Residue objects and whose values are lists of atom names:
#   - missingAtoms holds standard atoms that should be present in any residue
#     of that type;
#   - missingTerminals holds terminal atoms that should be present at the
#     start or end of a chain.
# Entries may be removed from these dictionaries before continuing, to prevent
# certain atoms from being added.

fixer.findMissingAtoms()


In [None]:
# addMissingAtoms() is the point at which all heavy atoms get added:
#   - the atoms identified by findMissingAtoms();
#   - the missing residues identified by findMissingResidues().
# If replaceNonstandardResidues() was used to modify any residues, it will have
# removed atoms that do not belong in the replacement residue, but it will not
# have added ones that are missing from the original residue; those are also
# added at this point.

fixer.addMissingAtoms()

In [None]:
# Write the repaired structure to disk.
# The file is opened in a `with` block so the handle is flushed and closed as
# soon as writing finishes (the bare open() call previously left it open).
# keepIds=True asks PDBFile to keep the chain and residue IDs stored in the
# topology instead of generating new ones.
with open('../data/renumbered/4jha_clean.pdb', 'w') as output_file:
    PDBFile.writeFile(fixer.topology, fixer.positions, output_file, keepIds=True)
