In [3]:
import pdbfixer
import mdtraj as md
from simtk.openmm.app import PDBFile
from simtk.openmm.app.topology import Topology

# 5UDC

In [26]:
# Create the PDBFixer instance for the 5UDC input structure
fixer = pdbfixer.PDBFixer(
    filename='../data/rerefinement_gyorgy/5udc_final_v2_refmac1_splitchain_capped.pdb'
)

In [18]:
# Identify missing residues.
# findMissingResidues() populates fixer.missingResidues; the bare expression on
# the last line displays it. In the recorded output the dict maps
# (chain index, residue position) tuples to lists of residue names.
fixer.findMissingResidues()
fixer.missingResidues

{(0, 0): ['GLN'],
 (0, 139): ['SER', 'LYS', 'SER', 'THR', 'SER', 'GLY'],
 (0, 217): ['GLU', 'PRO', 'LYS', 'SER'],
 (1, 211): ['GLY', 'GLU', 'CYS'],
 (2, 0): ['MET',
  'GLU',
  'LEU',
  'LEU',
  'ILE',
  'LEU',
  'LYS',
  'ALA',
  'ASN',
  'ALA',
  'ILE',
  'THR',
  'THR',
  'ILE',
  'LEU',
  'THR',
  'ALA',
  'VAL',
  'THR',
  'PHE',
  'CYS',
  'PHE',
  'ALA',
  'SER',
  'GLY',
  'GLN'],
 (4, 0): ['GLN'],
 (4, 139): ['SER', 'LYS', 'SER', 'THR', 'SER', 'GLY', 'GLY'],
 (4, 218): ['LYS', 'SER'],
 (5, 211): ['GLY', 'GLU', 'CYS'],
 (6, 0): ['MET',
  'GLU',
  'LEU',
  'LEU',
  'ILE',
  'LEU',
  'LYS',
  'ALA',
  'ASN',
  'ALA',
  'ILE',
  'THR',
  'THR',
  'ILE',
  'LEU',
  'THR',
  'ALA',
  'VAL',
  'THR',
  'PHE',
  'CYS',
  'PHE',
  'ALA',
  'SER',
  'GLY',
  'GLN'],
 (8, 139): ['SER', 'SER', 'LYS', 'SER', 'THR', 'SER', 'GLY', 'GLY'],
 (9, 213): ['CYS'],
 (10, 0): ['MET',
  'GLU',
  'LEU',
  'LEU',
  'ILE',
  'LEU',
  'LYS',
  'ALA',
  'ASN',
  'ALA',
  'ILE',
  'THR',
  'THR',
  'ILE',
 

In [19]:
# Trim long missing fragments that sit at the start or end of a chain.
# Internal gaps and terminal fragments of 10 residues or fewer are left untouched.
chain_list = list(fixer.topology.chains())
for key in list(fixer.missingResidues):  # snapshot the keys: the dict is edited inside the loop
    chain = chain_list[key[0]]
    fragment = fixer.missingResidues[key]
    # A position of 0 or of the chain's residue count marks a terminal fragment
    is_terminal = key[1] in (0, len(list(chain.residues())))
    if not is_terminal or len(fragment) <= 10:
        continue
    if fragment[0] == 'LEU':
        # Keep a single LEU even though the rest of the fragment is not rebuilt
        fixer.missingResidues[key] = ['LEU']
    elif fragment[-1] == 'GLN':
        # Keep a single GLN even though the rest of the fragment is not rebuilt
        fixer.missingResidues[key] = ['GLN']
    else:
        # Report the fragment that is dropped entirely
        print(key, chain, fragment)
        del fixer.missingResidues[key]

In [20]:
# Display the missing-residue table after the terminal fragments were trimmed
fixer.missingResidues

{(0, 0): ['GLN'],
 (0, 139): ['SER', 'LYS', 'SER', 'THR', 'SER', 'GLY'],
 (0, 217): ['GLU', 'PRO', 'LYS', 'SER'],
 (1, 211): ['GLY', 'GLU', 'CYS'],
 (2, 0): ['GLN'],
 (4, 0): ['GLN'],
 (4, 139): ['SER', 'LYS', 'SER', 'THR', 'SER', 'GLY', 'GLY'],
 (4, 218): ['LYS', 'SER'],
 (5, 211): ['GLY', 'GLU', 'CYS'],
 (6, 0): ['GLN'],
 (8, 139): ['SER', 'SER', 'LYS', 'SER', 'THR', 'SER', 'GLY', 'GLY'],
 (9, 213): ['CYS'],
 (10, 0): ['GLN'],
 (10, 71): ['GLN']}

In [21]:
# Identify nonstandard residues.
# findNonstandardResidues() populates fixer.nonstandardResidues, which is then
# displayed; the recorded output for this cell is an empty list.
fixer.findNonstandardResidues()
fixer.nonstandardResidues

[]

In [22]:
# Remove heterogens.
#
# keepWater controls what happens to water molecules:
#   keepWater=False -> remove all heterogens, including water
#   keepWater=True  -> keep water, remove every other heterogen

fixer.removeHeterogens(keepWater=False)


In [23]:
# findMissingAtoms() identifies all missing heavy atoms and stores them in two
# fields, missingAtoms and missingTerminals. Each is a dictionary whose keys
# are Residue objects and whose values are lists of atom names:
#   - missingAtoms: standard atoms that should be present in any residue of that type
#   - missingTerminals: terminal atoms that should be present at the start or end of a chain
# Atoms can be removed from these dictionaries before continuing to prevent
# them from being added.

fixer.findMissingAtoms()

In [24]:
# addMissingAtoms() is the point at which all heavy atoms get added. This
# includes the atoms identified by findMissingAtoms() as well as the missing
# residues identified by findMissingResidues().
# If replaceNonstandardResidues() was used to modify any residues, it will have
# removed atoms that do not belong in the replacement residue, but it will not
# have added atoms that are missing from the original residue; those are also
# added here.

fixer.addMissingAtoms()

In [25]:
# Write the repaired 5UDC structure to disk.
# The output file is opened in a context manager so the handle is flushed and
# closed as soon as writing finishes (or fails), rather than being left open
# for the lifetime of the kernel.
with open('../data/rerefinement_gyorgy/5udc_final_v2_refmac1_clean.pdb', 'w') as pdb_out:
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)


# 4JHW

In [8]:
# Create the PDBFixer instance for the 4JHW input structure
# (this rebinds `fixer`, replacing any previously loaded structure)
fixer = pdbfixer.PDBFixer(
    filename="../data/rerefinement_gyorgy/4jhw_final_v2_refmac1_splitchain_capped.pdb"
)


In [9]:
# Identify missing residues.
# findMissingResidues() populates fixer.missingResidues; the bare expression on
# the last line displays it. In the recorded output the dict maps
# (chain index, residue position) tuples to lists of residue names.
fixer.findMissingResidues()
fixer.missingResidues

{(0, 224): ['GLU', 'PRO', 'LYS', 'SER', 'CYS', 'ASP'],
 (1, 211): ['GLY', 'GLU', 'CYS'],
 (2, 72): ['GLN']}

In [10]:
# Adjust the missing-residue table before rebuilding:
#   1. every entry on chain 0 is replaced by the two residues LYS, SER
#   2. terminal fragments longer than 10 residues are dropped
# NOTE(review): step 1 applies to all missing-residue entries on chain 0, not
# only a C-terminal one, and an earlier comment here read "Do not add back KS"
# although the code keeps exactly LYS, SER -- confirm the intended behaviour.
chain_list = list(fixer.topology.chains())
for key in list(fixer.missingResidues):  # snapshot the keys: the dict is edited inside the loop
    chain = chain_list[key[0]]
    if chain.index == 0:
        fixer.missingResidues[key] = ['LYS', 'SER']
    fragment = fixer.missingResidues[key]
    # A position of 0 or of the chain's residue count marks a terminal fragment
    is_terminal = key[1] in (0, len(list(chain.residues())))
    if is_terminal and len(fragment) > 10:
        # Report the fragment that is dropped entirely
        print(key, chain)
        del fixer.missingResidues[key]

In [11]:
# Display the missing-residue table after the adjustments made to it
fixer.missingResidues

{(0, 224): ['LYS', 'SER'], (1, 211): ['GLY', 'GLU', 'CYS'], (2, 72): ['GLN']}

In [12]:
# Identify nonstandard residues.
# findNonstandardResidues() populates fixer.nonstandardResidues, which is then
# displayed; the recorded output for this cell is an empty list.
fixer.findNonstandardResidues()
fixer.nonstandardResidues
# Note: fixer.replaceNonstandardResidues() is deliberately not called in this cell.

[]

In [13]:
# Remove heterogens.
#
# keepWater controls what happens to water molecules:
#   keepWater=False -> remove all heterogens, including water
#   keepWater=True  -> keep water, remove every other heterogen

fixer.removeHeterogens(keepWater=False)


In [14]:
# findMissingAtoms() identifies all missing heavy atoms and stores them in two
# fields, missingAtoms and missingTerminals. Each is a dictionary whose keys
# are Residue objects and whose values are lists of atom names:
#   - missingAtoms: standard atoms that should be present in any residue of that type
#   - missingTerminals: terminal atoms that should be present at the start or end of a chain
# Atoms can be removed from these dictionaries before continuing to prevent
# them from being added.

fixer.findMissingAtoms()


In [15]:
# addMissingAtoms() is the point at which all heavy atoms get added. This
# includes the atoms identified by findMissingAtoms() as well as the missing
# residues identified by findMissingResidues().
# If replaceNonstandardResidues() was used to modify any residues, it will have
# removed atoms that do not belong in the replacement residue, but it will not
# have added atoms that are missing from the original residue; those are also
# added here.

fixer.addMissingAtoms()

In [16]:
# Write the repaired 4JHW structure to disk.
# The output file is opened in a context manager so the handle is flushed and
# closed as soon as writing finishes (or fails), rather than being left open
# for the lifetime of the kernel.
with open("../data/rerefinement_gyorgy/4jhw_final_v2_refmac1_clean.pdb", 'w') as pdb_out:
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)
