In [5]:
from simtk.openmm.app import PDBFile
from simtk.openmm.app.topology import Topology
import pdbfixer
import string
import pickle

# 5UDC

In [14]:
# 5UDC, first pass: split chains 2 and 5 of the re-refined PDB
# (after removing part of FP and c-term not present in 5k6f).
# No capping happens in this cell; only split_chain is called.

# Input structure
input_filename = '../data/renumbered/test/5udc_final_v2_refmac1.pdb'
# Index(es) of chain(s) to split
split_chains = [2, 5]
# ID of last residue to include in chain before split
split_before_resid = 98
# ID of first residue after the chain split
split_after_resid = 145
# ID of last residue to include before cutting off the rest of the c-terminal residues
cterm_before_resid = 509

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
    cterm_before_resid=cterm_before_resid,
)

In [15]:
# 5UDC, second pass: split one more chain of the already-split PDB, then cap.
# (Re-refined PDB after removing part of FP and c-term not present in 5k6f.)
# The chain that was chain 8 is chain 10 now.
# first_split=False makes split_chain write back to input_filename and makes
# cap_chain read input_filename directly, so nothing has to be switched by
# hand inside the functions.
# NOTE(review): the original note gave this split as 97/137, but
# split_after_resid below is 145 -- confirm which is intended.

# Input structure (output of the first split pass)
input_filename = '../data/renumbered/test/5udc_final_v2_refmac1_splitchain.pdb'
# Index(es) of chain(s) to split
split_chains = [10]
# ID of last residue to include in chain before split
split_before_resid = 97
# ID of first residue after the chain split
split_after_resid = 145
# ID of last residue to include before cutting off the rest of the c-terminal residues
cterm_before_resid = 509

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
    cterm_before_resid=cterm_before_resid,
    first_split=False,
)
cap_chain(input_filename=input_filename, first_split=False)

# 4JHW

In [11]:
# 4JHW: split chain 2 of the re-refined PDB and cap the result
# (after removing part of FP and c-term not present in 5k6f).

# Input structure
input_filename = '../data/renumbered/test/4jhw_final_v2_refmac1.pdb'
# Index(es) of chain(s) to split
split_chains = [2]
# ID of last residue to include in chain before split
split_before_resid = 97
# ID of first residue after the chain split
split_after_resid = 145
# ID of last residue to include before cutting off the rest of the c-terminal residues
cterm_before_resid = 509

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
    cterm_before_resid=cterm_before_resid,
)
cap_chain(input_filename=input_filename)

# 5k6f

In [20]:
# 5k6f: split chain 0 of the re-refined PDB and cap the result.
# cterm_before_resid is deliberately not passed here, so split_chain uses its
# default and prints its "cterm_before_resid not set" warning.

# Input structure
input_filename = '../data/renumbered/keep_chain_id_res_num/5k6f.pdb'
# Index(es) of chain(s) to split
split_chains = [0]
# ID of last residue to include in chain before split
split_before_resid = 98
# ID of first residue after the chain split
split_after_resid = 145

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)



# 4jhw trimer + single antibody

In [8]:
# 4jhw trimer + single antibody: split chains 2, 5 and 8 and cap the result
# (after removing part of FP and c-term not present in 5k6f).

# Input structure
input_filename = '../data/renumbered/keep_chain_id_res_num/4jhw_trimer.pdb'
# Index(es) of chain(s) to split
split_chains = [2, 5, 8]
# ID of last residue to include in chain before split
split_before_resid = 97
# ID of first residue after the chain split
split_after_resid = 145
# ID of last residue to include before cutting off the rest of the c-terminal residues
cterm_before_resid = 509

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
    cterm_before_resid=cterm_before_resid,
)
cap_chain(input_filename=input_filename)

# Functions

In [2]:
# Use OpenMM's Topology object to create a new topology with chains, residues, and atoms copied over 
# and desired chains split. Also allows optional deletion of c-terminal residues in the chains to be split.
# NOTE: THIS FUNCTION MAINTAINS INSERTION CODES
# split_chains : Index(es) of chain(s) to split
# split_before_resid : ID of last residue to include before the chain split
# split_after_resid : ID of first residue after the chain split
# input_filename : name of input PDB file
# cterm_before_resid : (optional) ID of last residue to include before cutting off the rest of the c-terminal residues

def split_chain(split_chains, split_before_resid, split_after_resid, input_filename, cterm_before_resid=10000, first_split=True):
    """Split selected chains of a PDB file in two and write the result to a PDB file.

    The input topology is copied chain by chain into a new OpenMM Topology.
    For every chain whose index is in ``split_chains``:

    * residues with ID <= ``split_before_resid`` stay in a chain with the
      original chain ID;
    * residues with ``split_after_resid`` <= ID <= ``cterm_before_resid`` go
      into a second chain, added directly after the first, whose ID is looked
      up in the hard-coded map F->X, A->Y, D->Z;
    * every other residue of that chain is dropped, together with its
      positions.

    Chains not listed in ``split_chains`` are copied unchanged. Residue IDs and
    insertion codes are preserved (the file is written with ``keepIds=True``).

    Bonds are copied except for (a) bonds touching a dropped atom and (b) the
    bond joining residue ``split_before_resid`` to residue ``split_after_resid``
    within one split chain, in either atom order.

    Parameters
    ----------
    split_chains : iterable of int
        Index(es) of chain(s) to split.
    split_before_resid : int
        ID of last residue to include before the chain split.
    split_after_resid : int
        ID of first residue after the chain split.
    input_filename : str
        Name of input PDB file.
    cterm_before_resid : int, optional
        ID of last residue to keep in the second half of a split chain.
        A warning is printed when it is left at the default of 10000.
    first_split : bool, optional
        If True, write to ``input_filename[:-4] + '_splitchain.pdb'``.
        If False, overwrite ``input_filename``.

    Raises
    ------
    KeyError
        If a chain to split has an ID other than F, A or D.
    """
    # ID assigned to the second half of each split chain
    d_chains = {"F": "X", "A": "Y", "D": "Z"}

    if cterm_before_resid == 10000:
        print("Warning: cterm_before_resid not set")

    split_chain_indices = set(split_chains)

    # Load topology
    pdb = PDBFile(input_filename)
    old_topology = pdb.getTopology()

    # Create new topology
    new_topology = Topology()
    new_topology.setPeriodicBoxVectors(old_topology.getPeriodicBoxVectors())

    # Copy residues and atoms; residues that fall in the gap or past the
    # c-terminal cutoff of a split chain are not copied and their atom
    # indices are recorded so the matching positions can be removed.
    d_old_to_new = {}  # Key: atom in old topology, Value: atom in new topology
    deleted_atom_indices = []
    for chain in old_topology.chains():
        new_chain = new_topology.addChain(id=chain.id)

        is_split = chain.index in split_chain_indices
        if is_split:
            if chain.id not in d_chains:
                raise KeyError("No split-chain ID defined for chain '%s'; known chains: %s"
                               % (chain.id, sorted(d_chains)))
            new_chain_split = new_topology.addChain(id=d_chains[chain.id])

        for res in chain.residues():
            if not is_split:
                target_chain = new_chain
            else:
                resid = int(res.id)
                if resid <= split_before_resid:
                    target_chain = new_chain
                elif split_after_resid <= resid <= cterm_before_resid:
                    target_chain = new_chain_split
                else:
                    target_chain = None

            if target_chain is None:
                deleted_atom_indices.extend(atom.index for atom in res.atoms())
                continue

            new_res = new_topology.addResidue(res.name, target_chain, id=res.id, insertionCode=res.insertionCode)
            for atom in res.atoms():
                d_old_to_new[atom] = new_topology.addAtom(atom.name, atom.element, new_res)

    # Copy bonds to new topology
    split_pair = (split_before_resid, split_after_resid)
    for bond in old_topology.bonds():
        atom_1 = bond[0]
        atom_2 = bond[1]

        # A bond to a dropped atom cannot be kept, whichever chain the
        # other atom belongs to.
        if atom_1 not in d_old_to_new or atom_2 not in d_old_to_new:
            continue

        # Drop the bond that joined the two halves of a split chain.
        res_1 = atom_1.residue
        res_2 = atom_2.residue
        if res_1.chain.index in split_chain_indices and res_1.chain.index == res_2.chain.index:
            ids = (int(res_1.id), int(res_2.id))
            if ids == split_pair or ids == split_pair[::-1]:
                continue

        new_topology.addBond(d_old_to_new[atom_1], d_old_to_new[atom_2])

    # Delete positions of dropped atoms, highest index first so earlier
    # deletions do not shift the indices still to be deleted.
    for atom_index in sorted(deleted_atom_indices, reverse=True):
        del pdb.positions[atom_index]

    # Write topology and positions to pdb
    if first_split:
        filename = input_filename[:-4] + '_splitchain.pdb'
    else:
        filename = input_filename

    # Context manager guarantees the file is flushed and closed before any
    # later step (e.g. capping) reads it back.
    with open(filename, 'w') as pdb_file:
        PDBFile.writeFile(new_topology, pdb.positions, pdb_file, keepIds=True)
    

In [3]:
# Use PDBFixer to cap the chains (add missing terminal atoms)

def cap_chain(input_filename, first_split=True):
    """Run PDBFixer on a split-chain PDB file and write a '_capped' copy.

    The file is loaded into PDBFixer, missing atoms are located and added,
    heterogens are removed, and the result is written next to the file that
    was read, with '_capped.pdb' replacing its last four characters.

    Parameters
    ----------
    input_filename : str
        Name of the PDB file that was passed to split_chain.
    first_split : bool, optional
        If True, read ``input_filename[:-4] + '_splitchain.pdb'``.
        If False, read ``input_filename`` itself.
    """
    # Resolve which file to read
    if first_split:
        filename = input_filename[:-4] + '_splitchain.pdb'
    else:
        filename = input_filename
    fixer = pdbfixer.PDBFixer(filename=filename)

    # PDBFixer requires findMissingResidues() to run before findMissingAtoms()
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    # False is PDBFixer's keepWater argument
    fixer.removeHeterogens(False)

    # Write to PDB; the context manager closes the file so the output is
    # fully flushed to disk when this function returns.
    with open(filename[:-4] + '_capped.pdb', 'w') as capped_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, capped_file, keepIds=True)


