In [139]:
import csv
from simtk.openmm.app import PDBFile
from simtk.openmm.app.topology import Topology
import pdbfixer

# 5UDC

In [174]:
# 5UDC, first pass: break chains 2 and 5 between residues 72 and 73, then cap the new termini.
input_filename = '../data/5udc/5udc_clean_nolongterms_noloop_noseqres_nogap.pdb'
split_chains = [2, 5]  # Index(es) of the chain(s) to split
# split_before_resid: ID of the last residue kept in the chain before the split
# split_after_resid: ID of the first residue placed after the split
split_before_resid, split_after_resid = 72, 73

split_chain(split_chains, split_before_resid, split_after_resid, input_filename)
cap_chain(input_filename)


In [177]:
# Note: check that the filename lines in split_chain / cap_chain are set as intended before running this cell.
# 5UDC, second pass: split chain 8 between residues 73 and 74. The input here is the file in which
# chains 2 and 5 have already been split, so the chain that was index 8 is now index 10.
input_filename = '../data/5udc/5udc_clean_nolongterms_noloop_noseqres_nogap_splitchain_capped.pdb'
split_chains = [10]  # Index(es) of the chain(s) to split
# split_before_resid: ID of the last residue kept in the chain before the split
# split_after_resid: ID of the first residue placed after the split
split_before_resid, split_after_resid = 73, 74

split_chain(split_chains, split_before_resid, split_after_resid, input_filename)
cap_chain(input_filename)

# 4JHW

In [158]:
# 4JHW: break chain 2 between residues 72 and 73, then cap the new termini.
input_filename = '../data/4jhw/4jhw_clean.pdb'
split_chains = [2]  # Index(es) of the chain(s) to split
# split_before_resid: ID of the last residue kept in the chain before the split
# split_after_resid: ID of the first residue placed after the split
split_before_resid, split_after_resid = 72, 73

split_chain(split_chains, split_before_resid, split_after_resid, input_filename)
cap_chain(input_filename)




In [176]:
# Use OpenMM's Topology object to create a new topology with chains, residues, and atoms copied over
# and desired chains split

def split_chain(split_chains, split_before_resid, split_after_resid, input_filename):
    """Split selected chains of a PDB file in two and write the result to a new PDB file.

    Every chain of the input topology is copied into a fresh ``Topology``. For each
    chain whose index is in ``split_chains``, residues with an ID greater than
    ``split_before_resid`` are placed in an additional chain created right after it.
    All bonds are copied except, in every split chain, the bonds joining residue
    ``split_before_resid`` to residue ``split_after_resid``.

    Parameters
    ----------
    split_chains : collection of int
        Indices (in the input topology) of the chains to split.
    split_before_resid : int
        ID of the last residue kept in the first half of a split chain.
    split_after_resid : int
        ID of the first residue of the second half of a split chain.
    input_filename : str
        Path of the PDB file to read. The output path is this name with its last
        four characters replaced by ``'_splitchain.pdb'``.

    Returns
    -------
    None
        The new topology and the original positions are written to disk.

    Raises
    ------
    ValueError
        If a residue ID cannot be converted with ``int()``.
    """
    # Load topology
    pdb = PDBFile(input_filename)
    old_topology = pdb.getTopology()

    # Create new topology
    new_topology = Topology()

    # Copy residues and atoms to new topology and create new chain for residues after split
    d_old_to_new = {}  # Key: atom in old topology, Value: atom in new topology
    for chain in old_topology.chains():
        new_chain = new_topology.addChain()
        is_split = chain.index in split_chains
        # The second half gets its own chain, created immediately after the first half
        new_chain_split = new_topology.addChain() if is_split else None
        for res in chain.residues():
            res_id = int(res.id)
            goes_after_split = is_split and res_id > split_before_resid
            target_chain = new_chain_split if goes_after_split else new_chain
            new_res = new_topology.addResidue(res.name, target_chain, res_id)
            for atom in res.atoms():
                d_old_to_new[atom] = new_topology.addAtom(atom.name, atom.element, new_res)

    split_pair = {split_before_resid, split_after_resid}

    def _crosses_split(atom_a, atom_b):
        # True when the bond joins the residue before the split to the residue after it
        # within one of the chains being split (in either atom order).
        res_a, res_b = atom_a.residue, atom_b.residue
        if res_a is res_b:
            return False
        if res_a.chain.index != res_b.chain.index:
            return False
        if res_a.chain.index not in split_chains:
            return False
        return {int(res_a.id), int(res_b.id)} == split_pair

    # Copy bonds to new topology, except the bonds that connect the two halves of a split chain.
    # Residues are matched by chain and ID (not by residue name), and this is done for every
    # chain in split_chains, so no unrelated bond is dropped and no split chain is missed.
    for bond in old_topology.bonds():
        atom_1 = bond[0]
        atom_2 = bond[1]
        if _crosses_split(atom_1, atom_2):
            continue
        new_topology.addBond(d_old_to_new[atom_1], d_old_to_new[atom_2])

    # Write topology and positions to pdb
    # Alternative naming for an input whose name ends in '_capped.pdb' (11 characters):
#     output_filename = input_filename[:-11] + '.pdb'
    output_filename = input_filename[:-4] + '_splitchain.pdb'

    # The context manager guarantees the file is flushed and closed before anything reads it back
    with open(output_filename, 'w') as file:
        PDBFile.writeFile(new_topology, pdb.positions, file)

In [175]:
# Use PDBFixer to cap the chains (add missing terminal atoms)

def cap_chain(input_filename):
    """Add missing atoms to a previously split PDB file and write the capped structure.

    Reads ``input_filename`` with its last four characters replaced by
    ``'_splitchain.pdb'``, runs PDBFixer's missing-residue / missing-atom detection,
    adds the missing atoms, and writes the result to ``input_filename`` with its last
    four characters replaced by ``'_splitchain_capped.pdb'``.

    Parameters
    ----------
    input_filename : str
        Path of the original (pre-split) PDB file; used only to derive the names of
        the file to read and the file to write.

    Returns
    -------
    None
    """
    # Use PDBFixer to cap chains
    # Alternative naming for an input whose name ends in '_capped.pdb' (11 characters):
#     fixer = pdbfixer.PDBFixer(filename=input_filename[:-11] + '.pdb')
    fixer = pdbfixer.PDBFixer(filename=input_filename[:-4] + '_splitchain.pdb')

    # NOTE(review): findMissingResidues() is presumably required before findMissingAtoms()
    # per the PDBFixer workflow; the call order is kept as in the original.
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Write to PDB
    # Alternative: overwrite the input file in place
#     output_filename = input_filename
    output_filename = input_filename[:-4] + '_splitchain_capped.pdb'

    # Close the handle explicitly so the output is fully flushed before it is read again
    with open(output_filename, 'w') as file:
        PDBFile.writeFile(fixer.topology, fixer.positions, file)
