In [148]:
from simtk.openmm.app import PDBFile
from simtk.openmm.app.topology import Topology
import pdbfixer
import string

# 5UDC

In [82]:
# 5UDC (cleaned structure): split the listed chains between residues 73 and 74, then cap.
input_filename = '../data/5udc/5udc_clean_nolongterms_noloop_noseqres_nogap.pdb'
split_chains = [2, 5, 8]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 73, 74

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)


In [149]:
# Check whether a fragmented loop exists when splitting chains (and capping) directly on the raw PDB.
# Note: chain 8 still needs to be split, but at 97/137 because Q is missing.
input_filename = '../data/5udc/5udc.pdb'
split_chains = [2, 5]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 98, 137

new_top = split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
# cap_chain(input_filename)  # capping call is left disabled in this cell



In [151]:
# Check whether a fragmented loop exists when splitting chains and capping directly on the raw PDB.
# Split chain 8 (now chain 10) at 97/137. The input here is the '_splitchain' file, so make sure
# split_chain/cap_chain (defined further down) read and write the matching paths.
input_filename = '../data/5udc/5udc_splitchain.pdb'
split_chains = [10]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 97, 137

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)


In [157]:
# Split chains (capping disabled below) on the re-refined 5UDC PDB.
# Note: chain 8 still needs to be split, but at 97/137 because Q is missing.
input_filename = '../data/rerefinement_gyorgy/5udc_final_v2_refmac1.pdb'
split_chains = [2, 5]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 98, 137

new_top = split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
# cap_chain(input_filename)  # capping call is left disabled in this cell


In [161]:
# Check whether a fragmented loop exists when splitting chains and capping directly on the raw PDB.
# Split chain 8 (now chain 10) at 97/137. The input here is the '_splitchain' file, so make sure
# split_chain/cap_chain (defined further down) read and write the matching paths.
input_filename = '../data/rerefinement_gyorgy/5udc_final_v2_refmac1_splitchain.pdb'
split_chains = [10]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 97, 137

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)


# 4JHW

In [42]:
# 4JHW (cleaned structure): split chain 2 between residues 72 and 73, then cap.
input_filename = '../data/4jhw/4jhw_clean.pdb'
split_chains = [2]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 72, 73

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)

In [43]:
# Check whether a fragmented loop exists when splitting chains and capping directly on the raw PDB.
input_filename = '../data/4jhw/4jhw.pdb'
split_chains = [2]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 97, 137

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)



In [156]:
# Split chain 2 and cap, starting from the re-refined 4JHW PDB.
input_filename = '../data/rerefinement_gyorgy/4jhw_final_v2_refmac1.pdb'
split_chains = [2]  # Indices of the chains to split
# Last residue ID kept before the split / first residue ID after the split
split_before_resid, split_after_resid = 97, 137

split_chain(
    split_chains=split_chains,
    split_before_resid=split_before_resid,
    split_after_resid=split_after_resid,
    input_filename=input_filename,
)
cap_chain(input_filename=input_filename)



# Functions

In [158]:
# Use OpenMM's Topology object to create a new topology with chains, residues, and atoms copied over 
# and desired chains split 

def split_chain(split_chains, split_before_resid, split_after_resid, input_filename,
                output_filename=None):
    """Copy a PDB topology, splitting selected chains in two, and write the result.

    Every chain, residue and atom of the input topology is copied into a new
    ``Topology``. For each chain whose index is in ``split_chains``, residues
    whose (renumbered) ID is ``<= split_before_resid`` go into the first new
    chain and all remaining residues go into a second new chain. Residue IDs
    are renumbered per chain by ``_renumber_resid`` so that repeated or
    decreasing IDs become strictly usable numbers while gaps are kept.

    Bonds are copied as well, except the bond(s) joining the residue with ID
    ``split_before_resid`` to the residue with ID ``split_after_resid`` inside
    each split chain.

    Parameters
    ----------
    split_chains : sequence of int
        Indices (``chain.index``) of the chains to split.
    split_before_resid : int
        ID of the last residue kept in the first half of a split chain.
    split_after_resid : int
        ID of the first residue after the split; used to locate the bond that
        must not be copied.
    input_filename : str
        Path of the PDB file to read.
    output_filename : str, optional
        Path of the PDB file to write. Defaults to ``input_filename``, i.e. the
        input file is overwritten (it is fully loaded before the output is
        opened). Pass e.g. ``input_filename[:-4] + '_splitchain.pdb'`` to keep
        the input untouched.

    Returns
    -------
    Topology
        The newly built topology.

    Notes
    -----
    Chain IDs are assigned from the position of each chain in the *new*
    topology, so the second half of a split chain no longer shares its ID with
    the following chain. If a split chain does not contain both boundary
    residues, a warning is issued and no bond is removed for that chain.
    """
    import warnings  # local import: only needed for the missing-boundary warning

    if output_filename is None:
        output_filename = input_filename

    # Load topology
    pdb = PDBFile(input_filename)
    old_topology = pdb.getTopology()

    # Create new topology
    new_topology = Topology()

    d_old_to_new = {}  # Key: atom in old topology, Value: atom in new topology
    severed_pairs = set()  # frozensets {residue before split, residue after split} (old topology)
    n_new_chains = 0  # Number of chains created so far in the new topology

    for chain in old_topology.chains():
        is_split = chain.index in split_chains

        new_chain = new_topology.addChain(id=_chain_id_for_position(n_new_chains))
        n_new_chains += 1
        new_chain_split = None
        if is_split:
            new_chain_split = new_topology.addChain(id=_chain_id_for_position(n_new_chains))
            n_new_chains += 1

        previous_res_cur = 0  # renumbered ID of the previous residue in this chain
        previous_res_old = 0  # original ID of the previous residue in this chain
        before_residue = None
        after_residue = None

        for res in chain.residues():
            res_old = int(res.id)
            res_cur = _renumber_resid(res_old, previous_res_cur, previous_res_old)

            # Residues past the split point go to the second chain of a split chain
            if is_split and res_cur > split_before_resid:
                target_chain = new_chain_split
            else:
                target_chain = new_chain

            new_res = new_topology.addResidue(res.name, target_chain, id=str(res_cur))
            for atom in res.atoms():
                d_old_to_new[atom] = new_topology.addAtom(atom.name, atom.element, new_res)

            # Remember the two residues flanking the split (first match per chain)
            if is_split:
                if before_residue is None and res_cur == split_before_resid:
                    before_residue = res
                if after_residue is None and res_cur == split_after_resid:
                    after_residue = res

            previous_res_cur = res_cur
            previous_res_old = res_old

        if is_split:
            if before_residue is None or after_residue is None:
                warnings.warn(
                    'Chain index %d has no residue with ID %d and/or %d; '
                    'no bond was removed for this chain.'
                    % (chain.index, split_before_resid, split_after_resid)
                )
            elif before_residue is not after_residue:
                severed_pairs.add(frozenset((before_residue, after_residue)))

    # Copy bonds to new topology, except bonds that connect the two halves of a split chain.
    # Residues are compared by identity, so same-named residues elsewhere are unaffected,
    # and the check works whichever end of the bond lies before the split.
    for bond in old_topology.bonds():
        atom_1 = bond[0]
        atom_2 = bond[1]
        if frozenset((atom_1.residue, atom_2.residue)) in severed_pairs:
            continue
        new_topology.addBond(d_old_to_new[atom_1], d_old_to_new[atom_2])

    # Write topology and positions to pdb; the context manager guarantees the file is closed
    with open(output_filename, 'w') as file:
        PDBFile.writeFile(new_topology, pdb.positions, file, keepIds=True)
    return new_topology


def _renumber_resid(res_old, previous_res_cur, previous_res_old):
    """Return the renumbered ID for a residue whose original ID is ``res_old``.

    ``previous_res_cur`` / ``previous_res_old`` are the renumbered and original
    IDs of the preceding residue in the same chain (both 0 for the first one).
    A residue repeating the previous ID gets the next number, and a residue
    whose ID falls behind the running numbering is shifted forward by the same
    step it had in the original numbering; otherwise the ID is kept.
    """
    if res_old == previous_res_cur:
        return res_old + 1
    if res_old == previous_res_old:
        return previous_res_cur + 1
    if res_old < previous_res_cur:
        return previous_res_cur + (res_old - previous_res_old)
    return res_old


def _chain_id_for_position(position):
    """Return a one-character chain ID for the ``position``-th chain (0-based).

    Uses 'A'-'Z', then 'a'-'z', then '0'-'9'; positions beyond that wrap around.
    """
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    return alphabet[position % len(alphabet)]

In [159]:
# Use PDBFixer to cap the chains (add missing terminal atoms)

def cap_chain(input_filename, output_filename=None):
    """Add missing atoms to a PDB with PDBFixer and write the result to a new file.

    The structure in ``input_filename`` is loaded into a ``PDBFixer``, missing
    residues and atoms are located (in PDBFixer's documented call order), and
    ``addMissingAtoms()`` is applied. The fixed topology and positions are then
    written with the original chain/residue IDs kept (``keepIds=True``).

    Parameters
    ----------
    input_filename : str
        Path of the PDB file to read.
    output_filename : str, optional
        Path of the PDB file to write. Defaults to ``input_filename`` with its
        last four characters (expected to be ``'.pdb'``) replaced by
        ``'_capped.pdb'``.

    Returns
    -------
    None
    """
    if output_filename is None:
        # Strip the 4-character extension and tag the output as capped
        output_filename = input_filename[:-4] + '_capped.pdb'

    # Use PDBFixer to cap chains
    fixer = pdbfixer.PDBFixer(filename=input_filename)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Write to PDB; the context manager guarantees the file is flushed and closed
    with open(output_filename, 'w') as file:
        PDBFile.writeFile(fixer.topology, fixer.positions, file, keepIds=True)

