In [1]:
from pathlib import Path
import IMP.pmi.restraints.crosslinking
import IMP.pmi.restraints.stereochemistry
import IMP.pmi.restraints.em
import IMP.pmi.restraints.basic
import IMP.bayesianem
import IMP.bayesianem.restraint
import IMP.algebra
import IMP.pmi.restraints.stereochemistry
import IMP.pmi.restraints.basic
import IMP
import IMP.pmi
import IMP.pmi.topology
import IMP.pmi.dof
import IMP.pmi.macros
import time
import sys
import math
sys.path.append("/home/matthew/mtorc2/src")
sys.path.append("/wynton/home/sali/mhancock/mtorc2/src")
from restraints import get_xl_restraint

In [48]:
m = IMP.Model()
s = IMP.pmi.topology.System(m)

glob_data_dir = Path(Path.home(), "mtorc2/data")
fasta_file = Path(glob_data_dir, "fasta/mtorc2.domain.fasta")
seqs = IMP.pmi.topology.Sequences(fasta_fn=str(fasta_file))
st = s.create_state()

structures = dict()
pdb_file = Path(Path.home(), "mtorc2/data/pdb/1601720.no_bead.pdb")
structures["MTOR"] = (pdb_file, "A", "orange", "J")
structures["RICTOR"] = (pdb_file, "B", "blue", "K")
structures["MLST8"] = (pdb_file, "C", "green", "L")
structures["MSIN1"] = (pdb_file, "D", "red", "M")
structures["CRIM"] = (pdb_file, "E", "purple", "N")
structures["RBD"] = (pdb_file, "F", "brown", "O")
structures["MSIN1PH"] = (pdb_file, "G", "tan", "P")
structures["AKT1PH"] = (pdb_file, "H", "salmon", "Q")
structures["KINASE"] = (pdb_file, "I", "black", "R")

clones = dict()
mols = dict()
for component in structures.keys():
    print(component)
    pdb_file, chain, color, clone_chain = structures[component]

    mol = st.create_molecule(
        name=component,
        sequence=seqs[component],
        chain_id=chain
    )
    mols[component] = mol

    atom = mol.add_structure(
        pdb_fn=str(pdb_file),
        chain_id=chain,
        soft_check=True
    )
    
    atomic_res = mol.get_atomic_residues()
    non_atomic_res = mol.get_non_atomic_residues()
    seq_length = len(atomic_res) + len(non_atomic_res)
    
    print(seq_length)

    atomic_res_ids = list() 
    for res in atomic_res: 
        atomic_res_ids.append(int(str(res).split("_")[-1][1:]))

    non_atomic_res_ids = list() 
    for res in non_atomic_res: 
        non_atomic_res_ids.append(int(str(res).split("_")[-1][1:]))
        
        
    i = 1 
    entries = "" 
    prev_flex = False 
    while i < seq_length + 1: 
        cur_flex = False 

        if i in non_atomic_res_ids: 
            cur_flex = True 

        if cur_flex and not prev_flex: 
            start = i 

        if (prev_flex and not cur_flex) or (cur_flex and i == seq_length): 
            end = i 
            entry = "({},{},{})".format(start, end-1, '"{}"'.format(component))
            entries = entries + entry + ","

        prev_flex = cur_flex     
        i = i+1
        
    print(entries)

MTOR
2549
(1,60,"MTOR"),(75,81,"MTOR"),(247,257,"MTOR"),(290,385,"MTOR"),(405,409,"MTOR"),(467,477,"MTOR"),(549,578,"MTOR"),(634,643,"MTOR"),(904,932,"MTOR"),(1223,1260,"MTOR"),(1815,1866,"MTOR"),(2437,2491,"MTOR"),
RICTOR
1708
(1,25,"RICTOR"),(503,523,"RICTOR"),(638,646,"RICTOR"),(858,872,"RICTOR"),(1006,1422,"RICTOR"),(1441,1514,"RICTOR"),(1530,1604,"RICTOR"),(1696,1707,"RICTOR"),
MLST8
326
(1,7,"MLST8"),(325,325,"MLST8"),
MSIN1
157
(50,76,"MSIN1"),(133,156,"MSIN1"),
CRIM
122
(110,121,"CRIM"),
RBD
102
(75,101,"RBD"),
MSIN1PH
141
(33,48,"MSIN1PH"),(108,140,"MSIN1PH"),
AKT1PH
145
(117,144,"AKT1PH"),
KINASE
335
(301,334,"KINASE"),


In [None]:
for i in range(seq_length): 
    atomic_res
    

In [9]:
print(len(mol.get_atomic_residues()) + len(mol.get_non_atomic_residues()))

2549


In [7]:
print(mol.get_atomic_residues())

OrderedSet([0_KINASE_0_T1, 0_KINASE_0_M2, 0_KINASE_0_N3, 0_KINASE_0_E4, 0_KINASE_0_F5, 0_KINASE_0_E6, 0_KINASE_0_Y7, 0_KINASE_0_L8, 0_KINASE_0_K9, 0_KINASE_0_L10, 0_KINASE_0_L11, 0_KINASE_0_G12, 0_KINASE_0_K13, 0_KINASE_0_G14, 0_KINASE_0_T15, 0_KINASE_0_F16, 0_KINASE_0_G17, 0_KINASE_0_K18, 0_KINASE_0_V19, 0_KINASE_0_I20, 0_KINASE_0_L21, 0_KINASE_0_V22, 0_KINASE_0_K23, 0_KINASE_0_E24, 0_KINASE_0_K25, 0_KINASE_0_A26, 0_KINASE_0_T27, 0_KINASE_0_G28, 0_KINASE_0_R29, 0_KINASE_0_Y30, 0_KINASE_0_Y31, 0_KINASE_0_A32, 0_KINASE_0_M33, 0_KINASE_0_K34, 0_KINASE_0_I35, 0_KINASE_0_L36, 0_KINASE_0_K37, 0_KINASE_0_K38, 0_KINASE_0_E39, 0_KINASE_0_V40, 0_KINASE_0_I41, 0_KINASE_0_V42, 0_KINASE_0_A43, 0_KINASE_0_K44, 0_KINASE_0_D45, 0_KINASE_0_E46, 0_KINASE_0_V47, 0_KINASE_0_A48, 0_KINASE_0_H49, 0_KINASE_0_T50, 0_KINASE_0_L51, 0_KINASE_0_T52, 0_KINASE_0_E53, 0_KINASE_0_N54, 0_KINASE_0_R55, 0_KINASE_0_V56, 0_KINASE_0_L57, 0_KINASE_0_Q58, 0_KINASE_0_N59, 0_KINASE_0_S60, 0_KINASE_0_R61, 0_KINASE_0_H62, 0_KIN