In [6]:
from pathlib import Path
import pandas as pd

import IMP
import IMP.pmi
import IMP.pmi.topology
import IMP.pmi.dof
import IMP.pmi.macros
import IMP.rmf
import RMF

In [7]:
def get_continous_ranges(numbers):
    start, end = numbers[0], numbers[0]
    ranges = list()
    for num in numbers[1:]:
        if num == end + 1:
            end = num
        else:
            ranges.append((start, end))
            start = num
            end = num

    return ranges

In [8]:
m = IMP.Model()
s = IMP.pmi.topology.System(m)

data_dir = Path(Path.home(), "Documents/mtorc2/data")
fasta_file = Path(data_dir, "fasta/mtorc2.fasta")
seqs = IMP.pmi.topology.Sequences(fasta_fn=str(fasta_file))
st = s.create_state()

param_df = pd.read_csv(Path(data_dir, "params/130.csv"))

for i in range(len(param_df)):
    pdb_file_orig = Path(param_df.iloc[i]["pdb_file"])
    new_path = Path(Path.home(), "Documents", *pdb_file_orig.parts[5:])
    param_df.at[i, "pdb_file"] = str(new_path)

print(param_df.head())


     name subunit   color  rb_start  rb_end  \
0    MTOR    MTOR    blue         1    2549   
1  RICTOR  RICTOR   green         1    1708   
2   MLST8   MLST8  yellow         1     326   
3   MSIN1   MSIN1     red         1     154   
4    CRIM   MSIN1     red       155     277   

                                            pdb_file  \
0  /Users/matthew/Documents/mtorc2/data/em/models...   
1  /Users/matthew/Documents/mtorc2/data/em/models...   
2  /Users/matthew/Documents/mtorc2/data/em/models...   
3  /Users/matthew/Documents/mtorc2/data/em/models...   
4  /Users/matthew/Documents/mtorc2/data/em/models...   

                                           em_prefix  res_per_comp  start_pdb  \
0                                                NaN             0          1   
1                                                NaN             0          1   
2                                                NaN             0          1   
3                                                NaN    

In [9]:
clones = dict()
mols = dict()
subunits = ["MTOR", "RICTOR", "MLST8", "MSIN1", "AKT1"]
for subunit in subunits:
    print(subunit)
    subunit_row_ids = list(param_df[param_df["subunit"] == subunit].index)
    color = param_df.iloc[subunit_row_ids[0], param_df.columns.get_loc("color")]
    chain = param_df.iloc[subunit_row_ids[0], param_df.columns.get_loc("model_chain")]

    mol = st.create_molecule(
        name=subunit,
        sequence=seqs[subunit],
        chain_id=chain
    )
    mols[subunit] = mol

    print(subunit_row_ids)
    for row_id in subunit_row_ids:
        name = param_df.iloc[row_id]["name"]
        pdb_file = Path(param_df.iloc[row_id]["pdb_file"])
        pdb_chain = param_df.iloc[row_id]["pdb_chain"]
        start_pdb = param_df.iloc[row_id]["start_pdb"]
        end_pdb = param_df.iloc[row_id]["end_pdb"]
        offset = int(param_df.iloc[row_id]["offset"])
        em_prefix = param_df.iloc[row_id]["em_prefix"]
        res_per_comp = int(param_df.iloc[row_id]["res_per_comp"])
        print(name, pdb_file, pdb_chain, start_pdb, end_pdb, offset, em_prefix)

        atom_res = mol.add_structure(
            pdb_fn=str(pdb_file),
            chain_id=pdb_chain,
            soft_check=True,
            res_range=(start_pdb, end_pdb),
            offset=offset,
            model_num=1
        )

        mol.add_representation(
            residues=atom_res,
            resolutions=[1,10],
            color=color
        )

    if subunit == "AKT1":
        print(mol.get_non_atomic_residues())
        flex_res = list(mol.get_non_atomic_residues())[:22]
    else:
        flex_res = list(mol.get_non_atomic_residues())

    mol.add_representation(
        flex_res,
        resolutions=[10],
        color=color,
        setup_particles_as_densities=False
    )

    print(subunit, len(mol.get_residues()))
    print(subunit, len(mol.get_atomic_residues()))
    print(subunit, len(mol.get_non_atomic_residues()))

    mol.get_non_atomic_residues()

    flex_res = list(mol.get_non_atomic_residues())
    indices = [res.get_index() for res in flex_res]

    print("flexible regions: ", get_continous_ranges(indices))
    print("structural coverage: ", 1-len(mol.get_non_atomic_residues())/len(mol.get_residues()))

# root_hier = s.build()
# dof = IMP.pmi.dof.DegreesOfFreedom(m)


MTOR
[0]
MTOR /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb F 1 2549 0 nan




MTOR 2549
MTOR 2184
MTOR 365
flexible regions:  [(1, 13), (23, 45), (54, 79), (94, 119), (136, 140), (158, 161), (198, 202), (227, 232), (246, 256), (296, 359), (552, 569), (596, 598), (638, 645), (906, 925), (1241, 1261), (1815, 1869)]
structural coverage:  0.8568065908199294
RICTOR
[1]
RICTOR /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb H 1 1708 0 nan
begin read_pdb:
end read_pdb
RICTOR 1708
RICTOR 1171
RICTOR 537
flexible regions:  [(1, 21), (859, 865), (1018, 1418), (1450, 1477), (1540, 1605)]
structural coverage:  0.6855971896955504
MLST8
[2]
MLST8 /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb G 1 326 0 nan
MLST8 326
MLST8 317
MLST8 9
flexible regions:  [(1, 7)]
structural coverage:  0.9723926380368099
MSIN1
[3, 4, 5, 6]
MSIN1 /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb I 1 154 0 nan
CRIM /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb I 155 267 0 /wynton/home/sali/mhan



RBD /Users/matthew/Documents/mtorc2/data/pdb/7lc1.pdb B 278 353 0 nan
MSIN1PH /Users/matthew/Documents/mtorc2/data/pdb/7lc1.pdb B 383 481 0 nan
MSIN1 522
MSIN1 403
MSIN1 119
flexible regions:  [(40, 65), (184, 191), (268, 277), (354, 382), (416, 420)]
structural coverage:  0.7720306513409961
AKT1
[7, 8, 9]
AKT1PH /Users/matthew/Documents/mtorc2/data/pdb/akt1_af.pdb H 1 123 0 nan
KINASEN /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb J 144 229 0 /wynton/home/sali/mhancock/mtorc2/data/em/comps/1131/10/KINASEN
begin read_pdb:
end read_pdb
KINASEC /Users/matthew/Documents/mtorc2/data/em/models/mTORC2-Akt_v1113_Dimer.pdb J 234 426 0 /wynton/home/sali/mhancock/mtorc2/data/em/comps/1131/10/KINASEC
begin read_pdb:
end read_pdb
OrderedSet([0_AKT1_0_S124, 0_AKT1_0_P125, 0_AKT1_0_S126, 0_AKT1_0_D127, 0_AKT1_0_N128, 0_AKT1_0_S129, 0_AKT1_0_G130, 0_AKT1_0_A131, 0_AKT1_0_E132, 0_AKT1_0_E133, 0_AKT1_0_M134, 0_AKT1_0_E135, 0_AKT1_0_V136, 0_AKT1_0_S137, 0_AKT1_0_L138, 0_AKT1_

### Count the number of unmodeled residues

In [27]:
unmodeled_residues = list()
for subunit in subunits:
    flex_res = mols[subunit].get_non_atomic_residues()
    unmodeled_residues.extend(flex_res)
    print(subunit, len(flex_res))

MTOR 365
RICTOR 537
MLST8 9
MSIN1 119
AKT1 84


In [28]:
unmodeled_residues.extend(mols["AKT1"].residue_range(1,143))
unmodeled_residues.extend(mols["MSIN1"].residue_range(278,522))

In [32]:
len(set(unmodeled_residues))

1406

In [33]:
all_res = list()
for subunit in subunits:
    all_res.extend(mols[subunit].get_residues())

print(len(all_res))


5585
