In [1]:
import Bio.PDB
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt 

In [16]:
# Every input is addressed relative to the current user's home directory.
home_dir = Path.home()

# Structure model to score, and the two cross-link tables (intra- and inter-protein).
pdb_file = home_dir / "Documents/mtorc2/single_traj_experiments/exp_5/34/output_0/pdbs/model.0.pdb"
intra_xl_file = home_dir / "Documents/mtorc2/data/xlms/xl_intra_formatted.csv"
inter_xl_file = home_dir / "Documents/mtorc2/data/xlms/xl_inter_formatted.csv"


In [17]:
# Three-letter residue names of the twenty standard amino acids.
aas = [
    "ALA", "ARG", "ASN", "ASP", "CYS",
    "GLU", "GLN", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO",
    "SER", "THR", "TRP", "TYR", "VAL",
]

# Parse the PDB file (QUIET=True is passed to the parser) and keep the model
# stored under key 0 of the resulting structure.
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
reference_structure = pdb_parser.get_structure("reference", str(pdb_file))
model = reference_structure[0]


In [23]:
# Protein name -> chain identifier in the PDB model. Keys ending in "_B" name
# the chain holding the second copy of the same protein.
chain_ids = {
    "MTOR": "A",
    "RICTOR": "B",
    "MLST8": "C",
    "MTOR_B": "D",
    "RICTOR_B": "E",
    "MLST8_B": "F",
}


In [24]:
# Index the chains of the model by their chain id for direct lookup.
chains = {model_chain.id: model_chain for model_chain in model}

In [25]:
# Read the inter-protein table first and the intra-protein table second, then
# stack them row-wise. Each table keeps its own row labels, so labels can
# repeat in the combined frame.
xl_tables = [pd.read_csv(xl_path) for xl_path in (inter_xl_file, intra_xl_file)]
xls_df = pd.concat(xl_tables, axis=0)
xls_df.head()

Unnamed: 0.1,Unnamed: 0,prot1,res1,prot2,res2
0,0,MTOR,1197,RICTOR,516
1,1,MTOR,1218,RICTOR,516
2,2,RICTOR,1642,MSIN1,102
3,3,RICTOR,1092,MTOR,1993
4,4,RICTOR,1642,MSIN1,104


In [26]:
# Add two boolean flag columns, both starting out False for every cross-link.
xls_df = xls_df.assign(contains=False, satisfied=False)
xls_df.head()

Unnamed: 0.1,Unnamed: 0,prot1,res1,prot2,res2,contains,satisfied
0,0,MTOR,1197,RICTOR,516,False,False
1,1,MTOR,1218,RICTOR,516,False,False
2,2,RICTOR,1642,MSIN1,102,False,False
3,3,RICTOR,1092,MTOR,1993,False,False
4,4,RICTOR,1642,MSIN1,104,False,False


In [27]:
# Find duplicate cross-link entries. Nothing is removed here: this cell only
# records which rows repeat an earlier one.
#
# Two rows are duplicates when they name the same two (protein, residue)
# endpoints, either in the same order or with the endpoints exchanged.
# `duplicates` collects (i, j) pairs of *positional* row numbers with i < j.

# Extract the four identifying columns once, instead of going through
# `iloc` / `columns.get_loc` for every pairwise comparison.
link_records = list(
    zip(xls_df["prot1"], xls_df["res1"], xls_df["prot2"], xls_df["res2"])
)

duplicates = list()
for i, (prot1, res1, prot2, res2) in enumerate(link_records):
    for j in range(i + 1, len(link_records)):
        cur_prot1, cur_res1, cur_prot2, cur_res2 = link_records[j]

        same_order = (
            prot1 == cur_prot1 and prot2 == cur_prot2
            and res1 == cur_res1 and res2 == cur_res2
        )
        swapped_order = (
            prot1 == cur_prot2 and prot2 == cur_prot1
            and res1 == cur_res2 and res2 == cur_res1
        )
        if same_order or swapped_order:
            duplicates.append((i, j))

            

In [28]:
# Renumber the rows 0..n-1, then discard both the old row labels (which
# reset_index() stores in an "index" column) and the "Unnamed: 0" column
# that came in with the CSV files.
xls_df = xls_df.reset_index().drop(columns=["Unnamed: 0", "index"])

In [29]:
# Of every duplicate pair keep the earlier row and drop the later one.
later_rows = [pair[1] for pair in duplicates]
xls_df = xls_df.drop(index=later_rows)
xls_df.tail()

Unnamed: 0,prot1,res1,prot2,res2,contains,satisfied
213,MTOR,298,MTOR,309,False,False
214,AKT1,140,AKT1,214,False,False
215,MTOR,980,MTOR,1256,False,False
217,MTOR,309,MTOR,2370,False,False
218,RICTOR,1092,RICTOR,1107,False,False


In [37]:
# Orient the links so that MTOR sits in prot1: whenever prot2 is MTOR, the two
# (protein, residue) endpoints of that row trade places. Rows where both
# partners are MTOR are swapped as well, since only prot2 is inspected.
prot1_pos, res1_pos, prot2_pos, res2_pos = [
    xls_df.columns.get_loc(name) for name in ("prot1", "res1", "prot2", "res2")
]

for row in range(len(xls_df)):
    if xls_df.iloc[row, prot2_pos] != "MTOR":
        continue

    # Remember the first endpoint before it is overwritten.
    first_prot = xls_df.iloc[row, prot1_pos]
    first_res = xls_df.iloc[row, res1_pos]

    xls_df.iloc[row, prot1_pos] = xls_df.iloc[row, prot2_pos]
    xls_df.iloc[row, res1_pos] = xls_df.iloc[row, res2_pos]

    xls_df.iloc[row, prot2_pos] = first_prot
    xls_df.iloc[row, res2_pos] = first_res


In [38]:
# Number of cross-links left in the table.
print(xls_df.shape[0])

200


In [39]:
# Every link starts out assigned to copy "A".
xls_df = xls_df.assign(copy="A")
xls_df.head()

Unnamed: 0,prot1,res1,prot2,res2,contains,satisfied,copy
0,MTOR,1197,RICTOR,516,False,False,A
1,MTOR,1218,RICTOR,516,False,False,A
2,RICTOR,1642,MSIN1,102,False,False,A
3,MTOR,1993,RICTOR,1092,False,False,A
4,RICTOR,1642,MSIN1,104,False,False,A


In [42]:
# Score every cross-link against the model.
#
# For each link whose two residues are both present in the first-copy chains:
#   * 'contains' is set to True;
#   * 'satisfied' is set to True when the CA-CA distance is below `cutoff`;
#   * the distance from residue 1 to residue 2 in the "_B" chain of prot2 is
#     also measured. If that distance is the shorter one, 'copy' is set to "B",
#     and 'satisfied' is set to True when it is below `cutoff`.
# One line per scored link is printed: proteins, residues, and both distances.

# Maximum CA-CA distance for a satisfied link (presumably in angstroms, the
# usual unit of PDB coordinates -- confirm).
cutoff = 35

# Proteins that occur in the cross-link table but have no entry in chain_ids.
unmodeled_prots = ("AKT1", "MSIN1")

# Column positions do not change inside the loop, so look them up once.
contains_col = xls_df.columns.get_loc("contains")
satisfied_col = xls_df.columns.get_loc("satisfied")
copy_col = xls_df.columns.get_loc("copy")

for i in range(len(xls_df)):
    link = xls_df.iloc[i]
    prot1 = link["prot1"]
    prot2 = link["prot2"]
    res1 = int(link["res1"])
    res2 = int(link["res2"])

    if prot1 in unmodeled_prots or prot2 in unmodeled_prots:
        continue

    chain1 = chains[chain_ids[prot1]]
    chain2 = chains[chain_ids[prot2]]
    chain2_alt = chains[chain_ids[prot2 + "_B"]]

    # Links with a residue missing from the first-copy chains are not scored.
    if res1 not in chain1 or res2 not in chain2:
        continue

    xls_df.iloc[i, contains_col] = True
    pos1 = chain1[res1]["CA"].coord
    pos2 = chain2[res2]["CA"].coord
    l2norm = np.linalg.norm(pos1 - pos2)
    if l2norm < cutoff:
        xls_df.iloc[i, satisfied_col] = True

    # Distance to the same residue in the second copy of prot2. The residue is
    # looked up only if that chain actually has it; an unguarded lookup raises
    # KeyError. A missing residue is treated as infinitely far away, so it can
    # neither win the comparison below nor satisfy the link.
    if res2 in chain2_alt:
        pos2_alt = chain2_alt[res2]["CA"].coord
        l2norm_alt = np.linalg.norm(pos1 - pos2_alt)
    else:
        l2norm_alt = np.inf

    if l2norm_alt < l2norm:
        xls_df.iloc[i, copy_col] = "B"

        if l2norm_alt < cutoff:
            xls_df.iloc[i, satisfied_col] = True

    print(prot1, prot2, res1, res2, l2norm, l2norm_alt)

MTOR MLST8 2374 86 17.096285 106.830894
MTOR RICTOR 2113 274 13.305445 85.71684
MTOR RICTOR 84 699 125.9005 32.909336
MTOR RICTOR 1766 586 133.25186 103.421715
MTOR RICTOR 2113 270 9.060133 85.87732
MTOR RICTOR 2370 126 53.77476 122.172
MTOR MTOR 2218 900 9.976622 121.841
RICTOR RICTOR 791 813 11.376289 152.46321
RICTOR RICTOR 764 800 12.168651 137.65236
RICTOR RICTOR 791 826 18.622501 162.20186
MTOR MTOR 1745 1655 17.491098 175.21837
RICTOR RICTOR 374 734 9.027147 106.351425
RICTOR RICTOR 541 582 15.148683 68.10061
RICTOR RICTOR 800 856 14.955779 134.9573
RICTOR RICTOR 800 734 17.629555 126.27381
RICTOR RICTOR 921 856 20.65406 140.76921
MTOR MTOR 1293 1267 12.758774 62.639378
RICTOR RICTOR 692 719 13.855777 123.041115
RICTOR RICTOR 239 916 10.650702 160.1669
RICTOR RICTOR 374 800 20.31657 121.011734
RICTOR RICTOR 764 856 15.677738 130.97762
MTOR MTOR 226 1277 10.873947 109.89177
MTOR MTOR 900 2352 19.275232 111.53141
MTOR MTOR 1406 2301 15.142129 73.017914
MTOR MTOR 2090 2045 14.40777

In [43]:
# Print one line per link found in the model: the two CA atoms it joins,
# written as "#0:<residue>.<chain>@CA", followed by a colour -- green when the
# link is satisfied, red otherwise.
contains_pos, satisfied_pos, copy_pos = [
    xls_df.columns.get_loc(name) for name in ("contains", "satisfied", "copy")
]
prot1_pos, res1_pos, prot2_pos, res2_pos = [
    xls_df.columns.get_loc(name) for name in ("prot1", "res1", "prot2", "res2")
]

for row in range(len(xls_df)):
    if not xls_df.iloc[row, contains_pos]:
        continue

    colour = "green" if xls_df.iloc[row, satisfied_pos] else "red"

    first_res = xls_df.iloc[row, res1_pos]
    second_res = xls_df.iloc[row, res2_pos]

    # Residue 1 is always reported on the first-copy chain; residue 2 moves to
    # the "_B" chain when the link was assigned to copy "B".
    first_chain = chain_ids[xls_df.iloc[row, prot1_pos]]
    if xls_df.iloc[row, copy_pos] == "B":
        second_chain = chain_ids[xls_df.iloc[row, prot2_pos] + "_B"]
    else:
        second_chain = chain_ids[xls_df.iloc[row, prot2_pos]]

    print("#0:{}.{}@CA #0:{}.{}@CA {}".format(first_res, first_chain, second_res, second_chain, colour))

#0:2374.A@CA #0:86.C@CA green
#0:2113.A@CA #0:274.B@CA green
#0:84.A@CA #0:699.E@CA green
#0:1766.A@CA #0:586.E@CA red
#0:2113.A@CA #0:270.B@CA green
#0:2370.A@CA #0:126.B@CA red
#0:2218.A@CA #0:900.A@CA green
#0:791.B@CA #0:813.B@CA green
#0:764.B@CA #0:800.B@CA green
#0:791.B@CA #0:826.B@CA green
#0:1745.A@CA #0:1655.A@CA green
#0:374.B@CA #0:734.B@CA green
#0:541.B@CA #0:582.B@CA green
#0:800.B@CA #0:856.B@CA green
#0:800.B@CA #0:734.B@CA green
#0:921.B@CA #0:856.B@CA green
#0:1293.A@CA #0:1267.A@CA green
#0:692.B@CA #0:719.B@CA green
#0:239.B@CA #0:916.B@CA green
#0:374.B@CA #0:800.B@CA green
#0:764.B@CA #0:856.B@CA green
#0:226.A@CA #0:1277.A@CA green
#0:900.A@CA #0:2352.A@CA green
#0:1406.A@CA #0:2301.A@CA green
#0:2090.A@CA #0:2045.A@CA green
#0:2370.A@CA #0:2166.A@CA green
#0:226.A@CA #0:1566.A@CA green
#0:84.A@CA #0:128.A@CA green
#0:1662.A@CA #0:898.A@CA green
#0:764.B@CA #0:791.B@CA green
#0:230.A@CA #0:1277.A@CA green
#0:1655.A@CA #0:1702.A@CA green
#0:1471.A@CA #0:1500.A@C