Here we will find AKT conformations that best satisfy the A site XLs.

In [22]:
import pandas as pd
from pathlib import Path
import IMP
import IMP.rmf
import RMF
import pandas as pd
import IMP.atom
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
import math

IMP.set_log_level(IMP.SILENT)

In [23]:
a_xl_file = Path(Path.home(), "Documents/mtorc2/data/xlms/csvs/a_site.csv")
mtorc_pdb_file = Path(Path.home(), "Documents/mtorc2/data/models/submission_3/136_2_1/cluster.0/cluster_center_model.pdb")
akt_pdb_dir = Path("../data/pdbs_align")
akt_rot_pdb_dir = Path("../data/pdbs_rot")

In [24]:
xl_df = pd.read_csv(Path(Path.home(), "Documents/mtorc2/data/xlms/csvs/a_site.csv"))
xl_df.head()

Unnamed: 0,prot1,res1,prot2,res2
0,AKT1,182,RICTOR,516
1,AKT1,189,MTOR,533
2,AKT1,276,RICTOR,800
3,MTOR,42,AKT1,284
4,MTOR,1256,AKT1,420


In [25]:
m_mtorc = IMP.Model()
h_mtorc = IMP.atom.read_pdb(str(mtorc_pdb_file), m_mtorc, IMP.atom.AllPDBSelector())

In [26]:
# Get all PDB files in the directory
pdb_files = list()
for pdb_file in Path(akt_pdb_dir).glob("*.pdb"):
    pdb_files.append(pdb_file)

for pdb_file in Path(akt_rot_pdb_dir).glob("*.pdb"):
    pdb_files.append(pdb_file)

akt_ms, akt_hs = list(), list()
for pdb_file in pdb_files:
    m_akt = IMP.Model()
    h_akt = IMP.atom.read_pdb(str(pdb_file), m_akt)
    akt_ms.append(m_akt)
    akt_hs.append(h_akt)





In [27]:
# Initialize new columns in the dataframe
xl_df['min_distance'] = np.inf
xl_df['best_akt_model'] = -1

chain_dict = {"MTOR": "A", "RICTOR": "B", "MLST8": "C", "MSIN1": "D", "AKT1": "E"}

# Function to calculate distance between two residues
def calculate_xl_distance(h1, h2, chain1, res1, chain2, res2):
    """Calculate distance between CA atoms of two residues"""
    try:
        # Get CA atoms for both residues
        sel1 = IMP.atom.Selection(h1, chain_id=chain1, residue_index=res1, atom_type=IMP.atom.AT_CA)
        sel2 = IMP.atom.Selection(h2, chain_id=chain2, residue_index=res2, atom_type=IMP.atom.AT_CA)

        pids1 = sel1.get_selected_particle_indexes()
        pids2 = sel2.get_selected_particle_indexes()

        if len(pids1) == 0 or len(pids2) == 0:
            return np.inf

        # Get coordinates
        p1 = h1.get_model().get_particle(pids1[0])
        p2 = h2.get_model().get_particle(pids2[0])

        coord1 = IMP.core.XYZ(p1).get_coordinates()
        coord2 = IMP.core.XYZ(p2).get_coordinates()

        # Calculate Euclidean distance
        distance = IMP.algebra.get_distance(coord1, coord2)
        return distance
    except:
        return np.inf

print("Processing crosslinks...")


Processing crosslinks...


In [28]:
# Main loop: iterate through each crosslink and find the AKT model with minimum distance
for xl_idx in range(len(xl_df)):
    prot1 = xl_df.iloc[xl_idx]['prot1']
    prot2 = xl_df.iloc[xl_idx]['prot2']
    res1 = xl_df.iloc[xl_idx]['res1']
    res2 = xl_df.iloc[xl_idx]['res2']

    min_distance = np.inf
    best_model = -1

    for i in range(len(akt_ms)):
        # Load the AKT model
        pdb_file = Path(pdb_files[i])
        m_akt = IMP.Model()
        h_akt = IMP.atom.read_pdb(str(pdb_file), m_akt, IMP.atom.AllPDBSelector())

        # Determine which hierarchies to use for each protein
        if prot1 == "AKT1":
            h1 = h_akt
        else:
            h1 = h_mtorc

        if prot1 == "MTOR" and res1 == 42:
            res1 = 38

        if prot2 == "AKT1":
            h2 = h_akt
        else:
            h2 = h_mtorc

        # Calculate distance for this AKT model
        distance = calculate_xl_distance(h1, h2, chain_dict[prot1], res1, chain_dict[prot2], res2)

        # Update minimum if this is better
        if distance < min_distance:
            min_distance = distance
            best_model = pdb_file

    # Store results in dataframe
    xl_df.at[xl_idx, 'min_distance'] = min_distance
    xl_df.at[xl_idx, 'best_akt_model'] = best_model

    print(f"XL {xl_idx}: {prot1}_{res1} - {prot2}_{res2}, Min distance: {min_distance:.2f} Å (Model {best_model})")

print("\\nProcessing complete!")
print(f"\\nSummary:")
print(f"Total crosslinks processed: {len(xl_df)}")
print(f"Crosslinks with finite distances: {(xl_df['min_distance'] != np.inf).sum()}")
print(f"Mean minimum distance: {xl_df[xl_df['min_distance'] != np.inf]['min_distance'].mean():.2f} Å")


  xl_df.at[xl_idx, 'best_akt_model'] = best_model


XL 0: AKT1_182 - RICTOR_516, Min distance: 18.23 Å (Model ../data/pdbs_align/23.pdb)
XL 1: AKT1_189 - MTOR_533, Min distance: 28.52 Å (Model ../data/pdbs_rot/45.pdb)
XL 2: AKT1_276 - RICTOR_800, Min distance: 22.31 Å (Model ../data/pdbs_align/58.pdb)
XL 3: MTOR_38 - AKT1_284, Min distance: 22.11 Å (Model ../data/pdbs_rot/12.pdb)
XL 4: MTOR_1256 - AKT1_420, Min distance: 15.10 Å (Model ../data/pdbs_align/62.pdb)
XL 5: MTOR_2090 - AKT1_189, Min distance: 47.59 Å (Model ../data/pdbs_align/10.pdb)
XL 6: RICTOR_719 - AKT1_163, Min distance: 32.33 Å (Model ../data/pdbs_align/23.pdb)
XL 7: RICTOR_813 - AKT1_163, Min distance: 9.33 Å (Model ../data/pdbs_align/58.pdb)
XL 8: RICTOR_545 - AKT1_163, Min distance: 13.53 Å (Model ../data/pdbs_align/37.pdb)
XL 9: AKT1_30 - RICTOR_813, Min distance: 23.03 Å (Model ../data/pdbs_align/45.pdb)
\nProcessing complete!
\nSummary:
Total crosslinks processed: 10
Crosslinks with finite distances: 10
Mean minimum distance: 23.21 Å


In [29]:
# Display the updated dataframe with new columns
print("Updated crosslink dataframe with minimum distances and best AKT models:")
xl_df


Updated crosslink dataframe with minimum distances and best AKT models:


Unnamed: 0,prot1,res1,prot2,res2,min_distance,best_akt_model
0,AKT1,182,RICTOR,516,18.229057,../data/pdbs_align/23.pdb
1,AKT1,189,MTOR,533,28.522141,../data/pdbs_rot/45.pdb
2,AKT1,276,RICTOR,800,22.313786,../data/pdbs_align/58.pdb
3,MTOR,42,AKT1,284,22.113582,../data/pdbs_rot/12.pdb
4,MTOR,1256,AKT1,420,15.098311,../data/pdbs_align/62.pdb
5,MTOR,2090,AKT1,189,47.593658,../data/pdbs_align/10.pdb
6,RICTOR,719,AKT1,163,32.325489,../data/pdbs_align/23.pdb
7,RICTOR,813,AKT1,163,9.334296,../data/pdbs_align/58.pdb
8,RICTOR,545,AKT1,163,13.525757,../data/pdbs_align/37.pdb
9,AKT1,30,RICTOR,813,23.027704,../data/pdbs_align/45.pdb


In [None]:
import shutil
import os

# Create output directory if it doesn't exist
output_dir = Path("../data/pdbs_by_xl")
output_dir.mkdir(exist_ok=True)

# Copy best AKT models with XL names
for idx, row in xl_df.iterrows():
    if row['best_akt_model'] != -1 and row['min_distance'] != np.inf:
        # Create filename from crosslink info
        xl_name = f"{row['prot1']}_{row['res1']}-{row['prot2']}_{row['res2']}.pdb"

        # Source and destination paths
        src_path = Path(row['best_akt_model'])
        dst_path = output_dir / xl_name

        # Copy the file
        shutil.copy2(src_path, dst_path)
        print(f"Copied {src_path.name} -> {xl_name}")

print(f"\nSaved {len(xl_df[xl_df['best_akt_model'] != -1])} AKT models to {output_dir}")


Copied 23.pdb -> 0.pdb
Copied 45.pdb -> 1.pdb
Copied 58.pdb -> 2.pdb
Copied 12.pdb -> 3.pdb
Copied 62.pdb -> 4.pdb
Copied 10.pdb -> 5.pdb
Copied 23.pdb -> 6.pdb
Copied 58.pdb -> 7.pdb
Copied 37.pdb -> 8.pdb
Copied 45.pdb -> 9.pdb

Saved 10 AKT models to ../data/pdbs_by_xl
