In [1]:
import itertools
import os
import sys
from pathlib import Path

# Project root: the directory the notebook is started from. It must be
# importable so that the local `scripts` package can be found.
source_location = Path().resolve()
# sys.path entries must be plain strings; other types (including Path objects)
# are ignored by the import system. The membership test keeps the cell
# idempotent when it is re-run.
if str(source_location) not in sys.path:
    sys.path.append(str(source_location))

import mdtraj as md
import networkx as nx
import numpy as np
import pandas as pd

from scripts.utils import get_sabdab_details
# NOTE(review): the star import hides where helpers such as get_ring_data or
# get_ids_CON come from; it is kept last so any name it exports wins, exactly
# as before. `np` and `itertools` are imported explicitly above because later
# cells use them directly.
from scripts.abag_interactions import *

# Location of the raw structure files. The historical absolute path is kept as
# the default; set ABAG_CASA_DIR to run the notebook on another machine.
casa_dir = Path(os.environ.get("ABAG_CASA_DIR",
                               "/home/pbarletta/labo/22/AbAgInterface"))
str_dir = casa_dir / "structures" / "raw"

In [2]:
# Tab-separated summary tables, resolved relative to the project root.
# NOTE(review): presumably SAbDab exports, with "_90" being a filtered
# subset -- confirm where these files come from.
df_sabdab_all = pd.read_csv(
    source_location / 'structures/sabdab_summary_all.tsv', sep="\t")
df_sabdab_90 = pd.read_csv(
    source_location / 'structures/sabdab_summary_90.tsv', sep="\t")

# Pickled tables read from the project's data directory. Unpickling can
# execute arbitrary code, so only load files generated by this project.
df_buried = pd.read_pickle(source_location / 'data/epitope_buried.pickle')

df_interactions = pd.read_pickle(source_location / 'data/interactions.pickle')

In [3]:
# Rows of the "90" table whose antigen_type mentions "protein".
# `antigen_type == antigen_type` is False for NaN, so rows without an antigen
# type are discarded before the string test runs.
protein_antigens = (
    df_sabdab_90
    .query(
        "antigen_type == antigen_type and antigen_type.str.contains('protein')",
        engine='python',
    )
    .drop_duplicates()
)

# Distinct PDB codes: protein-antigen entries vs. every entry in the table.
ab_protein_antigens = set(protein_antigens["pdb"].to_numpy())
all_saddab_proteins = set(df_sabdab_90["pdb"].to_numpy())

n_protein_entries = len(ab_protein_antigens)
n_all_entries = len(all_saddab_proteins)
print(
    f"SabDab protein antigen:\n"
    f"{n_protein_entries} proteins out of {n_all_entries}, "
    f"{round(n_protein_entries / n_all_entries * 100, 1)}%"
)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%


In [4]:
# Row masks: does the entry list a heavy (H) / light (L) chain at all?
has_Hchain = protein_antigens["Hchain"].notna()
has_Lchain = protein_antigens["Lchain"].notna()

# PDB codes per category. Despite their names, the "single" sets hold the
# entries that HAVE the given chain (not the ones that have only that chain).
ab_both_chains = set(protein_antigens.loc[has_Hchain & has_Lchain, "pdb"].to_numpy())
ab_single_H_chain = set(protein_antigens.loc[has_Hchain, "pdb"].to_numpy())
ab_single_L_chain = set(protein_antigens.loc[has_Lchain, "pdb"].to_numpy())

# Entries missing a chain = all protein-antigen entries minus those having it.
n_ab_no_Hchain = len(ab_protein_antigens) - len(ab_single_H_chain)
n_ab_no_Lchain = len(ab_protein_antigens) - len(ab_single_L_chain)

print(
    f"All: {len(ab_protein_antigens)}\n"
    f"No Hchain: {n_ab_no_Hchain}\n"
    f"No Lchain: {n_ab_no_Lchain}\n"
    f"Both chains: {len(ab_both_chains)}"
)

All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154


In [5]:
# Restrict the buried-surface table to structures that have both antibody chains.
in_both_chains = df_buried["idcode"].isin(ab_both_chains)
buried_fullab = df_buried[in_both_chains]

n_buried_all = len(set(df_buried["idcode"].to_numpy()))
n_buried_fullab = len(set(buried_fullab["idcode"].to_numpy()))
print(
    f"Buried surfaces of {n_buried_all} proteins\n"
    f"with both chains: {n_buried_fullab}"
)

Buried surfaces of 2492 proteins
with both chains: 867


----

#### Pi-Pi and ion-ring interactions

In [6]:
# Structure analysed in the rest of the notebook.
pdb_idcode = '7mhy'
pdb_filename = str_dir / f"{pdb_idcode}.pdb"
trj_in = md.load(pdb_filename)

# chainID value of every buried-surface row that belongs to this structure.
ab_chains = buried_fullab.loc[
    buried_fullab["idcode"] == pdb_idcode, "chainID"
].tolist()



In [7]:
# Geometric criteria for a ring-ring contact.
# cutoff_ring: maximum distance between ring centres of mass (MDTraj works in
#   nanometres; coordinates are multiplied by 10 when printed, presumably to
#   report Angstrom -- TODO confirm).
# cutoff_dot_plane: minimum |n_i . n_j| between the ring normals, i.e. how
#   parallel the two ring planes must be (assumes unit-length normals --
#   TODO confirm in get_ring_data).
cutoff_ring = .5
cutoff_dot_plane = .9
CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(trj_in, ab_chains)

# Compare every antigen ring against every antibody ring.
# `i` and `j` index two different lists, so equal indices do NOT denote the
# same ring and no pair may be skipped on that basis.
for i, (com_i, v_i) in enumerate(zip(CoM_rings_xyz["antigen"], normal_vectors["antigen"])):
    for j, (com_j, v_j) in enumerate(zip(CoM_rings_xyz["antibody"], normal_vectors["antibody"])):
        # The sign of a ring normal is arbitrary, so parallel planes can have
        # a dot product close to +1 or -1: test its absolute value.
        if np.linalg.norm(com_i - com_j) < cutoff_ring and\
                abs(np.dot(v_i, v_j)) > cutoff_dot_plane:
            print(f"{i=}, {j=}  --  {com_i*10=} - {com_j*10=}")

In [11]:

# This cell needs `ids_ON_cdr_atoms`, which the notebook otherwise assigns
# only in a later cell; fetch it here so the cell also works on a fresh
# kernel. It is the fourth value returned by get_ids_CON.
ids_ON_cdr_atoms = get_ids_CON(trj_in.topology, buried_fullab, pdb_idcode)[3]

# Every (antigen ring atom, CDR atom) index pair ...
ring_ON_pairs_ag = np.array(
        list(
            itertools.product(
                CG_rings["antigen"], ids_ON_cdr_atoms)))

# ... and the matching distances as an (n_rings, n_atoms) matrix.
# The reshape assumes a single-frame trajectory.
ring_ON_distancias_ag = md.compute_distances(
        trj_in, ring_ON_pairs_ag).reshape(
        (len(CG_rings["antigen"]),
         len(ids_ON_cdr_atoms)))

In [22]:
def get_ion_ring_interactions(CG_rings, ids_ON_atoms, CoM_rings_xyz,
    normal_vectors, positions, cutoff_ring, cutoff_dot_plane=0.9):
    """Find atoms that sit above or below the plane of an aromatic ring.

    A (ring, atom) pair is reported when
      1. the atom is closer than ``cutoff_ring`` to the ring's reference atom
         (cheap pre-filter),
      2. the atom is closer than ``cutoff_ring`` to the ring centre of mass, and
      3. the centre-of-mass -> atom direction is nearly collinear with the
         ring normal: ``|cos(angle)| > cutoff_dot_plane``. The absolute value
         is used because the sign of a ring normal is arbitrary, so the atom
         may sit on either face of the ring.

    All distances are plain Euclidean distances computed from ``positions``;
    the function does not depend on any notebook-level trajectory variable.

    Parameters
    ----------
    CG_rings : sequence of int
        One atom index per ring (the ring's reference atom).
    ids_ON_atoms : sequence of int
        Indices of the candidate partner atoms.
    CoM_rings_xyz : sequence of array-like, shape (3,)
        Centre of mass of each ring, in the same order as ``CG_rings``.
    normal_vectors : sequence of array-like, shape (3,)
        Normal of each ring, in the same order as ``CG_rings``
        (assumed to be unit length -- TODO confirm).
    positions : array-like, shape (n_atoms, 3)
        Coordinates of a single frame, in the same units as ``cutoff_ring``.
    cutoff_ring : float
        Distance cutoff used in steps 1 and 2.
    cutoff_dot_plane : float, optional
        Threshold for step 3. Defaults to 0.9.

    Returns
    -------
    list of (int, int)
        ``(i, j)`` for every reported pair, where ``i`` indexes ``CG_rings``
        and ``j`` indexes ``ids_ON_atoms``. Each pair is also printed.
    """
    positions = np.asarray(positions)
    ring_atom_ids = np.asarray(CG_rings, dtype=int)
    ON_atom_ids = np.asarray(ids_ON_atoms, dtype=int)

    # (n_rings, n_atoms) matrix of reference-atom -> partner-atom distances.
    # Broadcasting keeps empty inputs valid (the result is simply empty).
    ring_ON_distancias = np.linalg.norm(
        positions[ring_atom_ids][:, None, :]
        - positions[ON_atom_ids][None, :, :],
        axis=-1)

    hits = []
    for i, j in zip(*np.where(ring_ON_distancias < cutoff_ring)):
        i, j = int(i), int(j)
        com_xyz = np.asarray(CoM_rings_xyz[i])
        normal = np.asarray(normal_vectors[i])
        ON_xyz = positions[ON_atom_ids[j]]
        ON_vector = ON_xyz - com_xyz
        distance = np.linalg.norm(ON_vector)

        # A zero-length vector has no direction; it cannot satisfy step 3.
        if distance == 0 or distance >= cutoff_ring:
            continue

        if abs(np.dot(ON_vector / distance, normal)) > cutoff_dot_plane:
            # Report the ring centre and the atom actually being tested.
            print(f"{i=}, {j=}  --  {com_xyz*10=} - {ON_xyz*10=}")
            hits.append((i, j))

    return hits

In [23]:
# get_ids_CON is unpacked into four values; only the last two are kept. They
# are used further down as the atom-index lists (`ids_ON_atoms`) of the
# ion-ring search: `ids_ON_epitope_atoms` is paired with the antibody rings and
# `ids_ON_cdr_atoms` with the antigen rings.
# NOTE(review): presumably O/N atom indices of the epitope and of the CDRs,
# respectively -- confirm against scripts.abag_interactions.
# NOTE(review): assigning to `_` shadows IPython's "last output" variable for
# the rest of the kernel session.
_, _, ids_ON_epitope_atoms, ids_ON_cdr_atoms = get_ids_CON(trj_in.topology,
    buried_fullab, pdb_idcode)

In [25]:
# Run the ion-ring search in both directions: antibody rings against the
# epitope atoms, then antigen rings against the CDR atoms.
for ring_side, ids_ON_partner in (
        ("antibody", ids_ON_epitope_atoms),
        ("antigen", ids_ON_cdr_atoms)):
    get_ion_ring_interactions(
        CG_rings[ring_side],
        ids_ON_partner,
        CoM_rings_xyz[ring_side],
        normal_vectors[ring_side],
        trj_in.xyz[0],  # coordinates of the first (only) frame
        cutoff_ring)

#### Hydrophobic clusters

In [9]:
# Three-step pipeline: build a graph for this structure, extract candidate
# clusters from it, then refine them.
G = get_carbons_graph(
    trj_in,
    buried_fullab,
    pdb_idcode,
    0.5,  # NOTE(review): presumably a distance cutoff in nm -- confirm
)
pre_clusteres = get_putative_clusters(G)
clusters = refine_clusters(
    trj_in,
    pre_clusteres,
    0.2,  # NOTE(review): meaning of this threshold is not visible here -- confirm
)

connected_clusters={0, 3}
connected_clusters={1, 4}


In [11]:
# Hand the refined `clusters` of `pdb_idcode` to draw_clusters; the last
# argument is the string "b.py".
# NOTE(review): presumably the name of an output script that draw_clusters
# writes relative to the current working directory -- confirm, and consider a
# more descriptive file name.
draw_clusters(trj_in, buried_fullab, pdb_idcode, clusters, "b.py")