In [14]:
import sys, os
from pathlib import Path
import itertools
import pandas as pd
import mdtraj as md
import networkx as nx
from collections import namedtuple
# Record holding the antibody-side and antigen-side members of one pi-pi pair.
PiPiPair = namedtuple(typename='PiPiPair', field_names=('antibody', 'antigen'))

# Directory the notebook is run from; the local `scripts` package and the
# `structures/` and `data/` folders are looked up relative to it.
source_location = Path().resolve()
# sys.path entries must be strings: the import machinery ignores any other
# type, so appending the Path object itself would silently have no effect.
# The membership test keeps re-runs of this cell from adding duplicates.
if str(source_location) not in sys.path:
    sys.path.append(str(source_location))
from scripts.utils import get_sabdab_details

from scripts.abag_interactions_hydrophobic import *
from scripts.abag_interactions_rings import *

# Project root and the folder the PDB files are loaded from.
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# where this directory exists; consider deriving it from source_location.
casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")
str_dir = casa_dir / "structures/raw"

In [2]:
# SAbDab summary tables (tab-separated): the complete one and the "90" one.
df_sabdab_all = pd.read_csv(
    source_location / 'structures/sabdab_summary_all.tsv', sep="\t")
df_sabdab_90 = pd.read_csv(
    source_location / 'structures/sabdab_summary_90.tsv', sep="\t")

# Pickled tables kept under data/.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
df_buried = pd.read_pickle(source_location / 'data/epitope_buried.pickle')

df_interactions = pd.read_pickle(source_location / 'data/interactions.pickle')

# Rows of the "90" table whose antigen_type mentions 'protein'.
# `antigen_type == antigen_type` is False for NaN (NaN != NaN), so it filters
# out rows with a missing antigen_type.
protein_antigens = (
    df_sabdab_90
    .query(
        "antigen_type == antigen_type and antigen_type.str.contains('protein')",
        engine = 'python')
    .drop_duplicates()
)
ab_protein_antigens = set(protein_antigens.pdb.values)
all_saddab_proteins = set(df_sabdab_90.pdb.values)
print(
    f"SabDab protein antigen:\n"
    f"{len(ab_protein_antigens)} proteins out of {len(all_saddab_proteins)}, "
    f"{round(len(ab_protein_antigens) / len(all_saddab_proteins) * 100, 1)}%"
)

# PDB codes having at least one row with both chains / an H chain / an L chain
# annotated (same NaN-filter idiom as above).
ab_both_chains = set(
    protein_antigens.query("Hchain == Hchain and Lchain == Lchain").pdb.values)
ab_single_H_chain = set(
    protein_antigens.query("Hchain == Hchain").pdb.values)
ab_single_L_chain = set(
    protein_antigens.query("Lchain == Lchain").pdb.values)

# Entries for which no row carries the given chain.
n_ab_no_Hchain = len(ab_protein_antigens) - len(ab_single_H_chain)
n_ab_no_Lchain = len(ab_protein_antigens) - len(ab_single_L_chain)

print(f"All: {len(ab_protein_antigens)}\nNo Hchain: {n_ab_no_Hchain}\nNo Lchain: {n_ab_no_Lchain}\nBoth chains: {len(ab_both_chains)}")

# Keep only the buried-surface rows of entries that have both chains.
buried_fullab = df_buried.loc[df_buried.idcode.isin(ab_both_chains)]
print(
    f"Buried surfaces of {len(set(df_buried.idcode.values))} proteins\n"
    f"with both chains: {len(set(buried_fullab.idcode.values))}"
)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


----

In [28]:
# Some useful data and parameters

# Ring atom names per aromatic residue, used for the pi-pi analysis.
ring_atoms = {
    'TRP': ['CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'PHE': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'TYR': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'HIS': ['CG', 'ND1', 'CD2', 'CE1', 'NE2'],
}

# Same table without HIS, used for the pi-ion analysis. Each list is copied so
# the two dictionaries do not share list objects.
ring_atoms_pi_ion = {
    resname: list(atom_names)
    for resname, atom_names in ring_atoms.items()
    if resname != 'HIS'
}

# Distance cut-offs (presumably nm, the mdtraj convention -- TODO confirm).
cutoff_ring = 0.5
cutoff_clusters = 0.35
cutoff_carbons = 0.5
# Angle cut-offs; the pi-ion one is compared against |dot(unit vector, normal)|.
cutoff_angle_pipi = 0.9
cutoff_angle_pion = 0.8

In [90]:
# Alternative: take the first entry of the filtered table instead of a fixed one.
# pdb_list = list(set(buried_fullab.idcode))
# pdb_idcode = pdb_list[0]
# buried_fullab[buried_fullab.idcode == pdb_idcode]

# Entry analysed by the cells below.
pdb_idcode = '4ydk'
pdb_filename = str_dir / (pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)
# Antibody chains are those listed for this entry in buried_fullab; every
# other chain of the loaded topology is treated as antigen.
ab_chains = buried_fullab.loc[
    buried_fullab.idcode == pdb_idcode, 'chainID'].tolist()
ag_chains = [
    chain.chain_id
    for chain in trj_in.topology.chains
    if chain.chain_id not in ab_chains
]

#### Hydrophobic clusters

In [30]:
# These two cut-offs re-assign the same values already set in the parameters
# cell (presumably so this cell can be tuned on its own).
cutoff_clusters = .35
cutoff_carbons = .5

# Data flow: the graph from get_carbons_graph feeds get_putative_clusters,
# whose result is then merged by merge_clusters using cutoff_clusters.
# The helpers are not defined in this notebook (presumably they come from the
# star import of scripts.abag_interactions_hydrophobic).
G = get_carbons_graph(trj_in, buried_fullab, pdb_idcode, cutoff_carbons)
pre_clusteres = get_putative_clusters(G)
clusters = merge_clusters(trj_in, pre_clusteres, cutoff_clusters)

connected_clusters={0, 2, 3, 4}
connected_clusters={1, 5, 7}
connected_clusters={6}


In [None]:
# Render the merged clusters; "hydro.py" is presumably the name of the
# visualisation script that gets written -- confirm in the helper module.
draw_clusters(trj_in, buried_fullab, pdb_idcode, clusters, "hydro.py")

#### Pi-Pi interactions

In [132]:
# Ring data for the pi-pi analysis, built with the full ring_atoms table
# (HIS included). These three names are rebound later by the pi-ion section.
CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(trj_in, ab_chains, ring_atoms)

# Pairs found with the ring distance cut-off and the pi-pi angle cut-off.
# Each element is later read through its .antibody / .antigen attributes.
pipi_ring_pairs = get_pipi_interactions(trj_in, CG_rings, CoM_rings_xyz, 
    normal_vectors, cutoff_ring, cutoff_angle_pipi)

In [109]:
# Draw the rings only when at least one pi-pi pair was found: with no pairs
# both index lists below would be empty and there would be nothing to draw.
if len(pipi_ring_pairs) > 0:
    # Flatten the atom indices of every ring on each side of the interface.
    atomos_anillo_ab = [ atom.index for pipi_pair in pipi_ring_pairs\
        for atom in pipi_pair.antibody.atoms ]
    atomos_anillo_ag = [ atom.index for pipi_pair in pipi_ring_pairs\
        for atom in pipi_pair.antigen.atoms ]

    draw_pipi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ab, atomos_anillo_ag], "pipi.py")

#### Pi-ion interactions

In [135]:
def get_ion_ring_interactions(trj_in, CG_rings, ids_ON_atoms, CoM_rings_xyz,
    normal_vectors, positions, cutoff_distance, cutoff_angle = .7):
    """Find O/N atoms lying close to, and roughly above, aromatic rings.

    Every (ring, atom) combination is first screened by the distance between
    the ring's atom listed in ``CG_rings`` and the candidate atom. A screened
    combination is kept when the atom is also closer than ``cutoff_distance``
    to the ring centre and the unit vector centre->atom has an absolute dot
    product with the ring normal greater than ``cutoff_angle``.

    Parameters
    ----------
    trj_in
        Trajectory passed to ``md.compute_distances``; its topology is used to
        look up atoms and residues. The reshape of the distances assumes a
        single frame -- TODO confirm before using multi-frame input.
    CG_rings
        Sequence of atom indices, one per ring.
    ids_ON_atoms
        Sequence of atom indices of the candidate O/N atoms.
    CoM_rings_xyz
        Ring centre coordinates, indexed like ``CG_rings``.
    normal_vectors
        Ring normals, indexed like ``CG_rings`` (presumably unit length; this
        is not checked here).
    positions
        Coordinates indexed by atom index.
    cutoff_distance
        Upper bound used for both distance checks (presumably in the same
        units as ``positions``).
    cutoff_angle
        Lower bound for ``|dot(unit centre->atom vector, normal)|``.

    Returns
    -------
    tuple of two lists
        ``(anion_ring_pairs, cation_ring_pairs)``; each item is a
        ``(residue, atom)`` tuple. Atoms whose element symbol is 'O' go to the
        first list, 'N' to the second. Both lists are empty when there are no
        rings or no candidate atoms.

    Raises
    ------
    ValueError
        If an atom that passes both checks is neither O nor N.

    Notes
    -----
    ``np``, ``itertools`` and ``md`` must be available in the notebook
    namespace; ``np`` is not imported explicitly in the visible cells.
    """
    anion_ring_pairs = []
    cation_ring_pairs = []

    # With no rings or no atoms, itertools.product yields nothing and the
    # resulting empty 1-D array is not a valid pair list for compute_distances.
    if len(CG_rings) == 0 or len(ids_ON_atoms) == 0:
        return anion_ring_pairs, cation_ring_pairs

    ring_ON_pairs = np.array(
            list(itertools.product(CG_rings, ids_ON_atoms)))

    # Rows follow CG_rings, columns follow ids_ON_atoms.
    ring_ON_distancias = md.compute_distances(
        trj_in, ring_ON_pairs).reshape(
        (len(CG_rings),
         len(ids_ON_atoms)))

    indices_close_ON_CG = np.where(ring_ON_distancias < cutoff_distance)
    for i, j in zip(*indices_close_ON_CG):
        ON_vector = positions[ids_ON_atoms[j]] - CoM_rings_xyz[i]
        # One norm serves both as the centre-atom distance and to normalise.
        distance = np.linalg.norm(ON_vector)
        angle = np.abs(np.dot(ON_vector / distance, normal_vectors[i]))

        if distance < cutoff_distance and angle > cutoff_angle:
            ring = trj_in.topology.atom(CG_rings[i]).residue
            ion = trj_in.topology.atom(ids_ON_atoms[j])
            if ion.element.symbol == 'O':
                anion_ring_pairs.append((ring, ion))
            elif ion.element.symbol == 'N':
                cation_ring_pairs.append((ring, ion))
            else:
                raise ValueError(
                    f"Expected an O or N atom, got {ion} "
                    f"(element {ion.element.symbol})")
    return anion_ring_pairs, cation_ring_pairs

In [136]:
# Recompute the ring data with the pi-ion table (no HIS). This rebinds
# CG_rings, CoM_rings_xyz and normal_vectors set by the pi-pi section; below
# they are indexed with the "antibody" / "antigen" keys.
CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(trj_in, ab_chains, ring_atoms_pi_ion)

# get_ids_CON returns four values; only the last two (the O/N atom ids of the
# epitope and of the CDR, going by their names) are kept here.
_, _, ids_ON_epitope_atoms, ids_ON_cdr_atoms = get_ids_CON(trj_in.topology,
    buried_fullab, pdb_idcode)

In [137]:
# Antibody rings against the epitope O/N atoms, using the first frame's
# coordinates and the ring-distance / pi-ion angle cut-offs.
anion_ring_pairs_ab, cation_ring_pairs_ab = get_ion_ring_interactions(
    trj_in, CG_rings["antibody"], ids_ON_epitope_atoms,  CoM_rings_xyz["antibody"],
    normal_vectors["antibody"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

In [138]:
# Antigen rings against the CDR O/N atoms, using the first frame's
# coordinates and the ring-distance / pi-ion angle cut-offs.
anion_ring_pairs_ag, cation_ring_pairs_ag = get_ion_ring_interactions(
    trj_in, CG_rings["antigen"], ids_ON_cdr_atoms, CoM_rings_xyz["antigen"],
    normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

In [139]:
# Display both result lists for the antigen rings.
anion_ring_pairs_ag, cation_ring_pairs_ag

([], [])

In [147]:
# NOTE(review): this rebinds anion_ring_pairs_ab / cation_ring_pairs_ab, which
# an earlier cell filled from the *antibody* rings, with the result of testing
# the *antigen* rings against the epitope O/N atoms. Going by the names, both
# sides belong to the antigen -- confirm whether this is an intentional
# intra-antigen check and, if so, consider storing it under separate names.
anion_ring_pairs_ab, cation_ring_pairs_ab = get_ion_ring_interactions(
    trj_in, CG_rings["antigen"], ids_ON_epitope_atoms,  CoM_rings_xyz["antigen"],
    normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

In [148]:
# Display the N-atom ("cation") pairs from the most recent assignment to
# cation_ring_pairs_ab.
cation_ring_pairs_ab

[(TRP427, GLN428-N)]