In [15]:
import sys, os
from pathlib import Path
import itertools
import pandas as pd
import mdtraj as md
import networkx as nx
import logging
from collections import namedtuple
PiPiPair = namedtuple('PiPiPair', ['antibody', 'antigen'])
PionPair = namedtuple('PionPair', ['ring', 'ion'])

source_location = Path().resolve()
sys.path.append(source_location)
from scripts.utils import get_sabdab_details

from scripts.abag_interactions_hydrophobic import *
from scripts.abag_interactions_rings import *
from scripts.more_utils import *

casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")
str_dir = Path.joinpath(casa_dir, "structures/raw")

In [3]:
df_sabdab_all = pd.read_csv(Path.joinpath(source_location,
    'structures/sabdab_summary_all.tsv'), sep="\t")
df_sabdab_90 = pd.read_csv(Path.joinpath(source_location,
    'structures/sabdab_summary_90.tsv'), sep="\t")

df_buried = pd.read_pickle(Path.joinpath(source_location,
    'data/epitope_buried.pickle'))

df_interactions = pd.read_pickle(Path.joinpath(source_location,
    'data/interactions.pickle'))

protein_antigens = df_sabdab_90.query("antigen_type == antigen_type and antigen_type.str.contains('protein')", engine = 'python').drop_duplicates()
ab_protein_antigens = set(protein_antigens.pdb.values)
all_saddab_proteins = set(df_sabdab_90.pdb.values)
print(
    f"SabDab protein antigen:\n"
    f"{len(ab_protein_antigens)} proteins out of {len(all_saddab_proteins)}, "
    f"{round(len(ab_protein_antigens) / len(all_saddab_proteins) * 100, 1)}%"
)    

ab_both_chains = set(protein_antigens.query("Hchain == Hchain and Lchain == Lchain").pdb.values)
ab_single_H_chain = set(protein_antigens.query("Hchain == Hchain").pdb.values)
ab_single_L_chain = set(protein_antigens.query("Lchain == Lchain").pdb.values)

n_ab_no_Hchain = len(ab_protein_antigens) - len(ab_single_H_chain)
n_ab_no_Lchain = len(ab_protein_antigens) - len(ab_single_L_chain)

print(f"All: {len(ab_protein_antigens)}\nNo Hchain: {n_ab_no_Hchain}\nNo Lchain: {n_ab_no_Lchain}\nBoth chains: {len(ab_both_chains)}")

buried_fullab = df_buried[df_buried.idcode.isin(ab_both_chains)]
print(
    f"Buried surfaces of {len(set(df_buried.idcode.values))} proteins\n"
    f"with both chains: {len(set(buried_fullab.idcode.values))}"
)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


----

In [4]:
# Some useful data and parameters
ring_atoms = {
    'TRP': ['CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'PHE': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'TYR': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'HIS': ['CG', 'ND1', 'CD2', 'CE1', 'NE2']}

ring_atoms_pi_ion = {
    'TRP': ['CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'PHE': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'TYR': ['CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ']}

cutoff_ring = .5
cutoff_angle_pipi = .9
cutoff_angle_pion = .8
cutoff_clusters = .35
cutoff_carbons = .5

In [82]:
pdb_list = list(set(buried_fullab.idcode))
pdb_idcode = '6iut'
buried_fullab[buried_fullab.idcode == pdb_idcode][0:2]

Unnamed: 0,idcode,chainID,chain_type,cdr,cdr_seq,cdr_begin,cdr_end,cdr_atoms,epitope_atoms,epitope_residues,ag_ab_interface,ag_cdrchain_interface,ag_cdr_interface,ab_ag_interface,ag_ab_interface_res,ag_cdrchain_interface_res,ag_cdr_interface_res,ab_ag_interface_res
6312,6iut,H,H,1,GGSINSY,25,31,"[172, 173, 174, 175, 176, 177, 178, 179, 180, ...","[4139, 4140, 4142, 4143, 4144, 4145, 4146, 414...","[161, 161, 161, 161, 161, 161, 162, 162, 162, ...","{1795, 1029, 1798, 1799, 1800, 1161, 1674, 167...","{1795, 1798, 1799, 1800, 1161, 1674, 1678, 180...","{1763, 1739, 1740, 1741, 1719}","{4096, 4097, 4099, 4100, 3719, 3720, 4103, 410...","[(A, 165, ASN, N, 1), (A, 116, ILE, HA, 1), (A...","[(A, 165, ASN, N, 1), (A, 165, ASN, O, 1), (A,...","[(A, 163, SER, N, 1), (A, 162, ARG, N, 1), (A,...","[(H, 51, PHE, HE2, 1), (H, 51, PHE, HZ, 1), (H..."
6313,6iut,H,H,2,FDSGS,51,55,"[388, 389, 390, 391, 392, 393, 394, 395, 396, ...","[3823, 3824, 3837, 3839, 3841, 3845, 3846, 384...","[121, 121, 122, 122, 123, 123, 123, 124, 124, ...","{1795, 1029, 1798, 1799, 1800, 1161, 1674, 167...","{1795, 1798, 1799, 1800, 1161, 1674, 1678, 180...","{1728, 1729, 1161, 1674, 1131, 1707, 1709, 167...","{4096, 4097, 4099, 4100, 3719, 3720, 4103, 410...","[(A, 165, ASN, N, 2), (A, 116, ILE, HA, 2), (A...","[(A, 165, ASN, N, 2), (A, 165, ASN, O, 2), (A,...","[(A, 161, LYS, HB2, 2), (A, 161, LYS, HB3, 2),...","[(H, 51, PHE, HE2, 2), (H, 51, PHE, HZ, 2), (H..."


In [83]:
pdb_filename = Path.joinpath(str_dir, pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)
ab_chains =\
    [ fila.chainID for i, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ]
ag_chains = \
    [ chain.chain_id for chain in trj_in.topology.chains if chain.chain_id not in ab_chains ]

print(ab_chains)
print(ag_chains)    

['H', 'H', 'H', 'L', 'L', 'L']
['A', 'A', 'A']


In [71]:
draw_interface(trj_in, buried_fullab, pdb_idcode, ag_chains, "interface.py")

#### Hydrophobic clusters

In [72]:
G = get_carbons_graph(trj_in, buried_fullab, pdb_idcode, cutoff_carbons)
pre_clusteres = get_putative_clusters(G)
clusters = merge_clusters(trj_in, pre_clusteres, cutoff_clusters)

In [7]:
draw_clusters(trj_in, buried_fullab, pdb_idcode, ag_chains, clusters, "hydro.py")

#### Pi-Pi interactions

In [33]:
CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(trj_in, ab_chains, ring_atoms)

pipi_ring_pairs = get_pipi_interactions(trj_in, CG_rings, CoM_rings_xyz, 
    normal_vectors, cutoff_ring, cutoff_angle_pipi)

In [34]:
if len(pipi_ring_pairs) != 0:
    print(f"------------- {pdb_idcode} -------------")
    
    atomos_anillo_ab = [ atom.index for pipi_pair in pipi_ring_pairs\
        for atom in pipi_pair.antibody.atoms ]
    atomos_anillo_ag = [ atom.index for pipi_pair in pipi_ring_pairs\
        for atom in pipi_pair.antigen.atoms ]

    draw_pi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ab, atomos_anillo_ag], "pipi.py")

#### Pi-ion

In [10]:
CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(trj_in, ab_chains, ring_atoms_pi_ion)

_, _, ids_ON_epitope_atoms, ids_ON_cdr_atoms = get_ids_CON(trj_in.topology,
    buried_fullab, pdb_idcode)

In [11]:
anion_ring_pairs_ab, cation_ring_pairs_ab = get_ion_ring_interactions(
    trj_in, CG_rings["antibody"], ids_ON_epitope_atoms,  CoM_rings_xyz["antibody"],
    normal_vectors["antibody"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

In [12]:
anion_ring_pairs_ag, cation_ring_pairs_ag = get_ion_ring_interactions(
    trj_in, CG_rings["antigen"], ids_ON_cdr_atoms, CoM_rings_xyz["antigen"],
    normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

In [13]:
if len(anion_ring_pairs_ab) != 0:
    atomos_anillo_ab = [ atom.index for piion_pair in anion_ring_pairs_ab\
        for atom in piion_pair.ring.atoms ]
    ion_ag = [ piion_pair.ion.index for piion_pair in anion_ring_pairs_ab ]

    draw_pi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ab, ion_ag], "pion.py")

if len(cation_ring_pairs_ab) != 0:
    atomos_anillo_ab = [ atom.index for piion_pair in cation_ring_pairs_ab\
        for atom in piion_pair.ring.atoms ]
    ion_ag = [ piion_pair.ion.index for piion_pair in cation_ring_pairs_ab ]

    draw_pi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ab, ion_ag], "pion.py")

In [14]:
if len(anion_ring_pairs_ag) != 0:
    atomos_anillo_ag = [ atom.index for piion_pair in anion_ring_pairs_ag\
        for atom in piion_pair.ring.atoms ]
    ion_ab = [ piion_pair.ion.index for piion_pair in anion_ring_pairs_ag ]

    draw_pi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ag, ion_ab], "pion.py")

if len(cation_ring_pairs_ag) != 0:
    atomos_anillo_ag = [ atom.index for piion_pair in cation_ring_pairs_ag\
        for atom in piion_pair.ring.atoms ]
    ion_ab = [ piion_pair.ion.index for piion_pair in cation_ring_pairs_ag ]

    draw_pi_rings(trj_in, buried_fullab, pdb_idcode,
        [atomos_anillo_ag, ion_ab], "pion.py")

----

In [6]:
pdb_idcode = '6w7s'
pdb_filename = Path.joinpath(str_dir, pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)
ab_chains = [
    fila.chainID for i,
    fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows()]
ag_chains = [
    chain.chain_id for chain in trj_in.topology.chains
    if chain.chain_id not in ab_chains]

In [11]:
[ fila.epitope_atoms for i, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows()]

[[3373, 3374], [], [], [], [], []]

In [14]:
buried_fullab[buried_fullab.idcode == pdb_idcode][1:]

Unnamed: 0,idcode,chainID,chain_type,cdr,cdr_seq,cdr_begin,cdr_end,cdr_atoms,epitope_atoms,epitope_residues,ag_ab_interface,ag_cdrchain_interface,ag_cdr_interface,ab_ag_interface,ag_ab_interface_res,ag_cdrchain_interface_res,ag_cdr_interface_res,ab_ag_interface_res
13102,6w7s,H,H,2,SYDGSN,54,59,"[3797, 3798, 3799, 3800, 3801, 3802, 3803, 380...",[],[],"{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...","{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...",{},"{6822, 6823, 6825, 6826, 9231, 6831, 9236, 923...","[(A, 1581, PHE, N, 2), (A, 1581, PHE, CA, 2), ...","[(A, 1581, PHE, N, 2), (A, 1581, PHE, CA, 2), ...",[],"[(H, 3, GLU, O, 2), (H, 3, GLU, CB, 2), (H, 3,..."
13103,6w7s,H,H,3,APIQVMVRGVMAPDY,101,115,"[4178, 4179, 4180, 4181, 4182, 4183, 4184, 418...",[],[],"{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...","{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...",{},"{6822, 6823, 6825, 6826, 9231, 6831, 9236, 923...","[(A, 1581, PHE, N, 3), (A, 1581, PHE, CA, 3), ...","[(A, 1581, PHE, N, 3), (A, 1581, PHE, CA, 3), ...",[],"[(H, 3, GLU, O, 3), (H, 3, GLU, CB, 3), (H, 3,..."
13104,6w7s,L,L,1,TLRSGINVGTYRIY,38,51,"[5187, 5188, 5189, 5190, 5191, 5192, 5193, 519...",[],[],"{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...",{},{},"{6822, 6823, 6825, 6826, 9231, 6831, 9236, 923...","[(A, 1581, PHE, N, 1), (A, 1581, PHE, CA, 1), ...",[],[],"[(H, 3, GLU, O, 1), (H, 3, GLU, CB, 1), (H, 3,..."
13105,6w7s,L,L,2,YKSDSDKQQGS,67,77,"[5432, 5433, 5434, 5435, 5436, 5437, 5438, 543...",[],[],"{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...",{},{},"{6822, 6823, 6825, 6826, 9231, 6831, 9236, 923...","[(A, 1581, PHE, N, 2), (A, 1581, PHE, CA, 2), ...",[],[],"[(H, 3, GLU, O, 2), (H, 3, GLU, CB, 2), (H, 3,..."
13106,6w7s,L,L,3,MTWHSSAYV,112,120,"[5764, 5765, 5766, 5767, 5768, 5769, 5770, 577...",[],[],"{1952, 1953, 227, 1956, 6732, 1966, 1940, 249,...",{},{},"{6822, 6823, 6825, 6826, 9231, 6831, 9236, 923...","[(A, 1581, PHE, N, 3), (A, 1581, PHE, CA, 3), ...",[],[],"[(H, 3, GLU, O, 3), (H, 3, GLU, CB, 3), (H, 3,..."


### Run all

In [16]:
pdb_list = list(set(buried_fullab.idcode))
bad_pdbs = []
bad_pdbs_hydro = []

for pdb_idcode in pdb_list:
    print(f"{pdb_idcode}")

    if lacks_epitope_atoms(buried_fullab, pdb_idcode):
        print(f" ----- BAD: {pdb_idcode} ----- ")
        bad_pdbs.append(pdb_idcode)
        continue

    pdb_filename = Path.joinpath(str_dir, pdb_idcode + ".pdb")
    trj_in = md.load(pdb_filename)
    ab_chains = [
        fila.chainID for i,
        fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows()]
    ag_chains = [
        chain.chain_id for chain in trj_in.topology.chains
        if chain.chain_id not in ab_chains]

    ############
    # Pi-Pi
    ############
    CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
        trj_in, ab_chains, ring_atoms)

    pipi_ring_pairs = get_pipi_interactions(
        trj_in, CG_rings, CoM_rings_xyz, normal_vectors, cutoff_ring,
        cutoff_angle_pipi)

    ############
    # Pi-ion
    ############
    CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
        trj_in, ab_chains, ring_atoms_pi_ion)

    _, _, ids_ON_epitope_atoms, ids_ON_cdr_atoms = get_ids_CON(
        trj_in.topology, buried_fullab, pdb_idcode)

    anion_ring_pairs_ab, cation_ring_pairs_ab = get_ion_ring_interactions(
        trj_in, CG_rings["antibody"], ids_ON_epitope_atoms, CoM_rings_xyz["antibody"],
        normal_vectors["antibody"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

    anion_ring_pairs_ag, cation_ring_pairs_ag = get_ion_ring_interactions(
        trj_in, CG_rings["antigen"], ids_ON_cdr_atoms, CoM_rings_xyz["antigen"],
        normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

    ############
    # Hydrophobic clusters
    ############
    try:
        G = get_carbons_graph(trj_in, buried_fullab, pdb_idcode, cutoff_carbons)
        pre_clusteres = get_putative_clusters(G)
        clusters = merge_clusters(trj_in, pre_clusteres, cutoff_clusters)
    except Exception as e:
        bad_pdbs_hydro.append(pdb_idcode)
        logging.warning(
            f"- {pdb_idcode} raised {e.__class__} during hydrophobic "
            f"interactions calculation.")

6xdg




6xdg has alternate positions for atom index: 1282 (not atom serial number).
6xdg has alternate positions for atom index: 1284 (not atom serial number).
6xdg has alternate positions for atom index: 1286 (not atom serial number).
6xdg has alternate positions for atom index: 1288 (not atom serial number).
6xdg has alternate positions for atom index: 1290 (not atom serial number).
6xdg has alternate positions for atom index: 1282 (not atom serial number).
6xdg has alternate positions for atom index: 1284 (not atom serial number).
6xdg has alternate positions for atom index: 1286 (not atom serial number).
6xdg has alternate positions for atom index: 1288 (not atom serial number).
6xdg has alternate positions for atom index: 1290 (not atom serial number).
6uda
7e7y
7l2d
6oor
4hg4
3s88
 ----- BAD: 3s88 ----- 
6pzf
6wn1


KeyboardInterrupt: 