In [1]:
import sys
import glob
import imp
from pathlib import Path
import itertools
import pandas as pd
import mdtraj as md
import logging
import plotly.graph_objects as go
import plotly.express as px
import plotly.express as px
import plotly.io as pio
px.defaults.template = 'ggplot2'
px.defaults.template = 'simple_white'
pio.templates.default = 'simple_white'
cdr_colors = ["#74D2F5", "#5495D6", "#3763F5", "#CF81EB", "#E564F5", "#A04AD6",
    'Aqua', 'AquaMarine']
from collections import Counter
import pickle
import subprocess
from joblib import Parallel, delayed
import importlib
from collections import namedtuple
TYRs = namedtuple('TYRs', ['heavy', 'light', 'antigen'])
ResiCount = namedtuple('ResiCount', ['antibody', 'antigen'])
PolarCount = namedtuple('PolarCount', ['cdr_SC', 'cdr_BB', 'epi_SC', 'epi_BB'])
Chains = namedtuple('Chains', ['antibody', 'antigen'])
InterfaceAtoms = namedtuple('InterfaceAtoms', ['antibody', 'antigen'])
PiPiPair = namedtuple('PiPiPair', ['antibody', 'antigen'])
PionPair = namedtuple('PionPair', ['ring', 'ion'])
HBondAtom = namedtuple('HBondAtom', ['chainID', 'chain_type',
                       'CDR', 'resSeq', 'resname', 'index', 'serial', 'element', 'is_sidechain'])
HBond = namedtuple('HBond', ['donor', 'acceptor'])
Atom = namedtuple('Atom', ['index', 'serial', 'element', 'is_sidechain', 'resSeq',
                  'resSeq_str', 'resname', 'chain_ID', 'chain_type', 'CDR'])
Ring = namedtuple('Ring', ['indices', 'serials', 'resSeq', 'resSeq_str',
                           'resname', 'chain_ID', 'chain_type', 'CDR'])
ShieldingAtom = namedtuple(
    'ShieldingAtom', ['chainID', 'chain_type', 'CDR', 'resSeq', 'resname', 'index',
                      'serial', 'element', 'is_sidechain'])
res_SSE = namedtuple('res_SSE', ['index', 'resSeq', 'name', 'DSSP'])

source_location = Path().resolve()
sys.path.append(source_location)

from abag_interactions_hydrophobic import *
from abag_interactions_rings import *
from interactions_polar import *

casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")
data_dir = Path.joinpath(casa_dir, "data")
str_dir = Path.joinpath(casa_dir, "structures/raw")
exposed_dir = Path.joinpath(casa_dir, "structures/exposed")

In [2]:
print("Reading epitope_buried_cleaned.pickle")
with open(Path.joinpath(
        casa_dir, "data", 'epitope_buried_cleaned.pickle'), 'rb') as file:
    epitope_buried_cleaned = pickle.load(file)

print("Reading buried_interface_res.pickle")
with open(Path.joinpath(casa_dir, "data", 'buried_interface_res.pickle'), 'rb') as file:
    buried_interface_res = pickle.load(file)

with open(Path.joinpath(data_dir, 'pdb.list'), 'r') as file:
    pdb_list = [ linea.strip() for linea in file ]

with open(Path.joinpath(data_dir, 'interface_atoms.pkl'), 'rb') as file:
        interface_atoms = pickle.load(file)

Reading epitope_buried_cleaned.pickle
Reading buried_interface_res.pickle


----

## Hydrophobic

In [3]:
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)
with(open(Path.joinpath(data_dir, 'pdb.list'), 'r')) as file:
    pdb_list = [ linea.strip() for linea in file ]
with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
    filenames = pickle.load(file)
with open(Path.joinpath(data_dir, 'interface_atoms.pkl'), 'rb') as file:
    interface_atoms = pickle.load(file)
with open(Path.joinpath(data_dir, 'hydrophobic.pkl'), 'rb') as file:
    hydrophobic = pickle.load(file)

In [12]:
import abag_interactions_hydrophobic
imp.reload(abag_interactions_hydrophobic)
from abag_interactions_hydrophobic import *

In [93]:
check_pdb = '3cvh'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
# for pdb_idcode in pdb_list:
    logging.info(pdb_idcode)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody 
    ag_chains = chains[pdb_idcode].antigen
    #
    # Hydrophobic clusters
    #
    try:
        G = get_carbons_graph(trj_in, interface_atoms[pdb_idcode], cutoff_carbons)
        pre_clusteres = get_putative_clusters(G)
        clusters = merge_clusters(trj_in, pre_clusteres, cutoff_clusters)
    except Exception as e:
        logging.warning(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during hydrophobic "
            f"interactions calculation. Probably has no hydrophobic interactions.")
        raise e

In [94]:
draw_clusters(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename),
    interface_atoms[pdb_idcode], ag_chains, clusters,
    str( Path.joinpath(casa_dir, 'aux', "clusters.py")))

In [88]:
PiCation[pdb_idcode]

(PionPair(ring=Ring(indices=(2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981), serials=(2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, 2983), resSeq=52, resSeq_str='', resname='TYR', chain_ID='H', chain_type='H', CDR=2), ion=Atom(index=1094, serial=1095, element='N', is_sidechain=True, resSeq=148, resSeq_str='148', resname='ARG', chain_ID='A', chain_type='.', CDR=-1)),)

In [285]:
cadenas = []
for i, row in enumerate(epitope_buried_cleaned.query(f"idcode == '{check_pdb}'").iterrows()):
    beg = int(row[1].cdr_begin)
    end = int(row[1].cdr_end) + 1
    chainID = row[1].chainID
    chain_type = row[1].chain_type
    resis = range(beg, end)
    cadena = chain_type + str(i%3+1)
    cadenas.append(cadena)

    print(f"cdr{cadena} = '", end='')
    for resi in resis[:-1]:
        print(f"resi {resi} and chain {chainID}", end = ' or ')
    print(f"resi {resi} and chain {chainID}'")

cdrH1 = 'resi 26 and chain H or resi 27 and chain H or resi 28 and chain H or resi 29 and chain H or resi 30 and chain H or resi 31 and chain H or resi 31 and chain H'
cdrH2 = 'resi 52 and chain H or resi 53 and chain H or resi 54 and chain H or resi 55 and chain H or resi 56 and chain H or resi 56 and chain H'
cdrH3 = 'resi 99 and chain H or resi 100 and chain H or resi 101 and chain H or resi 102 and chain H or resi 103 and chain H or resi 104 and chain H or resi 105 and chain H or resi 106 and chain H or resi 107 and chain H or resi 108 and chain H or resi 109 and chain H or resi 110 and chain H or resi 111 and chain H or resi 112 and chain H or resi 113 and chain H or resi 114 and chain H or resi 114 and chain H'
cdrL1 = 'resi 23 and chain L or resi 24 and chain L or resi 25 and chain L or resi 26 and chain L or resi 27 and chain L or resi 28 and chain L or resi 29 and chain L or resi 30 and chain L or resi 31 and chain L or resi 32 and chain L or resi 32 and chain L'
cdrL2 = 'resi

In [286]:
for i, (color, cadena) in enumerate(zip(cdr_colors, cadenas)):
    print(f"cmd.color('0x{color.lstrip('#')}', cdr{cadena})")

cmd.color('0x74D2F5', cdrH1)
cmd.color('0x5495D6', cdrH2)
cmd.color('0x3763F5', cdrH3)
cmd.color('0xCF81EB', cdrL1)
cmd.color('0xE564F5', cdrL2)
cmd.color('0xA04AD6', cdrL3)


In [221]:
polars = list(shielding_dict['1lk3'].keys())
print(f"polares = '", end='')
for pp in polars[:-1]:
    p =  trj_in.topology.atom(pp).serial
    print(f"id {p}", end = ' or ')
print(f"id {p}'")
print(f"cmd.show('spheres', polares)")

polares = 'id 5313 or id 5304 or id 7922 or id 5305 or id 5291 or id 6198 or id 5314 or id 5308 or id 4848 or id 5303 or id 5296 or id 5312 or id 4839 or id 4849 or id 5277 or id 4901 or id 5274 or id 4879 or id 4872 or id 7899 or id 4911 or id 4875 or id 5346 or id 4910 or id 5738 or id 5738'
cmd.show('spheres', polares)


In [220]:
'0x74D2F5'
'0x5495D6'
'0x3763F5'
'0xCF81EB'
'0xE564F5'
'0xA04AD6'

5738

In [217]:
with open(Path.joinpath(casa_dir, 'data', 'shielding.pkl'), 'rb') as file:
    shielding_dict = pickle.load(file)

In [201]:
cadenas

['H1', 'H2', 'H3', 'K1', 'K2', 'K3']

In [127]:
[ (atm.index, atm)  for atm in trj_in.topology.atoms if atm.serial == 261]

[(255, HIS79-CB)]

In [173]:
epitope_buried_cleaned.query(f"idcode == '1fbi'")

Unnamed: 0,idcode,chainID,chain_type,cdr,cdr_seq,cdr_begin,cdr_end,cdr_atoms,epitope_atoms,epitope_residues,ag_ab_interface,ag_cdrchain_interface,ag_cdr_interface,ab_ag_interface,ag_interface,ag_ab_interface_res,ag_cdrchain_interface_res,ag_cdr_interface_res,ab_ag_interface_res,ag_interface_res
6090,1fbi,H,H,1,GYTFTSY,26,32,"[1807, 1808, 1809, 1810, 1811, 1812, 1813, 181...","[3409, 3415, 3416, 3419, 3421, 3422, 3423, 342...","[ 19 , 20 , 20 , 20 , 20 , 20 , 2...","{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{6785, 6786, 7938, 6788, 6789, 7943, 6795, 769...","{6880, 6882, 6889, 6860, 6892, 6862, 6863, 6874}","{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 1), (X, 15 , HIS, C, 1)...","[(X, 15 , HIS, CA, 1), (X, 15 , HIS, C, 1)...","[(X, 21 , ARG, CD, 1), (X, 21 , ARG, CZ, 1...","[(H, 102 , TYR, O, 1), (H, 52 , ASP, CB, 1)...","[(X, 111 , TRP, HZ2, 1), (X, 111 , TRP, HZ3,..."
6091,1fbi,H,H,2,DPSDSY,52,57,"[2025, 2026, 2027, 2028, 2029, 2030, 2031, 203...","[3367, 3368, 3376, 3377, 3378, 3379, 3380, 338...","[ 14 , 14 , 15 , 15 , 15 , 15 , 1...","{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{6785, 6786, 7938, 6788, 6789, 7943, 6795, 769...","{7968, 6785, 6786, 7938, 6788, 6789, 7970, 794...","{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 2), (X, 15 , HIS, C, 2)...","[(X, 15 , HIS, CA, 2), (X, 15 , HIS, C, 2)...","[(X, 96 , LYS, CD, 2), (X, 15 , HIS, CA, 2...","[(H, 102 , TYR, O, 2), (H, 52 , ASP, CB, 2)...","[(X, 111 , TRP, HZ2, 2), (X, 111 , TRP, HZ3,..."
6092,1fbi,H,H,3,LYYYGTSYGVLDY,99,111,"[2376, 2377, 2378, 2379, 2380, 2381, 2382, 238...","[3434, 3436, 3751, 3753, 3769, 3771, 3772, 384...","[ 21 , 21 , 62 , 62 , 63 , 63 , 6...","{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{6785, 6786, 7938, 6788, 6789, 7943, 6795, 769...","{7522, 7691, 7532, 7531, 8049, 7987, 7700, 770...","{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 3), (X, 15 , HIS, C, 3)...","[(X, 15 , HIS, CA, 3), (X, 15 , HIS, C, 3)...","[(X, 63 , TRP, CH2, 3), (X, 75 , LEU, CD2,...","[(H, 102 , TYR, O, 3), (H, 52 , ASP, CB, 3)...","[(X, 111 , TRP, HZ2, 3), (X, 111 , TRP, HZ3,..."
6093,1fbi,L,K,1,RASQDISNYLN,24,34,"[157, 158, 159, 160, 161, 162, 163, 164, 165, ...","[3753, 3754, 3756, 3758, 3841, 3842, 3843, 3844]","[ 62 , 62 , 62 , 62 , 73 , 73 , 7...","{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{7649, 7651, 7652, 7653, 7654, 7497, 7658, 769...",{7654},"{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 1), (X, 15 , HIS, C, 1)...","[(X, 73 , ARG, O, 1), (X, 73 , ARG, CG, 1)...","[(X, 73 , ARG, CZ, 1)]","[(H, 102 , TYR, O, 1), (H, 52 , ASP, CB, 1)...","[(X, 111 , TRP, HZ2, 1), (X, 111 , TRP, HZ3,..."
6094,1fbi,L,K,2,YTSRLHS,50,56,"[377, 378, 379, 380, 381, 382, 383, 384, 385, ...",[],[],"{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{7649, 7651, 7652, 7653, 7654, 7497, 7658, 769...",{},"{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 2), (X, 15 , HIS, C, 2)...","[(X, 73 , ARG, O, 2), (X, 73 , ARG, CG, 2)...",[],"[(H, 102 , TYR, O, 2), (H, 52 , ASP, CB, 2)...","[(X, 111 , TRP, HZ2, 2), (X, 111 , TRP, HZ3,..."
6095,1fbi,L,K,3,QQGYTLPYT,89,97,"[670, 671, 672, 673, 674, 675, 676, 677, 678, ...","[3825, 3826, 3827, 3828, 3830, 3831, 3835, 383...","[ 71 , 71 , 71 , 72 , 72 , 72 , 7...","{6785, 6786, 7938, 6788, 6789, 7687, 7943, 769...","{7649, 7651, 7652, 7653, 7654, 7497, 7658, 769...","{7649, 7651, 7652, 7653, 7497, 7658, 7690, 766...","{1544, 791, 4760, 792, 793, 3736, 3737, 794, 1...","{8204, 8205, 8206, 8210, 8212, 8213, 8214, 821...","[(X, 15 , HIS, CA, 3), (X, 15 , HIS, C, 3)...","[(X, 73 , ARG, O, 3), (X, 73 , ARG, CG, 3)...","[(X, 73 , ARG, O, 3), (X, 73 , ARG, CG, 3)...","[(H, 102 , TYR, O, 3), (H, 52 , ASP, CB, 3)...","[(X, 111 , TRP, HZ2, 3), (X, 111 , TRP, HZ3,..."


### Interface CDRs vs fr

In [20]:
with open(Path.joinpath(data_dir, '2interface_atoms.pkl'), 'rb') as file:
        interface_atoms2 = pickle.load(file)

In [21]:
# Check how many atoms belong to framework.
cola = []
for pdb_idcode in interface_atoms2.keys():
    cderes = [atm.CDR for atm in interface_atoms2[pdb_idcode].antibody.values()]
    cola.append(cderes.count(0) / len(cderes))
px.histogram(cola)

In [22]:
with open(Path.joinpath(data_dir, 'interface_atoms.pkl'), 'rb') as file:
        interface_atoms = pickle.load(file)

In [24]:
# Check how many atoms belong to framework heavy chain.
cola = []
for pdb_idcode in interface_atoms.keys():
    cderes = [atm.CDR for atm in interface_atoms[pdb_idcode].antibody.values()]
    cola.append(cderes.count(0) / len(cderes))
# print(suma / len(interface_atoms.keys()))
px.histogram(cola)

In [94]:
# Check how many atoms belong to framework heavy chain.
cola = []
for pdb_idcode in interface_atoms.keys():
    cderes = [atm.CDR for atm in interface_atoms[pdb_idcode].antibody.values()]
    if (cderes.count(0) / len(cderes)) > .9:
        print(filenames[pdb_idcode])

5u5m_complex_BA_CE.pdb
6w7s_complex_HL_A.pdb
7kpj_complex_AB_E.pdb
7lu9_complex_lk_f.pdb
7rxc_complex_HL_B.pdb


## Pi-Pi

In [179]:
import abag_interactions_rings
imp.reload(abag_interactions_rings)
from abag_interactions_rings import *

In [175]:
PiPi = {}
check_pdb = '5t1d'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    logging.info(pdb_idcode)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen
    PiPi[pdb_idcode] = tuple()

    try:
        CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
            trj_in, ab_chains, ring_atoms)
        pipi_ring_pairs = get_pipi_interactions(
            trj_in, CG_rings, CoM_rings_xyz, normal_vectors, cutoff_ring,
            cutoff_angle_pipi)

        rings = get_data_from_ring_ring(pdb_idcode, epitope_buried_cleaned, pipi_ring_pairs)
    except Exception as e:
        logging.error(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during PiPi "
            f"interactions calculation.")
        raise e
    else:
        PiPi[pdb_idcode] = rings

In [176]:
PiPi

{'5t1d': ()}

In [177]:

CG_rings

{'antibody': [1899,
  1934,
  1980,
  2047,
  2229,
  2313,
  2409,
  2430,
  2502,
  2727,
  2944,
  2965,
  3027,
  3087,
  3107,
  3370,
  3689,
  3710,
  3833,
  3964,
  3995,
  4133,
  4316,
  4474,
  5125,
  5202,
  5242,
  5259,
  5283,
  5506,
  5614,
  5718,
  5818,
  6055,
  6076,
  6164,
  6249,
  6541,
  6764,
  6831,
  6851,
  6991,
  7223,
  7372,
  7572,
  7632,
  7674,
  7758,
  7929],
 'antigen': [39, 240, 380, 745, 765, 982, 1190, 1229, 1322, 1430, 1461, 1485]}

In [171]:
with open(Path.joinpath(data_dir, 'PiPi.pkl'), 'rb') as file:
        PiPi = pickle.load(file)

In [10]:
PiPi['4oqt'][0].antibody

Ring(indices=(5842, 5843, 5844, 5845, 5846, 5847, 5848, 5849, 5850, 5851, 5852), serials=(5845, 5846, 5847, 5848, 5849, 5850, 5851, 5852, 5853, 5854, 5855), resSeq=57, resSeq_str='', resname='PHE', chain_ID='H', chain_type='H', CDR=2)

In [11]:
PiPi['4oqt'][0].antigen

Ring(indices=(937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948), serials=(938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949), resSeq=122, resSeq_str='', resname='TYR', chain_ID='A', chain_type='.', CDR=-1)

In [69]:
a = tuple(interface_atoms[pdb_idcode].antigen.values())

In [97]:
type((*(1, 2), *(3, 4)))

tuple

### Pi-Ion

In [65]:
with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
    filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)
with(open(Path.joinpath(data_dir, 'pdb.list'), 'r')) as file:
    pdb_list = [linea.strip() for linea in file]
with open(Path.joinpath(data_dir, 'interface_atoms.pkl'), 'rb') as file:
    interface_atoms = pickle.load(file)
print("Starting now.")

Starting now.


In [178]:
import abag_interactions_rings
imp.reload(abag_interactions_rings)
from abag_interactions_rings import *

In [8]:
PiAnion = {}
PiCation = {}
check_pdb = '5i5k'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    logging.info(pdb_idcode)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen
    PiAnion[pdb_idcode] = tuple()
    PiCation[pdb_idcode] = tuple()

    try:
        CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
            trj_in, ab_chains, ring_atoms_pi_ion)

        ab_anions, ag_anions, ab_cations, ag_cations = get_ions(
            interface_atoms[pdb_idcode])

        ring_ab_anion_ag = get_ion_ring_interactions(
            trj_in, CG_rings["antibody"], ag_anions, CoM_rings_xyz["antibody"],
            normal_vectors["antibody"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)
        ring_ab_cation_ag = get_ion_ring_interactions(
            trj_in, CG_rings["antibody"], ag_cations, CoM_rings_xyz["antibody"],
            normal_vectors["antibody"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

        ring_ag_anion_ab = get_ion_ring_interactions(
            trj_in, CG_rings["antigen"], ab_anions, CoM_rings_xyz["antigen"],
            normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

        ring_ag_cation_ab = get_ion_ring_interactions(
            trj_in, CG_rings["antigen"], ab_cations, CoM_rings_xyz["antigen"],
            normal_vectors["antigen"], trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

        data_ring_ab_anion_ag = get_data_from_ring_ab_ion_ag(
            pdb_idcode, epitope_buried_cleaned, ring_ab_anion_ag)

        data_ring_ab_cation_ag = get_data_from_ring_ab_ion_ag(
            pdb_idcode, epitope_buried_cleaned, ring_ab_cation_ag)

        data_ring_ag_anion_ab = get_data_from_ring_ag_ion_ab(
            pdb_idcode, epitope_buried_cleaned, ring_ag_anion_ab)

        data_ring_ag_cation_ab = get_data_from_ring_ag_ion_ab(
            pdb_idcode, epitope_buried_cleaned, ring_ag_cation_ab)

    except Exception as e:
        logging.error(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during Pi-ion "
            f"interactions calculation. Aborting.")
        raise e
    else:
        PiAnion[pdb_idcode] = (*data_ring_ab_anion_ag, *data_ring_ag_anion_ab)
        PiCation[pdb_idcode] = (*data_ring_ab_cation_ag, *data_ring_ag_cation_ab)



In [77]:
with open(Path.joinpath(data_dir, 'PiAnion.pkl'), 'rb') as file:
        PiAnion = pickle.load(file)
with open(Path.joinpath(data_dir, 'PiCation.pkl'), 'rb') as file:
        PiCation = pickle.load(file)

In [102]:
probar = [ pdb_idcode for pdb_idcode in pdb_list if len(PiCation[pdb_idcode]) > 3 ]

In [103]:
pdb_idcode = probar[0]
PiCation[pdb_idcode]

(PionPair(ring=Ring(indices=(708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721), serials=(709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722), resSeq=94, resSeq_str='', resname='TRP', chain_ID='A', chain_type='L', CDR=3), ion=Atom(index=3643, serial=6938, element='N', is_sidechain=True, resSeq=45, resSeq_str='45', resname='ARG', chain_ID='E', chain_type='.', CDR=-1)),
 PionPair(ring=Ring(indices=(1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908), serials=(1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910), resSeq=33, resSeq_str='', resname='TRP', chain_ID='B', chain_type='H', CDR=0), ion=Atom(index=3832, serial=7127, element='N', is_sidechain=True, resSeq=68, resSeq_str='68', resname='ARG', chain_ID='E', chain_type='.', CDR=-1)),
 PionPair(ring=Ring(indices=(1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908), serials=(1897, 1898, 1899, 1900, 1901, 190

In [147]:
def ions(atoms_iterable, resnames, element):
        control_set = set()
        for atom in atoms_iterable:
            if (atom.element == element) and (atom.resname in resnames) and atom.is_sidechain:
                # Get only 1 ion per residue (ASP/GLU or LYS/ARG)
                unique_id = str(atom.resSeq) + atom.resname + atom.chain_ID
                if unique_id not in control_set:
                    control_set.add(unique_id)
                    yield atom

In [156]:
retro = [ pdb_idcode for pdb_idcode in pdb_list if len(PiCation[pdb_idcode]) > 2]
pdb_idcode = retro[0]

In [157]:
a, b, c, d = get_ions(interface_atoms[pdb_idcode])

In [172]:
PiPi['5t1d']

()

In [92]:
def ions(atoms_iterable, resnames, element):
    # Get only 1 ion per residue (ASP/GLU or LYS/ARG)
    control_set = set()
    for atom in atoms_iterable:
        if (atom.element == element) and (atom.resname in resnames) and atom.is_sidechain:
            unique_id = str(atom.resSeq) + atom.resname + atom.chain_ID
            if unique_id not in control_set:
                control_set.add(unique_id)
                yield atom

Atom(index=3830, serial=7125, element='N', is_sidechain=True, resSeq=68, resSeq_str='68', resname='ARG', chain_ID='E', chain_type='.', CDR=-1)
Atom(index=3643, serial=6938, element='N', is_sidechain=True, resSeq=45, resSeq_str='45', resname='ARG', chain_ID='E', chain_type='.', CDR=-1)


In [109]:
gg = ions(interface_atoms[pdb_idcode].antigen.values(), {'LYS', 'ARG'}, 'N')


True

In [90]:
interface_atoms[pdb_idcode].antigen.values()

dict_values([Atom(index=573, serial=574, element='N', is_sidechain=False, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=574, serial=575, element='C', is_sidechain=False, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=575, serial=576, element='C', is_sidechain=False, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=576, serial=577, element='O', is_sidechain=False, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=577, serial=578, element='C', is_sidechain=True, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=578, serial=579, element='C', is_sidechain=True, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.', CDR=-1), Atom(index=579, serial=580, element='C', is_sidechain=True, resSeq=76, resSeq_str='76', resname='GLU', chain_ID='A', chain_type='.'

------

### hbond

In [4]:
import interactions_polar
importlib.reload(interactions_polar)
from interactions_polar import *

with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
    filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)

In [6]:
hbonds_dict = {}

check_pdb = '6edu'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    print(f"{pdb_idcode}", flush=True)
    pdb_filename = Path.joinpath(str_dir, pdb_idcode + '.pdb')
    try:
        trj_in = md.load(pdb_filename)
    except Exception as e:
        logging.error(f" Couldn't read {pdb_idcode}. Skipping.")
        continue
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    hbond_dir = Path.joinpath(data_dir, "hbonds")
    hbplus = Path.joinpath(source_location, 'hbplus')

    process = subprocess.run([hbplus, pdb_filename, "-A", "0", "0", "0", "-d", "3.9"],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, cwd=hbond_dir)
    hb2_file = Path.joinpath(hbond_dir, pdb_filename.name[0:-3] + "hb2")

    hbonds_dict[pdb_idcode] = parse_hb2(
            pdb_idcode, hb2_file, epitope_buried_cleaned, trj_in.topology, ab_chains, ag_chains)

6edu


ERROR:root: Couldn't read 6edu. Skipping.


---

### shielding

In [7]:
import shielding
importlib.reload(shielding)
from shielding import *

with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
    filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)



In [None]:
check_pdb = '5f96'
idx = pdb_list.index(check_pdb)
for pdb_idcode, pdb_file, is_cpx in zip([pdb_list[idx]], [file_pdb_list[idx]], [is_cpx_pdb[idx]]):
    print(f"{pdb_idcode}", flush=True)
    
    pdb_filename = Path.joinpath(casa_dir, pdb_file)
    trj_in = md.load(pdb_filename)

    all_ab_chains = [
        fila.chainID for i,
        fila in df_dataset[df_dataset.idcode == pdb_idcode].iterrows()]
    ag_chains = [
        chain.chain_id for chain in trj_in.topology.chains
        if chain.chain_id not in all_ab_chains]
    ab_chains = [
        chain.chain_id for chain in trj_in.topology.chains
        if chain.chain_id not in ag_chains]
        
    with open(Path.joinpath(casa_dir, 'data', 'hbonds_1_32.pkl'), 'rb') as file:
        hbonds_dict = pickle.load(file)

In [33]:
shielding_dict = {}
check_pdb = '1adq'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    # for pdb_idcode in pdb_list:
    logging.info(pdb_idcode)

    pdb_filename = Path.joinpath(str_dir, pdb_idcode + '.pdb')
    try:
        trj_in = md.load(pdb_filename)
    except Exception as e:
        logging.error(f" Couldn't read {pdb_idcode}. Skipping.")
        continue
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    ab_carbons = [atom.index for atom in interface_atoms[pdb_idcode].antibody.values()
                if atom.element == 'C']

    ag_carbons = [atom.index for atom in interface_atoms[pdb_idcode].antigen.values()
                if atom.element == 'C']

    ab_polars = [atom.index for atom in interface_atoms[pdb_idcode].antibody.values()
                if atom.element in ('N', 'O')]
    ag_polars = [atom.index for atom in interface_atoms[pdb_idcode].antigen.values()
                if atom.element in ('N', 'O')]
    polars = ab_polars + ag_polars

    C_ON_pairs = np.array(list(itertools.product(ab_carbons, polars)))
    C_C_pairs = np.array(list(itertools.product(ab_carbons, ag_carbons)))

    C_ON_distancias = md.compute_distances(
        trj_in, C_ON_pairs).reshape((len(ab_carbons), len(polars)))

    C_C_distancias = md.compute_distances(
        trj_in, C_C_pairs).reshape((len(ab_carbons), len(ag_carbons)))

    G = nx.Graph()
    indices_close_C_C_distancias = np.where(C_C_distancias < cutoff)
    mask_close_C_ON_distancias = C_ON_distancias < cutoff
    shielding_pdb = {}
    for i, j in zip(*indices_close_C_C_distancias):
        C_cdr_id = ab_carbons[i]
        C_epi_id = ag_carbons[j]
        surrounding_ON_ids = [
            polars[i]
            for i in np.where(mask_close_C_ON_distancias[i, :])[0]]

        shielded, ON_id = is_shielded(
            trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ON_ids)

        if shielded:
            # I could rewrite this to get the data from `interface_atoms[pdb_idcode]`
            # but I'm in a bit of a hurry.
            chainID = trj_in.topology.atom(ON_id).residue.chain.chain_id
            resSeq = trj_in.topology.atom(ON_id).residue.resSeq
            resname = trj_in.topology.atom(ON_id).residue.name
            chain_type, cdr = get_chain_info(
                epitope_buried_cleaned, pdb_idcode, ab_chains, chainID, resSeq)
            serial = trj_in.topology.atom(ON_id).serial
            element = trj_in.topology.atom(ON_id).element.symbol
            is_sidechain = trj_in.topology.atom(ON_id).is_sidechain

            # Compile all the info on this shielding polar atom.
            shielding_atom = ShieldingAtom(
                chainID=chainID, chain_type=chain_type, CDR=cdr,
                resSeq=resSeq, resname=resname, index=ON_id,
                serial=serial, element=element, is_sidechain=is_sidechain)

            shielding_pdb[ON_id] = shielding_atom

    shielding_dict[pdb_idcode] = shielding_pdb

In [22]:
with open(Path.joinpath(casa_dir, 'data', 'shielding.pkl'), 'rb') as file:
        shielding = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'hydrophobic.pkl'), 'rb') as file:
        hydrophobic = pickle.load(file)

In [11]:
pdb_idcode = '3ze1'
filenames[pdb_idcode]

'3ze1_complex_EF_C.pdb'

In [None]:
hydro

-----

### Count_ONs

In [87]:
with open(Path.joinpath(casa_dir, 'data', 'hbonds_0_39.pkl'), 'rb') as file:
        hbonds = pickle.load(file)
interacting_ab_chains = {}

In [96]:
with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
        filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'hbonds_0_39.pkl'), 'rb') as file:
    hbonds = pickle.load(file)

pdb_list = list(filenames.keys())
df_dataset = get_df_dataset(casa_dir)

interacting_ab_chains = {}
for pdb_idcode in pdb_list:
    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    serial_to_id = {}
    for atomo in trj_in.topology.atoms:
        serial_to_id[atomo.serial] = atomo.index

    ids_oxy_nitro = []
    for lista_hbond in hbonds[pdb_idcode].values():
        for hb in lista_hbond:
            ids_oxy_nitro.append(hb.donor.chainID)
            ids_oxy_nitro.append(hb.acceptor.chainID)

    interacting_ab_chains_pdb = set(Counter(ids_oxy_nitro).keys()) & set(ab_chains)

    interacting_ab_chains[pdb_idcode] = interacting_ab_chains_pdb

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


### DSSP completo

In [67]:
with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
    filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'interacting_chains.pkl'), 'rb') as file:
    interacting_chains = pickle.load(file)
# with open(Path.joinpath(casa_dir, 'data', 'hbonds_0_39.pkl'), 'rb') as file:
#     hbonds = pickle.load(file)

pdb_list = list(filenames.keys())
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


In [120]:
simplify_sse = {'H': 'H', 'B': 'E', 'E': 'E', 'G': 'H', 'I': 'H',
    'T': 'H', 'S': 'C', ' ': 'C', 'NA': 'NA'}
SSE = {}
SSE_cnt = {}
check_pdb = '1ncc'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
# for pdb_idcode in pdb_list:
    logging.info(pdb_idcode)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ag_chains = chains[pdb_idcode].antigen
    # Get SSE of each residue
    dssp = md.compute_dssp(trj_in, simplified=False)[0]
    SSE_pdb = {}
    SSE_cnt_pdb = {'H': 0, 'E': 0, 'C': 0, 'NA': 0}
    for i, res in enumerate(trj_in.topology.residues):
        if res.chain.chain_id in ag_chains:
            sse = simplify_sse[dssp[i]]
            SSE_pdb[res.resSeq] = sse
            SSE_cnt_pdb[sse] += 1
    SSE[pdb_idcode] = SSE_pdb
    # SSE_cnt[pdb_idcode] = SSE_cnt_pdb

1ncc


In [121]:
Counter(SSE[pdb_idcode].values())

Counter({' ': 103, 'S': 62, 'B': 6, 'E': 162, 'H': 4, 'T': 42, 'G': 7})

----

### get_interface_atoms

In [72]:
import get_interface_atoms
imp.reload(get_interface_atoms)
from get_interface_atoms import *

In [50]:
with open(Path.joinpath(casa_dir, 'data', 'filenames.pkl'), 'rb') as file:
        filenames = pickle.load(file)
with open(Path.joinpath(casa_dir, 'data', 'chains.pkl'), 'rb') as file:
    chains = pickle.load(file)
with(open(Path.joinpath(data_dir, 'pdb.list'), 'r')) as file:
    pdb_list = [linea.strip() for linea in file]

In [91]:
interface_atoms = {}
check_pdb = '1egj'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
# for pdb_idcode in pdb_list:
    

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    cadenas = get_chains(trj_in.topology, ab_chains, ag_chains)
    chain_types = get_chain_types(epitope_buried_cleaned, pdb_idcode, ab_chains, ag_chains)

    row_pdb = buried_interface_res.query(f"idcode == '{pdb_idcode}'\
        and chainID == '{ab_chains[0]}'")
    ab_atoms = []
    try:
        for row in row_pdb.ab_ag_interface_res.values[0]:
            chainID = row[0]
            resSeq_str = row[1].strip()
            resname = row[2]
            interface_residues = get_residue(cadenas[chainID], resSeq_str)
            for interface_resi in interface_residues:
                cdr = get_cdr_from_residue(
                    epitope_buried_cleaned, pdb_idcode, chainID, chain_types[chainID],
                    interface_resi)
                for atom in interface_resi.atoms:
                    atm = Atom(index=atom.index, serial=atom.serial,
                                resSeq=interface_resi.resSeq, resSeq_str=resSeq_str,
                                resname=interface_resi.name,
                                chain_ID=chainID, chain_type=chain_types[chainID], CDR=cdr)
                    ab_atoms.append(atm)

    except Exception as e:
        logging.exception(e)
        logging.error(f" {pdb_idcode} interface atom look-up failed.")
        continue

    ag_atoms = []
    try:
        for row in row_pdb.ag_ab_interface_res.values[0]:
            chainID = row[0]
            resSeq_str = row[1].strip()
            resname = row[2]
            interface_residues = get_residue(cadenas[chainID], resSeq_str)
            for interface_resi in interface_residues:
                cdr = get_cdr_from_residue(
                    epitope_buried_cleaned, pdb_idcode, chainID, chain_types[chainID],
                    interface_resi)
                for atom in interface_resi.atoms:
                    atm = Atom(index=atom.index, serial=atom.serial,
                                resSeq=interface_resi.resSeq, resSeq_str=resSeq_str,
                                resname=interface_resi.name,
                                chain_ID=chainID, chain_type=chain_types[chainID], CDR=cdr)
                    ag_atoms.append(atm)

    except Exception as e:
        logging.exception(e)
        logging.error(f" {pdb_idcode} interface atom look-up failed.")
        continue


---

In [226]:
import get_interface_atoms
imp.reload(get_interface_atoms)
from get_interface_atoms import *

In [227]:
check_pdb = '1egj'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    logging.info(pdb_idcode)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(Path.joinpath(exposed_dir, pdb_idcode, pdb_filename))
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    cadenas = get_chains(trj_in.topology, ab_chains, ag_chains)
    chain_types = get_chain_types(epitope_buried_cleaned, pdb_idcode, ab_chains, ag_chains)

    # `buried_interface_res` has 1 row per interface. In case there's more than one,
    # I use the heavy chain (ab_chains[0]) to identify the one I'm care about.
    df_interface_atoms = buried_interface_res.query(f"idcode == '{pdb_idcode}'\
        and chainID == '{ab_chains[0]}'")
    
    try:
        ab_atoms = get_atoms_from_rows(
            pdb_idcode, epitope_buried_cleaned, df_interface_atoms.ab_ag_interface_res.values[0],
            cadenas, chain_types)
    except Exception as e:
        logging.exception(e)
        logging.error(f" {pdb_idcode} antibody's interface atom look-up failed.")
        continue

----

In [20]:
pdb_idcode = '3cvh'
a = epitope_buried_cleaned.query(f"idcode == '{pdb_idcode}'").ag_interface_res.values

In [21]:
superficie = tuple({ (cada[0], int(cada[1].strip()), cada[2]) for cada in a[0] })

In [22]:
print(f"select resi ", end='')
for res in superficie[:-1]:
    print(f"{res[1]}", end = '+')
print(f"{superficie[-1][1]} and chain {superficie[-1][0]}")

select resi 250+58+124+102+121+2+18+118+128+98+158+6+218+199+36+66+267+86+258+110+191+256+68+269+180+270+220+222+57+64+72+138+177+228+3+240+263+272+41+87+92+183+235+195+139+265+254+59+273+44+91+119+106+115+257+94+8+147+35+71+161+169+248+172+241+50+251+210+12+242+266+229+154+216+25+42+198+30+159+73+157+17+149+174+224+46+88+227+7+15+27+60+14+271+61+170+96+135+70+230+40+97+137+206+150+215+221+246+186+63+105+233+123+127+219+108+165+187+182+93+231+5+168+179+104+243+142+184+238+167+85+48+77+125+136+207+145+45+130+141+47+37+76+262+148+225+6+53+80+274+152+190+113+21+39+111+129+133+153+7+264+83+176+249+185+31+155+163+234+1+226+19+49+65+54+122+151+162+109+173+175+2+4+178+212+188+32+117+166+232+192+11+189+211+82+146+126+193+43+244+223+204+10+260+4+8+202+253+90+38+196+55+214+237+197+247+20+107+268+84+255+132+239+22+9+23+13+62+120+3+209+78+131+51+213+69+29+134+181+194+200+1+236+56+252+75+89+144+16+116+103+79 and chain A


In [30]:
a[6]

IndexError: index 6 is out of bounds for axis 0 with size 6