In [2]:
import pandas as pd
import numpy as np
import itertools
import string
import networkx as nx
import random

import sys, os
from pathlib import Path
source_location = Path().resolve()
sys.path.append(source_location)
from scripts.utils import get_sabdab_details

import mdtraj as md
casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")

In [3]:
df_sabdab_all = pd.read_csv(Path.joinpath(source_location,
    'structures/sabdab_summary_all.tsv'), sep="\t")
df_sabdab_90 = pd.read_csv(Path.joinpath(source_location,
    'structures/sabdab_summary_90.tsv'), sep="\t")

df_buried = pd.read_pickle(Path.joinpath(source_location,
    'data/epitope_buried.pickle'))

df_interactions = pd.read_pickle(Path.joinpath(source_location,
    'data/interactions.pickle'))

In [4]:
# protein_antigens = df_sabdab_90[df_sabdab_90['antigen_type'].str.contains('protein', na = False)]
protein_antigens = df_sabdab_90.query("antigen_type == antigen_type and antigen_type.str.contains('protein')", engine = 'python').drop_duplicates()
ab_protein_antigens = set(protein_antigens.pdb.values)
all_saddab_proteins = set(df_sabdab_90.pdb.values)
print(
    f"SabDab protein antigen:\n"
    f"{len(ab_protein_antigens)} proteins out of {len(all_saddab_proteins)}, "
    f"{round(len(ab_protein_antigens) / len(all_saddab_proteins) * 100, 1)}%"
)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%


In [5]:
ab_both_chains = set(protein_antigens.query("Hchain == Hchain and Lchain == Lchain").pdb.values)
ab_single_H_chain = set(protein_antigens.query("Hchain == Hchain").pdb.values)
ab_single_L_chain = set(protein_antigens.query("Lchain == Lchain").pdb.values)

n_ab_no_Hchain = len(ab_protein_antigens) - len(ab_single_H_chain)
n_ab_no_Lchain = len(ab_protein_antigens) - len(ab_single_L_chain)

print(f"All: {len(ab_protein_antigens)}\nNo Hchain: {n_ab_no_Hchain}\nNo Lchain: {n_ab_no_Lchain}\nBoth chains: {len(ab_both_chains)}")

All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154


In [6]:
buried_fullab = df_buried[df_buried.idcode.isin(ab_both_chains)]
print(
    f"Buried surfaces of {len(set(df_buried.idcode.values))} proteins\n"
    f"with both chains: {len(set(buried_fullab.idcode.values))}"
)

Buried surfaces of 2492 proteins
with both chains: 867


In [7]:
buried_fullab.iloc[4805]

idcode                                                                    4ydk
chainID                                                                      H
chain_type                                                                   H
cdr                                                                          3
cdr_seq                                                   VTFYHEGSGYYYRAGNYFDS
cdr_begin                                                                   95
cdr_end                                                                    102
cdr_atoms                    [6701, 6702, 6703, 6704, 6705, 6706, 6707, 670...
epitope_atoms                [950, 2128, 2133, 2134, 2135, 2414, 2483, 2484...
epitope_residues             [105, 257, 257, 257, 257, 275, 280, 280, 280, ...
ag_ab_interface              {3584, 3592, 3593, 3596, 3597, 3599, 3600, 360...
ag_cdrchain_interface        {3584, 4738, 3582, 4742, 4358, 3592, 3593, 436...
ag_cdr_interface             {3584, 4738, 4742, 4358

In [8]:
def is_shielded(positions, C_cdr_id, C_epi_id, surrounding_ONS_ids):
    C_cdr_xyz = positions[C_cdr_id, :]
    C_epi_xyz = positions[C_epi_id, :]

    vec_C_C = C_epi_xyz - C_cdr_xyz
    n_vec_C_C = vec_C_C / np.linalg.norm(vec_C_C)

    for ONS_id in surrounding_ONS_ids:
        ONS_xyz = positions[ONS_id, :]
        vec_cdr_ONS = ONS_xyz - C_cdr_xyz
        n_vec_cdr_ONS = vec_cdr_ONS / np.linalg.norm(vec_cdr_ONS)
        vec_epi_ONS = ONS_xyz - C_epi_xyz
        n_vec_epi_ONS = vec_epi_ONS / np.linalg.norm(vec_epi_ONS)
        
        if np.dot(n_vec_C_C, n_vec_cdr_ONS) > 0.9 and\
            np.dot(n_vec_C_C, n_vec_epi_ONS) < -0.1:
            return True, ONS_id

    return False, 0

# intento 1

In [126]:
pdb_idcode = '4ydk'
pdb_filename = Path.joinpath(casa_dir, "tempo", pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)
serial_to_id = {}
for atomo in trj_in.topology.atoms:
    serial_to_id[atomo.serial] = atomo.index

all_epitope_atoms = np.unique(list(itertools.chain(
    *[ fila.epitope_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

all_cdr_atoms = np.unique(list(itertools.chain(
    *[ fila.cdr_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

ids_C_epitope_atoms = []
ids_ONS_epitope_atoms = []
for id in all_epitope_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_epitope_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_epitope_atoms.append(index)

ids_C_cdr_atoms = []
ids_ONS_cdr_atoms = []
for id in all_cdr_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_cdr_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_cdr_atoms.append(index)

ids_ONS_atoms = ids_ONS_cdr_atoms + ids_ONS_epitope_atoms 

C_ONS_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_ONS_atoms)))
C_C_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_C_epitope_atoms)))

C_ONS_distancias = md.compute_distances(trj_in, C_ONS_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_ONS_atoms)))

C_C_distancias = md.compute_distances(trj_in, C_C_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_C_epitope_atoms)))

cutoff = 0.5

In [341]:
indices_close_C_C_distancias = np.where(C_C_distancias < cutoff)
mask_close_C_ONS_distancias = C_ONS_distancias < cutoff
interacting_carbons = []
interacting_carbons_tuples = []
xyz_centers = []
shielding_atoms_serial = {}

for i, j in zip(*indices_close_C_C_distancias):
    C_cdr_id = ids_C_cdr_atoms[i]
    C_epi_id = ids_C_epitope_atoms[j]
    surrounding_ONS_ids = [ids_ONS_atoms[i] for i in np.where(mask_close_C_ONS_distancias[i, :])[0] ]
    
    shielded, ONS_id = is_shielded(trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ONS_ids)

    C1 = trj_in.topology.atom(C_cdr_id).serial
    C2 = trj_in.topology.atom(C_epi_id).serial
    ONS = trj_in.topology.atom(ONS_id).serial
    if shielded:
        shielding_atoms_serial[i] = trj_in.topology.atom(ONS_id).serial
        # print(f'shielded: {C1=}  --  {C2=} -- {ONS=}')
    else:
        interacting_carbons.append(C_cdr_id)
        interacting_carbons.append(C_epi_id)
        interacting_carbons_tuples.append((C_cdr_id, C_epi_id))
        xyz_centers.append(trj_in.xyz[0][C_epi_id, :] - trj_in.xyz[0][C_cdr_id, :])
        # print(f'hydro: {C1=}  --  {C2=} -- {ONS=}')
interacting_carbons = list(set(interacting_carbons))

In [332]:
pairs_of_interacting_carbons = list(itertools.product(interacting_carbons,
    interacting_carbons))

pairs_of_interacting_carbons = [ i for i in pairs_of_interacting_carbons if i[0] != i[1] ]

interacting_carbons_distances = md.compute_distances(trj_in, pairs_of_interacting_carbons)
cutoff_carbons = cutoff
interacting_clusters = np.array(pairs_of_interacting_carbons)[np.where(
    interacting_carbons_distances < cutoff_carbons)[1]]

In [284]:
len(interacting_carbons)

106

In [285]:
len(pairs_of_interacting_carbons)

11130

In [336]:
G = nx.Graph()
G.add_edges_from(interacting_clusters)
for cluster in sorted(nx.connected_components(G), key = len, reverse = True):
    for C in cluster:
        print(trj_in.topology.atom(C).serial)
    print("------------ ", len(cluster))

4650
4653
4655
4688
2128
4691
6753
6773
6774
6776
6777
6779
6780
4207
4210
4211
6794
6795
6796
6806
6807
4238
4239
6813
6814
4241
6816
6824
6834
6835
6836
6837
6838
6839
6840
4262
4279
6852
4280
6855
6856
6857
6858
6859
6860
6861
4300
6873
6874
4301
6877
6879
6881
4310
4311
6894
6895
6897
6901
6918
6921
4871
4878
10012
10013
10014
10015
10016
10017
4888
4889
10029
10045
10048
5985
5986
5988
5989
5991
9065
5992
6004
9082
6010
6011
3505
3508
3516
2497
3523
2500
2507
3533
2512
2534
3561
3564
3565
3566
3579
3580
3581
3582
------------  103
9414
9416
4717
------------  3


# intento 2

In [379]:
G = nx.Graph()
cutoff_clusters = .3
for (xyz_i, tuple_i) in zip(xyz_centers, interacting_carbons_tuples):
    for (xyz_j, tuple_j) in zip(xyz_centers, interacting_carbons_tuples):
        if tuple_i == tuple_j:
            continue
        if np.linalg.norm(xyz_i - xyz_j) < cutoff_clusters:
            G.add_edge(tuple_i, tuple_j)

In [380]:
for cluster in sorted(nx.connected_components(G), key = len, reverse = True):
    for par in cluster:
        c1 = trj_in.topology.atom(par[0]).serial
        c2 = trj_in.topology.atom(par[1]).serial
        print(f'{c1=} -- {c2=}')
    print("------------ ", len(cluster))

c1=5991 -- c2=4210
c1=6860 -- c2=4241
c1=6777 -- c2=4655
c1=9414 -- c2=4717
c1=6873 -- c2=3533
c1=6856 -- c2=3581
c1=10015 -- c2=3505
c1=6855 -- c2=3579
c1=6859 -- c2=3561
c1=6795 -- c2=2512
c1=5989 -- c2=4279
c1=6834 -- c2=4279
c1=6858 -- c2=3581
c1=6010 -- c2=4311
c1=6824 -- c2=4888
c1=6838 -- c2=4279
c1=6861 -- c2=3561
c1=6813 -- c2=4878
c1=6836 -- c2=4279
c1=6859 -- c2=4210
c1=6840 -- c2=4279
c1=10012 -- c2=3508
c1=6901 -- c2=4655
c1=6857 -- c2=4210
c1=6877 -- c2=3523
c1=6852 -- c2=3533
c1=6011 -- c2=4311
c1=6861 -- c2=4210
c1=6796 -- c2=2512
c1=6879 -- c2=3523
c1=10016 -- c2=3508
c1=6894 -- c2=3523
c1=6859 -- c2=3565
c1=6860 -- c2=3564
c1=5989 -- c2=4210
c1=6807 -- c2=4888
c1=6860 -- c2=4238
c1=9065 -- c2=4691
c1=10045 -- c2=3516
c1=6776 -- c2=4653
c1=6861 -- c2=3565
c1=5985 -- c2=4279
c1=10013 -- c2=3508
c1=6795 -- c2=2500
c1=6773 -- c2=2497
c1=6894 -- c2=3580
c1=10017 -- c2=3508
c1=6860 -- c2=4878
c1=9082 -- c2=4691
c1=6855 -- c2=3533
c1=6816 -- c2=3581
c1=6776 -- c2=4655
c1=679

In [347]:
interacting_clusters

array([[4649, 4652],
       [4649, 4654],
       [4649, 6766],
       ...,
       [3581, 3578],
       [3581, 3579],
       [3581, 3580]])

In [None]:
with open("tempo/clusters_hydrophobic.py", "w") as fil:
    fil.write(f'from pymol import cmd\n\n')
    fil.write(f'cmd.load("{buried_fullab.iloc[4805].idcode}.pdb")\n')
    fil.write(f'cmd.color("salmon", "chain G")\n')
    fil.write(f'cmd.color("atomic", "(not elem C)")\n\n')
    for x in np.unique(buried_fullab.iloc[4805].epitope_residues):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain G")\n')

    for x in range(buried_fullab.iloc[4805].cdr_begin, buried_fullab.iloc[4805].cdr_end+1):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')

    cdrH3_extra_residues = [''.join(tuple) for tuple in list(itertools.product(["100"], string.ascii_uppercase[0:12]))]
    for x in cdrH3_extra_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')

    ##
    # 
    ##

    for n, cluster in enumerate(sorted(nx.connected_components(G), key = len, reverse = True)):
        linea = f''
        cluster_id = 'cluster_' + str(n+1)
        fil.write(f'cmd.select("id ')
        for par in cluster:
            c1 = trj_in.topology.atom(par[0]).serial
            c2 = trj_in.topology.atom(par[1]).serial
            linea += f'{c1}+{c2}+'
        fil.write(linea[0:-1])
        fil.write(f'")\n')
        fil.write(f'cmd.set_name("sele", "{cluster_id}")\n')

# intento 3

In [432]:
pdb_idcode = '4ydk'
pdb_filename = Path.joinpath(casa_dir, "tempo", pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)
serial_to_id = {}
for atomo in trj_in.topology.atoms:
    serial_to_id[atomo.serial] = atomo.index

all_epitope_atoms = np.unique(list(itertools.chain(
    *[ fila.epitope_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

all_cdr_atoms = np.unique(list(itertools.chain(
    *[ fila.cdr_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

ids_C_epitope_atoms = []
ids_ONS_epitope_atoms = []
for id in all_epitope_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_epitope_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_epitope_atoms.append(index)

ids_C_cdr_atoms = []
ids_ONS_cdr_atoms = []
for id in all_cdr_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_cdr_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_cdr_atoms.append(index)

ids_ONS_atoms = ids_ONS_cdr_atoms + ids_ONS_epitope_atoms 

C_ONS_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_ONS_atoms)))
C_C_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_C_epitope_atoms)))

C_ONS_distancias = md.compute_distances(trj_in, C_ONS_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_ONS_atoms)))

C_C_distancias = md.compute_distances(trj_in, C_C_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_C_epitope_atoms)))

cutoff = 0.5

In [433]:
indices_close_C_C_distancias = np.where(C_C_distancias < cutoff)
mask_close_C_ONS_distancias = C_ONS_distancias < cutoff
interacting_carbons = []
interacting_carbons_tuples = []
xyz_centers = []
shielding_atoms_serial = {}
G = nx.Graph()

for i, j in zip(*indices_close_C_C_distancias):
    C_cdr_id = ids_C_cdr_atoms[i]
    C_epi_id = ids_C_epitope_atoms[j]
    surrounding_ONS_ids = [ids_ONS_atoms[i] for i in np.where(mask_close_C_ONS_distancias[i, :])[0] ]
    
    shielded, ONS_id = is_shielded(trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ONS_ids)

    C1 = trj_in.topology.atom(C_cdr_id).serial
    C2 = trj_in.topology.atom(C_epi_id).serial
    ONS = trj_in.topology.atom(ONS_id).serial
    if shielded:
        shielding_atoms_serial[i] = trj_in.topology.atom(ONS_id).serial
        # print(f'shielded: {C1=}  --  {C2=} -- {ONS=}')
    else:
        G.add_edge(C_cdr_id, C_epi_id)
        interacting_carbons.append(C_cdr_id)
        interacting_carbons.append(C_epi_id)
        interacting_carbons_tuples.append((C_cdr_id, C_epi_id))
        xyz_centers.append(trj_in.xyz[0][C_epi_id, :] - trj_in.xyz[0][C_cdr_id, :])
        # print(f'hydro: {C1=}  --  {C2=} -- {ONS=}')
interacting_carbons = list(set(interacting_carbons))

In [475]:
with open("tempo/clusters_hydrophobic.py", "w") as fil:
    fil.write(f'from pymol import cmd\n\n')
    fil.write(f'cmd.load("{buried_fullab.iloc[4805].idcode}.pdb")\n')
    fil.write(f'cmd.color("salmon", "chain G")\n')
    fil.write(f'cmd.color("atomic", "(not elem C)")\n\n')

    # Show epitope residues as lines
    epitope_residues = np.unique(list(itertools.chain(
    *[ fila.epitope_residues for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))
    for x in epitope_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain G")\n')

    # Show CDR residues as lines
    cdr_H_residues = np.unique(list(itertools.chain(
        *[ list(range(fila.cdr_begin, fila.cdr_end+1))
        for index, fila in buried_fullab[(buried_fullab.idcode == pdb_idcode) & (buried_fullab.chainID == 'H')].iterrows() ])))
    for x in cdr_H_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')
    
    # CD3 has several residues numbered '100 + letter'
    cdrH3_extra_residues = [''.join(tuple) for tuple in list(itertools.product(["100"], string.ascii_uppercase[0:12]))]
    for x in cdrH3_extra_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')
    
    cdr_L_residues = np.unique(list(itertools.chain(
        *[ list(range(fila.cdr_begin, fila.cdr_end+1))\
        for index, fila in buried_fullab[(buried_fullab.idcode == pdb_idcode) & (buried_fullab.chainID == 'L')].iterrows() ])))
    for x in cdr_L_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain L")\n')

    ##
    # 
    ##

    for n, cluster in enumerate(sorted(nx.connected_components(G), key = len, reverse = True)):
        linea = f''
        cluster_id = 'cluster_' + str(n+1)
        fil.write(f'cmd.select("id ')
        for c in cluster:
            c_serial = trj_in.topology.atom(c).serial
            linea += f'{c_serial}+'
        fil.write(linea[0:-1])
        fil.write(f'")\n')
        fil.write(f'cmd.set_name("sele", "{cluster_id}")\n')
        fil.write(f'cmd.show("spheres", "{cluster_id}")\n')
        color = "%06x" % random.randint(0, 0xFFFFFF)
        fil.write(f'cmd.color("0x{color}", "{cluster_id}")\n')

In [473]:
cdr_L_residues = np.unique(list(itertools.chain(
        *[ list(range(fila.cdr_begin, fila.cdr_end+1)) for index, fila in buried_fullab[(buried_fullab.idcode == pdb_idcode) & (buried_fullab.chainID == 'K')].iterrows() ])))

In [474]:
cdr_L_residues

array([], dtype=float64)

In [453]:
np.unique(list(itertools.chain(*[ list(range(fila.cdr_begin, fila.cdr_end+1)) for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

array([ 24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  50,  51,
        52,  53,  54,  55,  56,  89,  90,  91,  92,  93,  94,  95,  96,
        97,  98,  99, 100, 101, 102])

In [443]:
list(range(buried_fullab.iloc[4805].cdr_begin, buried_fullab.iloc[4805].cdr_end+1))

[95, 96, 97, 98, 99, 100, 101, 102]

# intento 4
##### igual q el 3, pero busca reducir el número de clusters, juntando aquellos q tengan carbonos de la misma proteína (Ab o Ag) y sean cercanos entre sí

In [None]:
get_ids_CONS(topologia, buried_fullab, pdb_idcode)

In [9]:
pdb_idcode = '4ydk'
pdb_filename = Path.joinpath(casa_dir, "tempo", pdb_idcode + ".pdb")
trj_in = md.load(pdb_filename)

serial_to_id = {}
for atomo in trj_in.topology.atoms:
    serial_to_id[atomo.serial] = atomo.index

all_epitope_atoms = np.unique(list(itertools.chain(
    *[ fila.epitope_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

all_cdr_atoms = np.unique(list(itertools.chain(
    *[ fila.cdr_atoms for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))

ids_C_epitope_atoms = []
ids_ONS_epitope_atoms = []
for id in all_epitope_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_epitope_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_epitope_atoms.append(index)

ids_C_cdr_atoms = []
ids_ONS_cdr_atoms = []
for id in all_cdr_atoms:
    index = serial_to_id[id]
    elemento = trj_in.topology.atom(index).element.symbol
    if elemento == 'C':
        ids_C_cdr_atoms.append(index)
    elif elemento == 'O' or elemento == 'N' or elemento == 'S':
        ids_ONS_cdr_atoms.append(index)

ids_ONS_atoms = ids_ONS_cdr_atoms + ids_ONS_epitope_atoms 

C_ONS_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_ONS_atoms)))
C_C_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_C_epitope_atoms)))

C_ONS_distancias = md.compute_distances(trj_in, C_ONS_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_ONS_atoms)))

C_C_distancias = md.compute_distances(trj_in, C_C_pairs).reshape((len(ids_C_cdr_atoms),
    len(ids_C_epitope_atoms)))

cutoff = 0.5

In [12]:
indices_close_C_C_distancias = np.where(C_C_distancias < cutoff)
mask_close_C_ONS_distancias = C_ONS_distancias < cutoff
G = nx.Graph()
shielding_atoms_serial = {}

for i, j in zip(*indices_close_C_C_distancias):
    C_cdr_id = ids_C_cdr_atoms[i]
    C_epi_id = ids_C_epitope_atoms[j]
    surrounding_ONS_ids = [ids_ONS_atoms[i] for i in np.where(mask_close_C_ONS_distancias[i, :])[0] ]
    
    shielded, ONS_id = is_shielded(trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ONS_ids)

    C1 = trj_in.topology.atom(C_cdr_id).serial
    C2 = trj_in.topology.atom(C_epi_id).serial
    ONS = trj_in.topology.atom(ONS_id).serial
    if shielded:
        shielding_atoms_serial[i] = trj_in.topology.atom(ONS_id).serial
    else:
        G.add_edge(C_cdr_id, C_epi_id)

In [13]:
cutoff_clusters = .2
pre_clusteres = []
for cluster in sorted(nx.connected_components(G), key = len, reverse = True):
    pre_clusteres.append(cluster)

In [14]:
def clusters_are_close(cluster_1, cluster_2, cutoff_clusters):
    for carbon in cluster_1:
        close_clusters = np.any(md.compute_distances(trj_in,
            list(itertools.product([carbon], list(cluster_2)))) < cutoff_clusters)
        if close_clusters:
            # return  (cluster_1 + cluster_2)
            return True
    # return cluster_1
    return False

In [16]:
H = nx.Graph()
for i, clu_i in enumerate(pre_clusteres):
    for j in range(i+1, len(pre_clusteres)):
        if clusters_are_close(clu_i, pre_clusteres[j], cutoff_clusters):
            H.add_edge(i, j)
        

In [17]:
clusteres = []

for connected_clusters in sorted(nx.connected_components(H), key = len, reverse = True):
    new_cluster = []
    for i in connected_clusters:
        # print(f'{type(i)}, -- {type(pre_clusteres[i])}')
        new_cluster.extend(pre_clusteres[i])
    print(f'{connected_clusters=}')
    clusteres.append(new_cluster)

connected_clusters={0, 3, 7}
connected_clusters={2, 4}
connected_clusters={8, 5}


In [18]:
with open("tempo/clusters_hydrophobic.py", "w") as fil:
    fil.write(f'from pymol import cmd\n\n')
    fil.write(f'cmd.load("{buried_fullab.iloc[4805].idcode}.pdb")\n')
    fil.write(f'cmd.color("salmon", "chain G")\n')
    fil.write(f'cmd.color("atomic", "(not elem C)")\n\n')

    # Show epitope residues as lines
    epitope_residues = np.unique(list(itertools.chain(
    *[ fila.epitope_residues for index, fila in buried_fullab[buried_fullab.idcode == pdb_idcode].iterrows() ])))
    for x in epitope_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain G")\n')

    # Show CDR residues as lines
    cdr_H_residues = np.unique(list(itertools.chain(
        *[ list(range(fila.cdr_begin, fila.cdr_end+1)) for index, fila in buried_fullab[(buried_fullab.idcode == pdb_idcode) & (buried_fullab.chainID == 'H')].iterrows() ])))
    for x in cdr_H_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')
    # CD3 has several residues numbered '100 + letter'
    cdrH3_extra_residues = [''.join(tuple) for tuple in list(itertools.product(["100"], string.ascii_uppercase[0:12]))]
    for x in cdrH3_extra_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')
    
    cdr_L_residues = np.unique(list(itertools.chain(
        *[ list(range(fila.cdr_begin, fila.cdr_end+1)) for index, fila in buried_fullab[(buried_fullab.idcode == pdb_idcode) & (buried_fullab.chainID == 'L')].iterrows() ])))
    for x in cdr_L_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain L")\n')

    ##
    # 
    ##

    for n, cluster in enumerate(clusteres):
        linea = f''
        cluster_id = 'cluster_' + str(n+1)
        fil.write(f'cmd.select("id ')
        for c in cluster:
            c_serial = trj_in.topology.atom(c).serial
            linea += f'{c_serial}+'
        fil.write(linea[0:-1])
        fil.write(f'")\n')
        fil.write(f'cmd.set_name("sele", "{cluster_id}")\n')
        fil.write(f'cmd.show("spheres", "{cluster_id}")\n')
        color = "%06x" % random.randint(0, 0xFFFFFF)
        fil.write(f'cmd.color("0x{color}", "{cluster_id}")\n')

In [684]:
len(pre_clusteres[8])

2

# intento 5

---

In [128]:
with open("tempo/shielded.py", "w") as fil:
    fil.write(f'from pymol import cmd\n\n')
    fil.write(f'cmd.load("{buried_fullab.iloc[4805].idcode}.pdb")\n')
    fil.write(f'cmd.color("salmon", "chain G")\n')
    fil.write(f'cmd.color("atomic", "(not elem C)")\n\n')
    for x in np.unique(buried_fullab.iloc[4805].epitope_residues):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain G")\n')

    for x in range(buried_fullab.iloc[4805].cdr_begin, buried_fullab.iloc[4805].cdr_end+1):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')

    cdrH3_extra_residues = [''.join(tuple) for tuple in list(itertools.product(["100"], string.ascii_uppercase[0:12]))]
    for x in cdrH3_extra_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')

with open("tempo/hydro.py", "w") as fil:
    fil.write(f'from pymol import cmd\n\n')
    fil.write(f'cmd.load("{buried_fullab.iloc[4805].idcode}.pdb")\n')
    fil.write(f'cmd.color("salmon", "chain G")\n')
    fil.write(f'cmd.color("atomic", "(not elem C)")\n\n')

    for x in np.unique(buried_fullab.iloc[4805].epitope_residues):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain G")\n')

    for x in range(buried_fullab.iloc[4805].cdr_begin, buried_fullab.iloc[4805].cdr_end+1):
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')

    cdrH3_extra_residues = [''.join(tuple) for tuple in list(itertools.product(["100"], string.ascii_uppercase[0:12]))]
    for x in cdrH3_extra_residues:
        fil.write(f'cmd.show("lines", "resi ' + str(x) + f' and chain H")\n')



In [129]:
with open("tempo/shielded.py", "a") as fil:
    for i, j in zip(*np.where(C_C_distancias < cutoff)):
        C_cdr_id = ids_C_cdr_atoms[i]
        C_epi_id = ids_C_epitope_atoms[j]
        surrounding_ONS_ids = [ids_ONS_atoms[i] for i in np.where(C_ONS_distancias[i, :] < cutoff)[0]]
        
        shielded, ONS_id = is_shielded(trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ONS_ids)

        C1 = trj_in.topology.atom(C_cdr_id).serial
        C2 = trj_in.topology.atom(C_epi_id).serial
        ONS = trj_in.topology.atom(ONS_id).serial
        if shielded:
            fil.write(f'cmd.show("spheres", "id {C1}+{C2}+{ONS}")\n')

with open("tempo/hydro.py", "a") as fil:
    for i, j in zip(*np.where(C_C_distancias < cutoff)):
        C_cdr_id = ids_C_cdr_atoms[i]
        C_epi_id = ids_C_epitope_atoms[j]
        surrounding_ONS_ids = [ids_ONS_atoms[i] for i in np.where(C_ONS_distancias[i, :] < cutoff)[0]]
        
        shielded, ONS_id = is_shielded(trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ONS_ids)

        C1 = trj_in.topology.atom(C_cdr_id).serial
        C2 = trj_in.topology.atom(C_epi_id).serial
        ONS = trj_in.topology.atom(ONS_id).serial
        if not shielded:
            fil.write(f'cmd.show("spheres", "id {C1}+{C2}")\n')

In [1]:
buried_fullab.query(f"idcode == '4ydk'")

NameError: name 'buried_fullab' is not defined