In [1]:
import sys
from pathlib import Path
import itertools
import pickle
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from collections import namedtuple
# Lightweight record types used by the interaction data handled in this notebook.
# NOTE(review): presumably these must exist in the notebook namespace so the
# pickled interaction records can be rebuilt on load -- confirm against the
# scripts that wrote the pickles.

# A ring-ring pair, one member per molecule.
PiPiPair = namedtuple('PiPiPair', 'antibody antigen')
# A ring-ion pair.
PionPair = namedtuple('PionPair', 'ring ion')
# One atom taking part in a hydrogen bond.
HBondAtom = namedtuple(
    'HBondAtom',
    'chainID chain_type CDR resSeq index serial element is_sidechain')
# A hydrogen bond described by its donor and acceptor atoms.
HBond = namedtuple('HBond', 'donor acceptor')
# One shielding atom; it carries the same fields as HBondAtom.
ShieldingAtom = namedtuple(
    'ShieldingAtom',
    'chainID chain_type CDR resSeq index serial element is_sidechain')
from collections import Counter

# Make the notebook's working directory importable so the local `scripts`
# package can be found.
# sys.path entries have to be plain strings: the import system skips any
# other type, so appending the Path object itself would have no effect.
source_location = Path().resolve()
sys.path.append(str(source_location))

from scripts.abag_interactions_hydrophobic import *
from scripts.abag_interactions_rings import *
from scripts.more_utils import *

# Root folder of the project data, and the folder holding the raw structures.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook elsewhere.
casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")
str_dir = casa_dir / "structures" / "raw"

In [2]:
# Read the pickled `filenames` mapping; its keys are the PDB id codes that
# every analysis loop below iterates over.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
filenames_pkl = casa_dir / 'data' / 'filenames.pkl'
with filenames_pkl.open('rb') as file:
    filenames = pickle.load(file)

pdb_list = [*filenames.keys()]
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


------

## Hydrophobic

In [3]:
# Load the six hydrophobic-interaction DataFrames stored together in one pickle.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
hydrophobic_pkl = casa_dir / 'data' / 'hydrophobic.pkl'
with hydrophobic_pkl.open('rb') as file:
    (df_hydrophobic_atom_indices, df_hydrophobic_atom_serials,
     df_hydrophobic_resSeq, df_hydrophobic_chain_ID,
     df_hydrophobic_chain_type, df_hydrophobic_cdr) = pickle.load(file)

In [5]:
# Tally the carbons taking part in hydrophobic clusters, per chain type and
# per chain-type + CDR label, over every PDB in `pdb_list`.
chain_type_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)
# Same keys as cdr_dict, but fed only by the first (largest) cluster of each PDB.
cdr_dict_big_cluster = dict.fromkeys(cdr_dict, 0)
size_largest_cluster = []
clusters_count = []

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_hydrophobic_chain_type.query(
        idcode_filter).chain_type[0]
    cdr_of_each_contact = df_hydrophobic_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    nbr_of_clusters = 0
    list_chain_type_cdr = []
    clusters = zip(cdr_of_each_contact, chain_type_of_each_contact)
    for cluster_rank, (cluster_cdr, cluster_chain_type) in enumerate(clusters):
        # Only clusters involving more than 4 carbons are counted.
        if len(cluster_cdr) > 4:
            nbr_of_clusters += 1

        for contact_cdr, contact_chain_type in zip(cluster_cdr, cluster_chain_type):
            if contact_chain_type == '':
                # Antigen carbons carry no chain type; leave them out.
                continue
            chain_type_dict[contact_chain_type] += 1

            chain_type_cdr = contact_chain_type + str(contact_cdr)
            cdr_dict[chain_type_cdr] += 1

            list_chain_type_cdr.append(chain_type_cdr)

        if cluster_rank == 0:
            # Clusters are sorted by size, so the first one is the largest.
            # At this point the list only holds this cluster's contacts.
            for cdr_label, n_contacts in Counter(list_chain_type_cdr).items():
                cdr_dict_big_cluster[cdr_label] += n_contacts

            size_largest_cluster.append(len(cluster_cdr))
    clusters_count.append(nbr_of_clusters)

### origin_of_carbons_in_hydrophobic_interactions

In [6]:
# Donut chart: CDR of origin of the carbons found in hydrophobic interactions.
# Counts recorded under the 'K' and 'L' chain types are pooled into the L1-L3
# slices. The L3 slice previously added cdr_dict['L3'] to itself, which dropped
# the 'K3' counts and counted the 'L3' ones twice.
go.Figure(data = go.Pie(labels = ['H1', 'H2', 'H3', 'L1', 'L2', 'L3'],
    values = [cdr_dict['H1'], cdr_dict['H2'], cdr_dict['H3'],
    cdr_dict['L1']+cdr_dict['K1'], cdr_dict['L2']+cdr_dict['K2'],
    cdr_dict['L3']+cdr_dict['K3']], hole = .4) )

### origin_of_the_carbons_from_the_biggest_hydrophobic_cluster

In [7]:
# Donut chart: CDR of origin of the carbons in the largest hydrophobic cluster
# of each PDB. Counts recorded under the 'K' and 'L' chain types are pooled
# into the L1-L3 slices. The L2 slice previously added
# cdr_dict_big_cluster['L2'] to itself, which dropped the 'K2' counts and
# counted the 'L2' ones twice.
go.Figure(data = go.Pie(labels = ['H1', 'H2', 'H3', 'L1', 'L2', 'L3'],
    values = [cdr_dict_big_cluster['H1'], cdr_dict_big_cluster['H2'],
    cdr_dict_big_cluster['H3'], cdr_dict_big_cluster['L1']+cdr_dict_big_cluster['K1'],
    cdr_dict_big_cluster['L2']+cdr_dict_big_cluster['K2'],
    cdr_dict_big_cluster['L3']+cdr_dict_big_cluster['K3']], hole = .4))

### number_of_clusters_per_PDB

In [10]:
# Probability-normalised histogram of the per-PDB cluster counts collected
# in `clusters_count`.
clusters_count_df = pd.DataFrame({'count': clusters_count})
figu = px.histogram(
    clusters_count_df,
    x='count',
    histnorm='probability',
    labels={'count': "# of clusters"},
)
figu.update_xaxes(tick0=1, dtick=1)

### size_of_the_largest_cluster_of_each_PDB

In [11]:
# Probability-normalised histogram (20 bins) of the sizes collected in
# `size_largest_cluster`.
largest_cluster_df = pd.DataFrame({'size': size_largest_cluster})
figu = px.histogram(
    largest_cluster_df,
    x='size',
    nbins=20,
    histnorm='probability',
    labels={'size': "# of carbons on largest cluster"},
)
figu.update_xaxes(tick0=5, dtick=10)

---

## Ring

In [47]:
# Each ring-interaction pickle holds seven DataFrames, unpacked in a fixed order.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.

# Some Pi-Pi interactions involve a ring that does not belong to a CDR.
# This never happens with the hydrophobic interactions, because only CDR
# atoms are looked at for those.
pipi_pkl = casa_dir / 'data' / 'PiPi.pkl'
with open(pipi_pkl, 'rb') as file:
    (df_PiPi_atom_indices, df_PiPi_atom_serials,
     df_PiPi_resSeq, df_PiPi_name, df_PiPi_chain_ID,
     df_PiPi_chain_type, df_PiPi_cdr) = pickle.load(file)

# Likewise, some Pi-anion and Pi-cation interactions involve an ion or a ring
# that does not belong to a CDR.
pianion_pkl = casa_dir / 'data' / 'PiAnion.pkl'
with open(pianion_pkl, 'rb') as file:
    (df_PiAnion_atom_indices, df_PiAnion_atom_serials,
     df_PiAnion_resSeq, df_PiAnion_name, df_PiAnion_chain_ID,
     df_PiAnion_chain_type, df_PiAnion_cdr) = pickle.load(file)

pication_pkl = casa_dir / 'data' / 'PiCation.pkl'
with open(pication_pkl, 'rb') as file:
    (df_PiCation_atom_indices, df_PiCation_atom_serials,
     df_PiCation_resSeq, df_PiCation_name, df_PiCation_chain_ID,
     df_PiCation_chain_type, df_PiCation_cdr) = pickle.load(file)

In [22]:
# Tally the Pi-Pi pairs of every PDB, per chain type and per chain-type + CDR label.
chain_type_pipi_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_pipi_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_PiPi_of_each_contact = df_PiPi_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_PiPi_of_each_contact:
        # Empty entry: no Pi-Pi interactions for this PDB.
        continue

    cdr_of_each_pair = df_PiPi_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_PiPi_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    list_chain_type_PiPi_cdr = []
    for ring_cdr, ring_chain_type in zip(cdr_of_each_pair, chain_type_PiPi_of_each_contact):
        # Read the pair's 'antibody' member, unless its chain type is empty,
        # in which case the 'antigen' member is the one that gets counted.
        member = 'antigen' if ring_chain_type.antibody == '' else 'antibody'
        member_chain_type = getattr(ring_chain_type, member)
        chain_type_pipi_dict[member_chain_type] += 1

        chain_type_PiPi_cdr = member_chain_type + str(getattr(ring_cdr, member))
        cdr_pipi_dict[chain_type_PiPi_cdr] += 1

        list_chain_type_PiPi_cdr.append(chain_type_PiPi_cdr)

In [23]:
# Tally the Pi-anion pairs of every PDB, per chain type and per chain-type + CDR label.
chain_type_anion_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_anion_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiAnion_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-anion interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiAnion_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    list_chain_type_cdr = []
    for ringion_cdr, ringion_chain_type in zip(cdr_of_each_contact, chain_type_of_each_contact):
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        count_ion = ringion_chain_type.ring == ''
        member_chain_type = ringion_chain_type.ion if count_ion else ringion_chain_type.ring
        chain_type_anion_dict[member_chain_type] += 1

        member_cdr = ringion_cdr.ion if count_ion else ringion_cdr.ring
        chain_type_cdr = member_chain_type + str(member_cdr)
        cdr_anion_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [24]:
# Tally the Pi-cation pairs of every PDB, per chain type and per chain-type + CDR label.
chain_type_cation_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_cation_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)
most_pion = []

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiCation_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-cation interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiCation_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    list_chain_type_cdr = []
    for ringion_cdr, ringion_chain_type in zip(cdr_of_each_contact, chain_type_of_each_contact):
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        member = 'ion' if ringion_chain_type.ring == '' else 'ring'
        member_chain_type = getattr(ringion_chain_type, member)
        chain_type_cation_dict[member_chain_type] += 1

        chain_type_cdr = member_chain_type + str(getattr(ringion_cdr, member))
        cdr_cation_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [25]:
# Build the long-format table behind the grouped bar chart: one row per
# (interaction kind, CDR label). Heavy-chain counts are taken as they are;
# the 'L' and 'K' counts are summed under a single L label.
all_ring_interactions = []
for cdr_counts in (cdr_pipi_dict, cdr_anion_dict, cdr_cation_dict):
    all_ring_interactions += [cdr_counts[f'H{i}'] for i in range(4)]
    all_ring_interactions += [
        cdr_counts[f'L{i}'] + cdr_counts[f'K{i}'] for i in range(4)]

all_ring_group_interactions = [
    kind
    for kind in ("Pi stacking", "Pi anion", "Pi cation")
    for _ in range(8)
]

all_ring_cdrs = ['H0', 'H1', 'H2', 'H3', 'L0', 'L1', 'L2', 'L3'] * 3

df_ring = pd.DataFrame({
    "y": all_ring_interactions,
    "x": all_ring_cdrs,
    "color": all_ring_group_interactions,
})

### origin_of_rings_in_ring_interactions

In [28]:
# Grouped bars of the ring-interaction counts per CDR label, one colour per
# interaction kind. The y range is fixed to [0, 350].
fig_ring = px.histogram(
    df_ring, x="x", y="y", color='color', barmode='group', height=400)
# Both update_* calls hand back the figure, so the chained call displays it.
fig_ring.update_xaxes(title="CDR").update_yaxes(title="Count", range=[0, 350])

-----

### TYR

In [29]:
# Tally the Pi-Pi pairs in which at least one of the two residues is a TYR.
chain_type_pipi_tyr_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_pipi_tyr_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_PiPi_of_each_contact = df_PiPi_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_PiPi_of_each_contact:
        # Empty entry: no Pi-Pi interactions for this PDB.
        continue

    cdr_of_each_pair = df_PiPi_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_PiPi_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_PiPi_of_each_contact = df_PiPi_name.query(idcode_filter).resname[0]

    list_chain_type_PiPi_cdr = []
    pairs = zip(cdr_of_each_pair, chain_type_PiPi_of_each_contact,
                resname_PiPi_of_each_contact)
    for ring_cdr, ring_chain_type, ring_resname in pairs:
        if not (ring_resname.antibody == 'TYR' or ring_resname.antigen == 'TYR'):
            continue
        # Read the pair's 'antibody' member, unless its chain type is empty,
        # in which case the 'antigen' member is the one that gets counted.
        member = 'antigen' if ring_chain_type.antibody == '' else 'antibody'
        member_chain_type = getattr(ring_chain_type, member)
        chain_type_pipi_tyr_dict[member_chain_type] += 1

        chain_type_PiPi_cdr = member_chain_type + str(getattr(ring_cdr, member))
        cdr_pipi_tyr_dict[chain_type_PiPi_cdr] += 1

        list_chain_type_PiPi_cdr.append(chain_type_PiPi_cdr)

In [30]:
# Tally the Pi-anion pairs whose ring residue is a TYR.
chain_type_anion_tyr_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_anion_tyr_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiAnion_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-anion interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiAnion_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = df_PiAnion_name.query(idcode_filter).resname[0]

    list_chain_type_cdr = []
    pairs = zip(cdr_of_each_contact, chain_type_of_each_contact,
                resname_of_each_contact)
    for ringion_cdr, ringion_chain_type, ringion_resname in pairs:
        if ringion_resname.ring != 'TYR':
            continue
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        count_ion = ringion_chain_type.ring == ''
        member_chain_type = ringion_chain_type.ion if count_ion else ringion_chain_type.ring
        chain_type_anion_tyr_dict[member_chain_type] += 1

        member_cdr = ringion_cdr.ion if count_ion else ringion_cdr.ring
        chain_type_cdr = member_chain_type + str(member_cdr)
        cdr_anion_tyr_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [31]:
# Tally the Pi-cation pairs whose ring residue is a TYR.
chain_type_cation_tyr_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_cation_tyr_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)
most_pion = []

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiCation_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-cation interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiCation_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = df_PiCation_name.query(idcode_filter).resname[0]

    list_chain_type_cdr = []
    pairs = zip(cdr_of_each_contact, chain_type_of_each_contact,
                resname_of_each_contact)
    for ringion_cdr, ringion_chain_type, ringion_resname in pairs:
        if ringion_resname.ring != 'TYR':
            continue
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        member = 'ion' if ringion_chain_type.ring == '' else 'ring'
        member_chain_type = getattr(ringion_chain_type, member)
        chain_type_cation_tyr_dict[member_chain_type] += 1

        chain_type_cdr = member_chain_type + str(getattr(ringion_cdr, member))
        cdr_cation_tyr_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [32]:
# Long-format table for the TYR-only bar chart: one row per
# (interaction kind, CDR label). Each entry below lists the dictionary keys
# whose counts are summed into one bar ('L' and 'K' are pooled).
keys_per_bar = [('H0',), ('H1',), ('H2',), ('H3',),
                ('L0', 'K0'), ('L1', 'K1'), ('L2', 'K2'), ('L3', 'K3')]

all_ring_interactions = [
    sum(cdr_counts[key] for key in keys)
    for cdr_counts in (cdr_pipi_tyr_dict, cdr_anion_tyr_dict, cdr_cation_tyr_dict)
    for keys in keys_per_bar
]

all_ring_group_interactions = (
    ["Pi_tyr stacking"] * 8 + ["Pi_tyr anion"] * 8 + ["Pi_tyr cation"] * 8)

all_ring_cdrs = 3 * ['H0', 'H1', 'H2', 'H3', 'L0', 'L1', 'L2', 'L3']

df_tyr = pd.DataFrame({
    "y": all_ring_interactions,
    "x": all_ring_cdrs,
    "color": all_ring_group_interactions,
})

### origin_of_tyrs_in_ring_interactions

In [33]:
# Grouped bars of the TYR-only ring-interaction counts per CDR label.
# The y range is fixed to [0, 350], the same range the other ring charts use.
histogram_options = dict(x="x", y="y", color='color', barmode='group', height=400)
fig_ring = px.histogram(df_tyr, **histogram_options)
fig_ring.update_xaxes(title="CDR")
fig_ring.update_yaxes(title="Count", range=[0, 350])

### TRP

In [34]:
# Tally the Pi-Pi pairs in which at least one of the two residues is a TRP.
chain_type_pipi_trp_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_pipi_trp_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_PiPi_of_each_contact = df_PiPi_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_PiPi_of_each_contact:
        # Empty entry: no Pi-Pi interactions for this PDB.
        continue

    cdr_of_each_pair = df_PiPi_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_PiPi_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_PiPi_of_each_contact = df_PiPi_name.query(idcode_filter).resname[0]

    list_chain_type_PiPi_cdr = []
    pairs = zip(cdr_of_each_pair, chain_type_PiPi_of_each_contact,
                resname_PiPi_of_each_contact)
    for ring_cdr, ring_chain_type, ring_resname in pairs:
        if not (ring_resname.antibody == 'TRP' or ring_resname.antigen == 'TRP'):
            continue
        # Read the pair's 'antibody' member, unless its chain type is empty,
        # in which case the 'antigen' member is the one that gets counted.
        member = 'antigen' if ring_chain_type.antibody == '' else 'antibody'
        member_chain_type = getattr(ring_chain_type, member)
        chain_type_pipi_trp_dict[member_chain_type] += 1

        chain_type_PiPi_cdr = member_chain_type + str(getattr(ring_cdr, member))
        cdr_pipi_trp_dict[chain_type_PiPi_cdr] += 1

        list_chain_type_PiPi_cdr.append(chain_type_PiPi_cdr)

In [35]:
# Tally the Pi-anion pairs whose ring residue is a TRP.
chain_type_anion_trp_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_anion_trp_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiAnion_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-anion interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiAnion_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = df_PiAnion_name.query(idcode_filter).resname[0]

    list_chain_type_cdr = []
    pairs = zip(cdr_of_each_contact, chain_type_of_each_contact,
                resname_of_each_contact)
    for ringion_cdr, ringion_chain_type, ringion_resname in pairs:
        if ringion_resname.ring != 'TRP':
            continue
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        count_ion = ringion_chain_type.ring == ''
        member_chain_type = ringion_chain_type.ion if count_ion else ringion_chain_type.ring
        chain_type_anion_trp_dict[member_chain_type] += 1

        member_cdr = ringion_cdr.ion if count_ion else ringion_cdr.ring
        chain_type_cdr = member_chain_type + str(member_cdr)
        cdr_anion_trp_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [36]:
# Tally the Pi-cation pairs whose ring residue is a TRP.
chain_type_cation_trp_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_cation_trp_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)
most_pion = []

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiCation_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-cation interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiCation_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = df_PiCation_name.query(idcode_filter).resname[0]

    list_chain_type_cdr = []
    pairs = zip(cdr_of_each_contact, chain_type_of_each_contact,
                resname_of_each_contact)
    for ringion_cdr, ringion_chain_type, ringion_resname in pairs:
        if ringion_resname.ring != 'TRP':
            continue
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        member = 'ion' if ringion_chain_type.ring == '' else 'ring'
        member_chain_type = getattr(ringion_chain_type, member)
        chain_type_cation_trp_dict[member_chain_type] += 1

        chain_type_cdr = member_chain_type + str(getattr(ringion_cdr, member))
        cdr_cation_trp_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [37]:
# Long-format table for the TRP-only bar chart: one row per
# (interaction kind, CDR label). Heavy-chain counts are taken as they are;
# the 'L' and 'K' counts are summed under a single L label.
cdr_positions = ('0', '1', '2', '3')

all_ring_interactions = []
for cdr_counts in (cdr_pipi_trp_dict, cdr_anion_trp_dict, cdr_cation_trp_dict):
    heavy_counts = [cdr_counts['H' + pos] for pos in cdr_positions]
    light_counts = [cdr_counts['L' + pos] + cdr_counts['K' + pos]
                    for pos in cdr_positions]
    all_ring_interactions.extend(heavy_counts + light_counts)

all_ring_group_interactions = []
for kind in ("Pi_trp stacking", "Pi_trp anion", "Pi_trp cation"):
    all_ring_group_interactions.extend([kind] * 8)

all_ring_cdrs = ['H0', 'H1', 'H2', 'H3', 'L0', 'L1', 'L2', 'L3'] * 3

df_trp = pd.DataFrame({
    "y": all_ring_interactions,
    "x": all_ring_cdrs,
    "color": all_ring_group_interactions,
})

### origin_of_trps_in_ring_interactions

In [38]:
# Grouped bars of the TRP-only ring-interaction counts per CDR label.
# The y range is fixed to [0, 350], the same range the other ring charts use.
fig_ring = px.histogram(
    df_trp,
    x="x",
    y="y",
    color='color',
    barmode='group',
    height=400,
)
# Both update_* calls hand back the figure, so the chained call displays it.
fig_ring.update_xaxes(title="CDR").update_yaxes(title="Count", range=[0, 350])

### PHE

In [39]:
# Tally the Pi-Pi pairs in which at least one of the two residues is a PHE.
chain_type_pipi_phe_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_pipi_phe_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_PiPi_of_each_contact = df_PiPi_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_PiPi_of_each_contact:
        # Empty entry: no Pi-Pi interactions for this PDB.
        continue

    cdr_of_each_pair = df_PiPi_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_PiPi_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_PiPi_of_each_contact = df_PiPi_name.query(idcode_filter).resname[0]

    list_chain_type_PiPi_cdr = []
    pairs = zip(cdr_of_each_pair, chain_type_PiPi_of_each_contact,
                resname_PiPi_of_each_contact)
    for ring_cdr, ring_chain_type, ring_resname in pairs:
        if not (ring_resname.antibody == 'PHE' or ring_resname.antigen == 'PHE'):
            continue
        # Read the pair's 'antibody' member, unless its chain type is empty,
        # in which case the 'antigen' member is the one that gets counted.
        member = 'antigen' if ring_chain_type.antibody == '' else 'antibody'
        member_chain_type = getattr(ring_chain_type, member)
        chain_type_pipi_phe_dict[member_chain_type] += 1

        chain_type_PiPi_cdr = member_chain_type + str(getattr(ring_cdr, member))
        cdr_pipi_phe_dict[chain_type_PiPi_cdr] += 1

        list_chain_type_PiPi_cdr.append(chain_type_PiPi_cdr)

In [40]:
# Tally the Pi-anion pairs whose ring residue is a PHE, per chain type and
# per chain-type + CDR label.
chain_type_anion_phe_dict = {'H': 0, 'K': 0, 'L': 0}
cdr_anion_phe_dict = {'H1': 0, 'H2': 0, 'H3': 0, 'L1': 0, 'L2': 0, 'L3': 0,
'K1': 0, 'K2': 0, 'K3': 0, 'H0': 0, 'K0': 0, 'L0': 0}

for pdb_idcode in pdb_list:
    chain_type_of_each_contact = (df_PiAnion_chain_type.query(
        f"idcode == '{pdb_idcode}'").chain_type)[0]
    if not chain_type_of_each_contact:
        # No PiAnion interactions
        continue

    cdr_of_each_contact = (df_PiAnion_cdr.query(
        f"idcode == '{pdb_idcode}'").CDR)[0]
    if isinstance(chain_type_of_each_contact, float):
        # Testing if is NaN
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = (df_PiAnion_name.query(
        f"idcode == '{pdb_idcode}'").resname)[0]

    list_chain_type_cdr = []
    for ringion_cdr, ringion_chain_type, ringion_resname in \
        zip(cdr_of_each_contact, chain_type_of_each_contact, resname_of_each_contact):
        # Keep PHE rings only. This filter used to test for 'TYR' (left over
        # from the TYR cell), which made the PHE counts a copy of the TYR ones.
        if ringion_resname.ring != 'PHE':
            continue
        if ringion_chain_type.ring == '':
            # The ring has no chain type: count the ion's side of the pair.
            chain_type_anion_phe_dict[ringion_chain_type.ion] += 1

            chain_type_cdr = ringion_chain_type.ion + str(ringion_cdr.ion)
            cdr_anion_phe_dict[chain_type_cdr] += 1

            list_chain_type_cdr.append(chain_type_cdr)
        else:
            chain_type_anion_phe_dict[ringion_chain_type.ring] += 1

            chain_type_cdr = ringion_chain_type.ring + str(ringion_cdr.ring)
            cdr_anion_phe_dict[chain_type_cdr] += 1

            list_chain_type_cdr.append(chain_type_cdr)

In [44]:
# Tally the Pi-cation pairs whose ring residue is a PHE.
chain_type_cation_phe_dict = dict.fromkeys(('H', 'K', 'L'), 0)
cdr_cation_phe_dict = dict.fromkeys(
    ('H1', 'H2', 'H3', 'L1', 'L2', 'L3',
     'K1', 'K2', 'K3', 'H0', 'K0', 'L0'), 0)
most_pion = []

for pdb_idcode in pdb_list:
    idcode_filter = f"idcode == '{pdb_idcode}'"
    chain_type_of_each_contact = df_PiCation_chain_type.query(
        idcode_filter).chain_type[0]
    if not chain_type_of_each_contact:
        # Empty entry: no Pi-cation interactions for this PDB.
        continue

    cdr_of_each_contact = df_PiCation_cdr.query(idcode_filter).CDR[0]
    if isinstance(chain_type_of_each_contact, float):
        # A float entry is a NaN: report the PDB and skip it.
        print(f"-- BAD: {pdb_idcode} -- ")
        continue

    resname_of_each_contact = df_PiCation_name.query(idcode_filter).resname[0]

    list_chain_type_cdr = []
    pairs = zip(cdr_of_each_contact, chain_type_of_each_contact,
                resname_of_each_contact)
    for ringion_cdr, ringion_chain_type, ringion_resname in pairs:
        if ringion_resname.ring != 'PHE':
            continue
        # Count the ring's side of the pair, unless the ring has no chain
        # type, in which case the ion's side is counted.
        member = 'ion' if ringion_chain_type.ring == '' else 'ring'
        member_chain_type = getattr(ringion_chain_type, member)
        chain_type_cation_phe_dict[member_chain_type] += 1

        chain_type_cdr = member_chain_type + str(getattr(ringion_cdr, member))
        cdr_cation_phe_dict[chain_type_cdr] += 1

        list_chain_type_cdr.append(chain_type_cdr)

In [45]:
# Long-format table for the PHE-only bar chart: one row per
# (interaction kind, CDR label). Each entry below lists the dictionary keys
# whose counts are summed into one bar ('L' and 'K' are pooled).
keys_per_bar = [('H0',), ('H1',), ('H2',), ('H3',),
                ('L0', 'K0'), ('L1', 'K1'), ('L2', 'K2'), ('L3', 'K3')]
phe_count_dicts = (cdr_pipi_phe_dict, cdr_anion_phe_dict, cdr_cation_phe_dict)

all_ring_interactions = []
for cdr_counts in phe_count_dicts:
    for keys in keys_per_bar:
        all_ring_interactions.append(sum(cdr_counts[key] for key in keys))

all_ring_group_interactions = [
    kind
    for kind in ("Pi_phe stacking", "Pi_phe anion", "Pi_phe cation")
    for _ in range(8)
]

all_ring_cdrs = ['H0', 'H1', 'H2', 'H3', 'L0', 'L1', 'L2', 'L3'] * len(phe_count_dicts)

df_phe = pd.DataFrame({
    "y": all_ring_interactions,
    "x": all_ring_cdrs,
    "color": all_ring_group_interactions,
})

### origin_of_phes_in_ring_interactions

In [46]:
# Grouped bars of the PHE-only ring-interaction counts per CDR label.
# The y range is fixed to [0, 350], the same range the other ring charts use.
histogram_options = dict(x="x", y="y", color='color', barmode='group', height=400)
fig_ring = px.histogram(df_phe, **histogram_options)
fig_ring.update_xaxes(title="CDR")
fig_ring.update_yaxes(title="Count", range=[0, 350])

---

## Hydrogen bonding

In [37]:
# Load the pickled hydrogen-bond data.
# NOTE(review): the "90_39" suffix presumably encodes the detection thresholds
# named in the section heading below -- confirm.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
hbonds_pkl = casa_dir / 'data' / 'hbonds_90_39.pkl'
with hbonds_pkl.open('rb') as file:
    hbonds_90_39 = pickle.load(file)

In [38]:
# Tally every hydrogen bond per chain type and per chain-type + CDR label.
hbond = hbonds_90_39
chain_type_hbond_dict = {'H': 0, 'K': 0, 'L': 0}
cdr_hbond_dict = {'H1': 0, 'H2': 0, 'H3': 0, 'L1': 0, 'L2': 0, 'L3': 0,
'K1': 0, 'K2': 0, 'K3': 0, 'H0': 0, 'K0': 0, 'L0': 0}

for pdb_idcode in pdb_list:
    # The loop variable is `hbond_id` rather than `id`: at notebook top level
    # `id` would replace the builtin id() for every cell executed afterwards.
    for hbond_id in hbond[pdb_idcode]:
        bond = hbond[pdb_idcode][hbond_id]
        chain_type_acceptor = bond.acceptor.chain_type
        chain_type_donor = bond.donor.chain_type
        if chain_type_donor == -1:
            # The donor has chain type -1, so the acceptor's side is counted.
            # NOTE(review): -1 presumably marks an antigen atom -- confirm.
            chain_type_hbond_dict[chain_type_acceptor] += 1
            cdr = chain_type_acceptor + str(bond.acceptor.CDR)
            cdr_hbond_dict[cdr] += 1
        else:
            chain_type_hbond_dict[chain_type_donor] += 1
            cdr = chain_type_donor + str(bond.donor.CDR)
            cdr_hbond_dict[cdr] += 1

### Origin of H-bonds (min angle = 90, max distance = 39)

In [39]:
# Donut chart of H-bond counts per CDR label. Counts stored under the K labels
# are folded into the matching L label; the '*0' entries are not plotted.
go.Figure(
    data=go.Pie(
        labels=['H1', 'H2', 'H3', 'L1', 'L2', 'L3'],
        values=[cdr_hbond_dict['H1'], cdr_hbond_dict['H2'], cdr_hbond_dict['H3']]
        + [cdr_hbond_dict[l_key] + cdr_hbond_dict[k_key]
           for l_key, k_key in (('L1', 'K1'), ('L2', 'K2'), ('L3', 'K3'))],
        hole=.4,
    )
)

### Shielding

In [43]:
# Load the precomputed shielding records stored as "shielding.pkl".
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with (casa_dir / 'data' / 'shielding.pkl').open('rb') as file:
    shielding_dict = pickle.load(file)

In [44]:
# Tally the shielding atoms: first by molecule (antibody vs. antigen), then,
# for antibody atoms only, by chain type and by CDR label.
mol_dict = {'antibody': 0, 'antigen': 0}
chain_type_shield_dict = {'H': 0, 'K': 0, 'L': 0}
cdr_shield_dict = {'H1': 0, 'H2': 0, 'H3': 0, 'L1': 0, 'L2': 0, 'L3': 0,
                   'K1': 0, 'K2': 0, 'K3': 0, 'H0': 0, 'K0': 0, 'L0': 0}

for pdb_idcode in pdb_list:
    # Iterate over the atom records themselves. The loop variable used to be
    # called `id`, which rebound the builtin for the rest of the session.
    for atom in shielding_dict[pdb_idcode].values():
        # chain_type == -1 is the marker this notebook counts as "antigen".
        if atom.chain_type == -1:
            mol_dict['antigen'] += 1
            continue

        mol_dict['antibody'] += 1
        chain_type_shield_dict[atom.chain_type] += 1
        # CDR label = chain type letter followed by the atom's CDR value.
        cdr_shield_dict[atom.chain_type + str(atom.CDR)] += 1

### Origin of shielding ONs (polar atoms)

In [45]:
# Donut chart of shielding-atom counts per CDR label. Counts stored under the
# K labels are folded into the matching L label; '*0' entries are not plotted.
go.Figure(
    data=go.Pie(
        labels=['H1', 'H2', 'H3', 'L1', 'L2', 'L3'],
        values=[cdr_shield_dict['H1'], cdr_shield_dict['H2'], cdr_shield_dict['H3']]
        + [cdr_shield_dict[l_key] + cdr_shield_dict[k_key]
           for l_key, k_key in (('L1', 'K1'), ('L2', 'K2'), ('L3', 'K3'))],
        hole=.4,
    )
)

### Molecule of shielding ONs (antibody vs. antigen)

In [46]:
# Donut chart: how many shielding atoms sit on the antibody vs. the antigen.
go.Figure(
    data=go.Pie(
        labels=['antibody', 'antigen'],
        values=[mol_dict[side] for side in ('antibody', 'antigen')],
        hole=.4,
    )
)

### Hydrogen bonding and shielding

In [47]:
# Split the shielding atoms into those whose key also appears among the
# H-bond keys of the same complex and those that only shield.
#
# H-bond sets that were swapped in here at some point:
#   hbonds_1_32, hbonds_1_39 (active), hbonds_90_39, hbonds_120_39
# NOTE(review): the cells visible in this section only load hbonds_90_39;
# confirm hbonds_1_39 is loaded earlier in the notebook, otherwise this cell
# raises NameError after "Restart & Run All".
hbond = hbonds_1_39
ON_function_dict = {'only_shields': 0, 'shields_and_hbonds': 0}

for pdb_idcode in pdb_list:
    hbond_records = hbond[pdb_idcode]
    # The loop variable used to be called `id`, which rebound the builtin
    # for the rest of the kernel session.
    for atom_key in shielding_dict[pdb_idcode]:
        role = 'shields_and_hbonds' if atom_key in hbond_records else 'only_shields'
        ON_function_dict[role] += 1

In [53]:
# go.Figure(data = go.Pie(labels = ['Only shields', 'Shields and forms an Hbond'], 
#     values = [ON_function_dict['only_shields'],
#     ON_function_dict['shields_and_hbonds']], hole = .4) )

In [49]:
# Roles of side-chain polar atoms. For every complex the four role counts are
# divided by that complex's total and the resulting fractions are summed
# (not averaged) over all complexes.
ON_count_dict_SC = {'shields': 0, 'hbonds': 0, 'shields_and_hbonds': 0, 'none': 0}

for pdb_idcode in pdb_list:
    shield_records = shielding_dict[pdb_idcode]
    hbond_records = hbond[pdb_idcode]

    # Side-chain shielding atoms whose key is also an H-bond key.
    both_cnt = sum(
        shield_records[key].is_sidechain
        for key in shield_records.keys() & hbond_records.keys()
    )

    # Side-chain shielding atoms, excluding the overlap counted above.
    shield_cnt = sum(1 for atom in shield_records.values() if atom.is_sidechain) - both_cnt

    # Number of side-chain bond endpoints (0, 1 or 2 per bond), halved and
    # rounded up, then reduced by the overlap.
    endpoint_cnt = sum(
        bond.donor.is_sidechain + bond.acceptor.is_sidechain
        for bond in hbond_records.values()
    )
    hbond_cnt = (endpoint_cnt + 1) // 2 - both_cnt

    # Denominator: the two stored per-complex side-chain totals
    # (presumably CDR-side and epitope-side O/N atom counts -- TODO confirm).
    total = (shielding_dict[pdb_idcode + "_cnt_ON_cdr_SC"]
             + shielding_dict[pdb_idcode + "_cnt_ON_epi_SC"])

    ON_count_dict_SC['shields'] += shield_cnt / total
    ON_count_dict_SC['hbonds'] += hbond_cnt / total
    ON_count_dict_SC['shields_and_hbonds'] += both_cnt / total
    ON_count_dict_SC['none'] += (total - shield_cnt - hbond_cnt - both_cnt) / total

### Role of polar atoms: side chain (SC)

In [50]:
# Donut chart of the accumulated side-chain role fractions.
go.Figure(
    data=go.Pie(
        labels=[
            'Shielding polar atom',
            'Hydrogen bonding polar atom',
            'Shields and forms an Hbond',
            'None',
        ],
        values=[
            ON_count_dict_SC[role]
            for role in ('shields', 'hbonds', 'shields_and_hbonds', 'none')
        ],
        hole=.4,
    )
)

In [51]:
# Roles of non-side-chain polar atoms. For every complex the four role counts
# are divided by that complex's total and the resulting fractions are summed
# (not averaged) over all complexes.
ON_count_dict_BB = {'shields': 0, 'hbonds': 0, 'shields_and_hbonds': 0, 'none': 0}

for pdb_idcode in pdb_list:
    shield_records = shielding_dict[pdb_idcode]
    hbond_records = hbond[pdb_idcode]

    # Non-side-chain shielding atoms whose key is also an H-bond key.
    both_cnt = sum(
        not shield_records[key].is_sidechain
        for key in shield_records.keys() & hbond_records.keys()
    )

    # Non-side-chain shielding atoms, excluding the overlap counted above.
    shield_cnt = sum(1 for atom in shield_records.values() if not atom.is_sidechain) - both_cnt

    # Number of non-side-chain bond endpoints (0, 1 or 2 per bond), halved
    # and rounded up, then reduced by the overlap.
    endpoint_cnt = sum(
        (not bond.donor.is_sidechain) + (not bond.acceptor.is_sidechain)
        for bond in hbond_records.values()
    )
    hbond_cnt = (endpoint_cnt + 1) // 2 - both_cnt

    # Denominator: the two stored per-complex "BB" totals
    # (presumably CDR-side and epitope-side O/N atom counts -- TODO confirm).
    total = (shielding_dict[pdb_idcode + "_cnt_ON_cdr_BB"]
             + shielding_dict[pdb_idcode + "_cnt_ON_epi_BB"])

    ON_count_dict_BB['shields'] += shield_cnt / total
    ON_count_dict_BB['hbonds'] += hbond_cnt / total
    ON_count_dict_BB['shields_and_hbonds'] += both_cnt / total
    ON_count_dict_BB['none'] += (total - shield_cnt - hbond_cnt - both_cnt) / total

### Role of polar atoms: backbone (BB)

In [52]:
# Donut chart of the accumulated non-side-chain ("BB") role fractions.
go.Figure(
    data=go.Pie(
        labels=[
            'Shielding polar atom',
            'Hydrogen bonding polar atom',
            'Shields and forms an Hbond',
            'None',
        ],
        values=[
            ON_count_dict_BB[role]
            for role in ('shields', 'hbonds', 'shields_and_hbonds', 'none')
        ],
        hole=.4,
    )
)