In [1]:
import alphaspace2 as al
import mdtraj
import numpy as np
from scipy.spatial.distance import cdist
from scipy.cluster.hierarchy import fcluster, linkage
from alphaspace2.functions import _binCluster, _group
from alphaspace2.Cluster import _DPocket

In [9]:
import features  ## python module with pseudomolecular features
import glob
from collections import defaultdict 
import matplotlib.pyplot as plt
import pandas as pd

### For this tutorial, we use pocket features to compare the pocket/ligand similarities of 10 HIV-protease inhibitors

In [3]:
# Identifiers of the 10 HIV-protease systems analysed in this tutorial; each one
# is used below to build the protein/ligand PDB file names.
protease_list = [
    '1c70', '1hvi', '1hvj', '1izh', '1pro',
    '1siv', '2i0a', '2psv', '2q5k', '3lzu',
]

### Calculating the contact pockets for proteases

In [4]:
# Per-structure containers, all keyed by the identifiers in protease_list:
#   protease_data_prot -> protein structure loaded with mdtraj
#   protease_data_lig  -> ligand structure loaded with mdtraj
#   protease_data_ss   -> alphaspace2 Snapshot run on that protein/ligand pair
protease_data_prot = {}
protease_data_lig = {}
protease_data_ss = {}
for pdb_id in protease_list:
    # Read the protein and ligand files of this system.
    prot = mdtraj.load('Beta_Cluster_features/protein_' + pdb_id + '.pdb')
    lig = mdtraj.load('Beta_Cluster_features/ligand_' + pdb_id + '.pdb')

    # Run the AlphaSpace snapshot on the protein together with its ligand.
    ss_prot = al.Snapshot()
    ss_prot.run(prot, lig)

    protease_data_prot[pdb_id] = prot
    protease_data_lig[pdb_id] = lig
    protease_data_ss[pdb_id] = ss_prot
    

In [5]:
# For every snapshot, keep the beta-atom coordinates of each pocket flagged
# `isContact`, keyed by structure and then by the pocket's index in
# `snapshot.pockets`. Structures without any such pocket get no entry at all.
protease_contact_pockets = defaultdict(dict)
for pdb_id, snapshot in protease_data_ss.items():
    for px, pocket in enumerate(snapshot.pockets):
        if not pocket.isContact:
            continue
        beta_coords = [b.xyz for b in pocket.betas]
        protease_contact_pockets[pdb_id][px] = np.array(beta_coords)
            

### Calculate some beta cluster features 

In [6]:
# Compute the descriptors of the contact beta cluster of every structure.
#
# `protease_contact_pockets` is already built by the preceding cell from the
# same snapshots, so it is reused here instead of being recomputed a second
# time with an identical copy of that loop.
protease_pocket_props_dict = {}
for pdb_id, pockets in protease_contact_pockets.items():
    prot = protease_data_prot[pdb_id]

    # Pool the beta coordinates of all contact pockets of this structure into
    # a single array (one row per beta atom).
    contact_betas = np.array(
        [xyz for pocket_betas in pockets.values() for xyz in pocket_betas]
    )

    # Feature names must match the keys consumed when the dataframe is built.
    beta_temp_dict = {
        'occluded_asa': features._get_pharmacophore_fingerprint(prot, contact_betas),
        'usr': features._Get_USR_alpha_beta(contact_betas),
        'normalized_PMI': features._calculate_NormalizedRatioPMI(contact_betas),
        'span': features._calculate_SPAN(contact_betas),
        'radius_of_gyration': features._calculate_RadiusofGyration(contact_betas),
    }
    protease_pocket_props_dict[pdb_id] = beta_temp_dict

### Repeat calculation for ligands

In [7]:
# Same set of descriptors as for the pockets, this time evaluated on the
# ligand atom coordinates of every structure.
protease_ligand_props_dict = {}
for pdb_id, prot in protease_data_prot.items():
    lig = protease_data_lig[pdb_id]
    # Coordinates of the first entry of lig.xyz, scaled by 10 to convert
    # nm to Angstrom.
    lig_coords = lig.xyz[0] * 10

    lig_temp_dict = {
        'occluded_asa': features._get_pharmacophore_fingerprint(prot, lig_coords),
        'usr': features._Get_USR_alpha_beta(lig_coords),
        'normalized_PMI': features._calculate_NormalizedRatioPMI(lig_coords),
        'span': features._calculate_SPAN(lig_coords),
        'radius_of_gyration': features._calculate_RadiusofGyration(lig_coords),
    }
    protease_ligand_props_dict[pdb_id] = lig_temp_dict

### Save data into a pandas dataframe

In [20]:
# Start from an empty frame; the cells below add the PDB_ID column and then one
# column per pocket/ligand feature to it.
pocket_ligand_data = pd.DataFrame()

In [24]:
# For each dict-valued feature, the sub-keys that are copied into the dataframe
# (one column per sub-key, for both the pocket and the ligand).
field_subfields = {
    'occluded_asa': [
        'Total_OASA',
        'Positive_OASA',
        'Negative_OASA',
        'H_bond_Donor_OASA',
        'H_bond_Acceptor_OASA',
        'H_bond_Doneptor_OASA',
        'Aromatic_OASA',
        'Hydrophobic_OASA',
        'Polar_OASA',
        'Null_type_OASA',
    ],
    'usr': [
        'ctd_1', 'ctd_2', 'ctd_3',
        'cst_1', 'cst_2', 'cst_3',
        'fct_1', 'fct_2', 'fct_3',
        'ftf_1', 'ftf_2', 'ftf_3',
    ],
    'normalized_PMI': ['NPR1', 'NPR2'],
}

In [28]:
# Row order of the frame: one row per structure that has pocket properties.
pdb_id = list(protease_pocket_props_dict.keys())
pocket_ligand_data['PDB_ID'] = pdb_id

# Column prefix paired with the property dict it is read from. Protein columns
# are always written before the matching ligand columns.
column_sources = (
    ('protein_', protease_pocket_props_dict),
    ('ligand_', protease_ligand_props_dict),
)

# Dict-valued features: one column per listed sub-key.
for field, subfields in field_subfields.items():
    for prefix, props in column_sources:
        for sub in subfields:
            pocket_ligand_data[prefix + sub] = [props[pdb][field][sub] for pdb in pdb_id]

# Scalar features: one column per feature.
for field in ['span', 'radius_of_gyration']:
    for prefix, props in column_sources:
        pocket_ligand_data[prefix + field] = [props[pdb][field] for pdb in pdb_id]
        

### Displaying properties in dataframe

In [31]:
# Show a compact protein-vs-ligand view: total OASA, span and radius of gyration.
summary_columns = [
    'PDB_ID',
    'protein_Total_OASA',
    'ligand_Total_OASA',
    'protein_span',
    'ligand_span',
    'protein_radius_of_gyration',
    'ligand_radius_of_gyration',
]
pocket_ligand_data[summary_columns]

Unnamed: 0,PDB_ID,protein_Total_OASA,ligand_Total_OASA,protein_span,ligand_span,protein_radius_of_gyration,ligand_radius_of_gyration
0,1c70,621.255975,575.210036,9.836,7.644,5.713,5.354
1,1hvi,829.419295,649.139736,12.357,9.667,7.07,6.12
2,1hvj,775.498047,645.416238,12.107,9.898,6.961,6.136
3,1izh,697.906888,561.791315,10.926,8.833,6.604,5.718
4,1pro,512.777,461.788556,11.557,7.44,5.705,4.802
5,1siv,592.609946,518.247853,10.41,10.339,6.298,6.012
6,2i0a,590.784228,456.540037,10.949,10.784,6.222,5.806
7,2psv,464.632414,396.808604,10.012,8.257,5.741,4.494
8,2q5k,723.997743,537.52224,12.12,8.934,6.729,5.36
9,3lzu,502.946246,413.259663,8.951,8.506,5.531,4.748


### Beta clusters allow for the direct comparison of ligands and their binding-site pockets. Here we calculate the similarity between each ligand and its beta pocket, taken as 1 minus `features._soergel` of their occluded surface area feature vectors

In [32]:
# Pocket/ligand similarity on the occluded-ASA features, computed as
# 1 - features._soergel(pocket_vector, ligand_vector).
#
# The loop walks the frame's own PDB_ID column rather than protease_list: the
# rows were created from the keys of protease_pocket_props_dict, so this keeps
# every value on the row it belongs to and cannot hit a structure that has no
# pocket properties.
oasa_similarity = []
for pdb_id in pocket_ligand_data['PDB_ID']:
    oasa_pocket = list(protease_pocket_props_dict[pdb_id]['occluded_asa'].values())
    oasa_ligand = list(protease_ligand_props_dict[pdb_id]['occluded_asa'].values())
    oasa_similarity.append(1 - features._soergel(oasa_pocket, oasa_ligand))

pocket_ligand_data['OASA_similarity'] = oasa_similarity
pocket_ligand_data[['PDB_ID','OASA_similarity']]

Unnamed: 0,PDB_ID,OASA_similarity
0,1c70,0.925883
1,1hvi,0.782644
2,1hvj,0.830627
3,1izh,0.804966
4,1pro,0.900564
5,1siv,0.83964
6,2i0a,0.77277
7,2psv,0.854027
8,2q5k,0.742436
9,3lzu,0.821678


### Likewise, we calculate their similarity based on USR features 

In [34]:
# Pocket/ligand similarity on the USR features, computed as
# 1 - features._soergel(pocket_vector, ligand_vector).
#
# The loop walks the frame's own PDB_ID column rather than protease_list: the
# rows were created from the keys of protease_pocket_props_dict, so this keeps
# every value on the row it belongs to and cannot hit a structure that has no
# pocket properties.
usr_similarity = []
for pdb_id in pocket_ligand_data['PDB_ID']:
    usr_pocket = list(protease_pocket_props_dict[pdb_id]['usr'].values())
    usr_ligand = list(protease_ligand_props_dict[pdb_id]['usr'].values())
    usr_similarity.append(1 - features._soergel(usr_pocket, usr_ligand))

pocket_ligand_data['USR_similarity'] = usr_similarity
pocket_ligand_data[['PDB_ID','USR_similarity']]

Unnamed: 0,PDB_ID,USR_similarity
0,1c70,0.815841
1,1hvi,0.804031
2,1hvj,0.866056
3,1izh,0.79485
4,1pro,0.771391
5,1siv,0.891211
6,2i0a,0.921977
7,2psv,0.760895
8,2q5k,0.796197
9,3lzu,0.832484
