In [1]:
import alphaspace2 as al
import mdtraj
import numpy as np
from scipy.spatial.distance import cdist
from scipy.cluster.hierarchy import fcluster, linkage
from alphaspace2.functions import _binCluster, _group
from alphaspace2.Cluster import _DPocket
import pandas as pd
import os

In [2]:
from communities import genCommunityPocket,CoreCluster

In [3]:
import features

### Use this function to strip hydrogens from the pdbqt files

In [14]:
def strip_h(input_file,output_file):
    '''
    Copy input_file to output_file, dropping hydrogen records.

    A line is dropped when its last whitespace-separated token starts with
    'H' (in PDB / PDBQT atom records the last column carries the element or
    atom type). Every other line, including blank lines, is copied unchanged.

    input_file and output_file need to be in pdb or pdbqt format.
    The input is read completely before the output is opened, so both
    arguments may name the same path.
    '''
    with open(input_file,'r') as source:
        inputlines = source.readlines()

    no_h_lines = []
    for line in inputlines:
        fields = line.split()
        # A blank line has no fields; keep it instead of failing on fields[-1].
        if fields and fields[-1].startswith('H'):
            continue
        no_h_lines.append(line)

    # The context manager closes the file even if writing fails part-way.
    with open(output_file,'w') as output:
        output.writelines(no_h_lines)

In [15]:
# Write a hydrogen-free copy of the receptor PDBQT next to the original file.
strip_h(
    input_file="CDK2_Communities/cdk2_prot.pdbqt",
    output_file="CDK2_Communities/cdk2_prot_noH.pdbqt",
)

## In this tutorial, we will be applying community detection for the full CDK2 surface

### In the first part we calculate the beta clusters 

In [16]:
# Load the receptor structure, then annotate it with the atom types read
# from the hydrogen-stripped PDBQT file.
cdk2_prot = mdtraj.load('CDK2_Communities/cdk2_prot.pdb')
al.annotateVinaAtomTypes(
    receptor=cdk2_prot,
    pdbqt="CDK2_Communities/cdk2_prot_noH.pdbqt",
)

In [17]:
# Create an AlphaSpace2 snapshot and run it on the loaded CDK2 structure.
# Later cells read the resulting pockets from ss_cdk2.pockets.
ss_cdk2 = al.Snapshot()
ss_cdk2.run(cdk2_prot)

### In the next cell, we store pocket properties in dictionaries that will be used to generate the surface communities

In [19]:
# Per-pocket lookup tables, all keyed by the pocket's position in
# ss_cdk2.pockets.
temp_space_dict = {}
temp_coords_dict = {}
temp_contact_dict = {}
temp_score_dict = {}
temp_centroids_dict = {}
for pocket_id, current_pocket in enumerate(ss_cdk2.pockets):
    # Materialise the beta atoms once and reuse them for every per-beta table.
    beta_atoms = list(current_pocket.betas)
    temp_coords_dict[pocket_id] = [list(beta.xyz) for beta in beta_atoms]
    # Each beta atom carries several scores; keep the lowest one.
    temp_score_dict[pocket_id] = np.array([min(beta.scores) for beta in beta_atoms])
    temp_space_dict[pocket_id] = np.array([beta.space for beta in beta_atoms])
    temp_contact_dict[pocket_id] = list(current_pocket.lining_atoms_idx)
    temp_centroids_dict[pocket_id] = current_pocket.centroid


In [20]:
# First trajectory frame, converted from nm to Angstrom (factor of 10).
prot_coords = 10 * cdk2_prot.xyz[0]

### Generating communities for the CDK2 surface using the genCommunityPocket function

In [21]:
# Group the pockets into surface communities. The positional inputs are the
# protein coordinates and the per-pocket dictionaries built in the cells above.
surface_communities = genCommunityPocket(
    prot_coords,
    temp_centroids_dict,
    temp_space_dict,
    temp_contact_dict,
    temp_score_dict,
    corecut=100,
    auxcut=30,
    tight_option=True,
    tight_cutoff_core=12.5,
    tight_cutoff_aux=6.5,
)

### Calculate additional features for each beta-cluster community

In [22]:
# For every community, gather the beta-atom coordinates of all member pockets
# (core and auxiliary) and derive its volume and occluded ASA from them.
surface_communities_props = {}
for community_id, members in surface_communities.items():
    member_pockets = members['core_pockets'] + members['aux_pockets']
    beta_xyz = np.array([
        xyz
        for pocket_id in member_pockets
        for xyz in temp_coords_dict[pocket_id]
    ])
    grid_volume = features._get_grid_volume(beta_xyz)
    fingerprint = features._get_pharmacophore_fingerprint(cdk2_prot, beta_xyz)
    # Key order here determines the column order of the DataFrame built later.
    surface_communities_props[community_id] = {
        'space': members['space'],
        'score': members['score'],
        'volume': grid_volume,
        'occluded_asa': fingerprint['Total_OASA'],
    }
    

### We save the pocket communities into a pandas DataFrame for easier viewing and data manipulation

In [23]:
# One row per community (the dict keys become the index), one column per property.
community_data = pd.DataFrame.from_dict(
    surface_communities_props,
    orient='index',
)
community_data

Unnamed: 0,space,score,volume,occluded_asa
0,1472.0,-28.62,690.25,619.090723
1,1199.0,-18.89,574.125,675.705542
2,889.0,-15.69,474.5,516.936319
3,885.0,-19.93,564.625,538.00384
4,659.0,-13.57,413.875,504.916808
5,647.0,-15.72,392.625,371.448937
6,457.0,-10.47,317.25,349.742828
7,406.0,-9.69,199.5,224.786338
8,351.0,-5.42,167.75,188.640394
9,335.0,-6.65,187.0,233.43567


### In this section, we present some scripts that can be used to visualize the CDK2 communities, along with the corresponding pictures. We will be saving the pocket communities in the CDK2_Communities/pocket_communities folder

In [16]:
# One row per atom:
# [residue name, atom index, residue index, atom name, element symbol].
# Mixing str and int in one array makes numpy store every field as a string.
protein_topology = np.array([
    [
        top_atom.residue.name,
        top_atom.index,
        top_atom.residue.index,
        top_atom.name,
        top_atom.element.symbol,
    ]
    for top_atom in cdk2_prot.top.atoms
])

In [17]:
# Write one PDB file per community: a pseudo-atom at each member pocket's
# centroid, a TER record, then the protein atoms lining those pockets.
for sx, data in surface_communities.items():
    pdb_records = []
    contact_idx = set()
    for pock in data['core_pockets'] + data['aux_pockets']:
        pocket_score = np.sum(temp_score_dict[pock])
        pocket_centroid = temp_centroids_dict[pock]
        contact_idx.update(temp_contact_dict[pock])
        # The residue name encodes the score tier of the pocket.
        if pocket_score <= -2.5:
            res = 'BHI'
        elif -2.5 < pocket_score <= -1.5:
            res = 'BMI'
        elif pocket_score > -1.5:
            res = 'BLI'
        centroid_cols = ''.join(
            str(round(pocket_centroid[axis], 3)).rjust(8) for axis in range(3)
        )
        pdb_records.append(''.join([
            'ATOM  ', str(pock).rjust(5), '  ', 'BAO', ' ', res, '   ',
            str(pock).rjust(3), '    ', centroid_cols,
            str(0.0).rjust(6), str(0.00).rjust(6), '           C',
        ]))
    pdb_records.append('TER')

    # Lining atoms are emitted in ascending atom-index order.
    ordered_idx = sorted(contact_idx)
    for top, coord in zip(protein_topology[ordered_idx], prot_coords[ordered_idx]):
        res, atom_index, res_index, atom, element = top
        coord_cols = ''.join(
            str(round(coord[axis], 3)).rjust(8) for axis in range(3)
        )
        pdb_records.append(''.join([
            'ATOM  ', str(atom_index).rjust(5), '  ', atom.ljust(3), ' ', res, '   ',
            str(res_index).rjust(3), '    ', coord_cols,
            str(0.0).rjust(6), str(0.00).rjust(6), '           ', element,
        ]))

    out_path = 'CDK2_Communities/pocket_communities/community_' + str(sx) + '.pdb'
    with open(out_path, 'w') as community_file:
        community_file.write('\n'.join(pdb_records))

In [18]:
# Write every protein atom as an ATOM record, one line per atom.
with open('CDK2_Communities/pocket_communities/protein.pdb', 'w') as protein_file:
    for (res, atom_index, res_index, atom, element), coord in zip(protein_topology, prot_coords):
        coord_cols = ''.join(
            str(round(coord[axis], 3)).rjust(8) for axis in range(3)
        )
        record = ''.join([
            'ATOM  ', str(atom_index).rjust(5), '  ', atom.ljust(3), ' ', res, '   ',
            str(res_index).rjust(3), '    ', coord_cols,
            str(0.0).rjust(6), str(0.00).rjust(6), '           ', element, '\n',
        ])
        protein_file.write(record)

### Once the files above have been written into the pocket_communities folder, the template Chimera file can be opened from the Chimera dialog

### Let's add a color column to the pandas DataFrame to help identify each community

In [19]:
# Read the color names, one entry per line of the table file.
with open('CDK2_Communities/colors_table.txt', mode='r') as colors_file:
    colors = colors_file.read().splitlines()

In [20]:
# Give each community, in row order, the color at the same position in the list.
community_data['Color'] = colors[:len(community_data)]

In [21]:
# Move the Color label to the front; the remaining columns keep their order.
column_order = ['Color', 'space', 'score', 'volume', 'occluded_asa']
community_data = community_data.reindex(columns=column_order)
community_data

Unnamed: 0,Color,space,score,volume,occluded_asa
0,green,1472.0,-23.69,690.25,619.090723
1,yellow,1199.0,-18.26,574.125,675.705542
2,pink,889.0,-14.36,474.5,516.936319
3,teal,885.0,-18.98,564.625,538.00384
4,orange,659.0,-10.99,413.875,504.916808
5,blue,647.0,-14.08,392.625,371.448937
6,purple,457.0,-10.53,317.25,349.742828
7,ltblue,406.0,-8.39,199.5,224.786338
8,peri,351.0,-5.82,167.75,188.640394
9,peach,335.0,-6.33,187.0,233.43567


### This is the image we should expect after opening AS_viz_template.py and loading the first saved scene. The color of each community corresponds to the Color column in the dataframe above

![title](CDK2_Communities/pocket_communities/CDK2_community_1.png)

### This alternate scene is saved in the same Chimera file. In this image, high-, mid-, and low-scoring pockets from each community are colored green, blue, and rosybrown, respectively. This alternate view allows users to immediately visualize the pockets within the context of the communities

![title](CDK2_Communities/pocket_communities/CDK2_community_2.png)