In [1]:
import pickle
import sys
import glob
import yaml
from copy import deepcopy
from graphein.protein.config import ProteinGraphConfig
from graphein.protein.graphs import construct_graph
from graphein.protein.edges.atomic import add_atomic_edges, add_bond_order, add_ring_status 
from graphein.protein.edges.distance import node_coords
import torch
from torch_geometric.data import Data

To use the Graphein submodule graphein.protein.features.sequence.embeddings, you need to install: biovec 
biovec cannot be installed via conda
To use the Graphein submodule graphein.protein.visualisation, you need to install: pytorch3d 
To do so, use the following command: conda install -c pytorch3d pytorch3d


In [2]:
# Load the top-level notebook settings, then the protein-specific settings
# file that the top-level config points to.
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

with open(config['protein_config_file'], 'r') as config_file:
    protein_config = yaml.safe_load(config_file)

# Label vocabularies read from the protein config.
protein_atom_labels, protein_edge_labels, interaction_labels = (
    protein_config[key] for key in ('atom_labels', 'edge_labels', 'interaction_labels')
)

# Atom-granularity graph built with the three atomic edge functions imported
# at the top of the notebook; deprotonation is switched off.
graphein_param_dict = dict(
    granularity="atom",
    edge_construction_functions=[add_atomic_edges, add_bond_order, add_ring_status],
    deprotonate=False,
)

graphein_config = ProteinGraphConfig(**graphein_param_dict)

In [9]:
def get_distance(x, y):
    """Return the Euclidean distance between two coordinate sequences.

    Components are paired positionally with ``zip``, so if the sequences
    differ in length the trailing components of the longer one are ignored.
    Two empty sequences give ``0.0``.
    """
    squared_diffs = ((p - q) ** 2 for p, q in zip(x, y))
    return sum(squared_diffs) ** 0.5

def generate_node(node_type_list, node_type, coords):
    """Build a node feature vector: one-hot node type followed by coordinates.

    Args:
        node_type_list: Ordered vocabulary of node types; fixes the one-hot width.
        node_type: The type of this node; must be present in ``node_type_list``.
        coords: Iterable of coordinate values appended after the one-hot part.

    Returns:
        A new list of length ``len(node_type_list) + len(coords)``.

    Raises:
        ValueError: If ``node_type`` is not in ``node_type_list``.
    """
    one_hot = [0] * len(node_type_list)
    hot_position = node_type_list.index(node_type)
    one_hot[hot_position] = 1
    return one_hot + list(coords)

def generate_edge(edge_feature_list, edge_features, weight):
    """Build an edge feature vector: multi-hot edge kinds followed by a weight.

    Args:
        edge_feature_list: Ordered vocabulary of edge kinds; fixes the multi-hot width.
        edge_features: Iterable of kinds present on this edge; each must be in
            ``edge_feature_list``.
        weight: Scalar appended as the last element.

    Returns:
        A new list of length ``len(edge_feature_list) + 1``.

    Raises:
        ValueError: If any item of ``edge_features`` is not in ``edge_feature_list``.
    """
    multi_hot = [0] * len(edge_feature_list)
    for position in map(edge_feature_list.index, edge_features):
        multi_hot[position] = 1
    return multi_hot + [weight]

In [5]:
# One sub-directory per target under the processed PDBbind root. The trailing
# "/" in the pattern makes glob return directories only, each ending in "/".
pdb_glob_pattern = config['processed_pdbbind_dir'] + "*/"
pdb_dir = glob.glob(pdb_glob_pattern)
pdb_dir.sort()

voxel_graph_dir = config['voxel_graph_dir']

# Filled with one target id per processed directory by the main loop.
target_id_list = list()

In [16]:
# For every target, build one small graph per interaction point: the protein
# atoms nearest to the point become nodes, connected by the bonds graphein
# found between them plus a 'spatial' edge for every remaining pair.

# Number of nearest protein atoms kept as nodes for each interaction point.
N_NEAREST_ATOMS = 10

for t_idx, target_dir in enumerate(pdb_dir):
    # Coarse progress indicator.
    if t_idx % 100 == 0:
        print(t_idx)

    # Each entry of pdb_dir ends with '/', so the directory name (used as the
    # target id) is the second-to-last path component.
    target_id = target_dir.split('/')[-2]
    target_id_list.append(target_id)

    # NOTE(review): pickle can execute arbitrary code while loading; only read
    # files produced by this project's own preprocessing.
    # The `with` block closes the file handle, which the bare open() did not.
    with open("%s%s_ip.pkl" % (target_dir, target_id), 'rb') as ip_file:
        ip_data = pickle.load(ip_file)
    protein_graph = construct_graph(config=graphein_config, pdb_path="%s%s_protein_25.pdb" % (target_dir, target_id))

    # The atom list depends only on the protein, so read it from the graph
    # once per target instead of once per interaction point.
    protein_atoms = [(node_id, attrs['atom_type'], attrs['coords'])
                     for node_id, attrs in protein_graph.nodes(data=True)]

    # presumably ip_data maps an interaction type to a list of xyz points -
    # TODO confirm against the code that writes the *_ip.pkl files.
    for interaction_type, interaction_coords in ip_data.items():
        for interaction_xyz in interaction_coords:
            # Rank all atoms by distance to the interaction point and keep the
            # nearest ones; each entry is [node id, atom type, coords, distance].
            sorted_nodelist = sorted(
                ([node_id, atom_type, coords, get_distance(interaction_xyz, coords)]
                 for node_id, atom_type, coords in protein_atoms),
                key=lambda x: x[-1],
            )[:N_NEAREST_ATOMS]

            sorted_node_labels = [item[0] for item in sorted_nodelist]
            node_features = [generate_node(protein_atom_labels, item[1], item[2])
                             for item in sorted_nodelist]

            # Graph node id -> row index in node_features; replaces repeated
            # list scans (`in` / `.index`) for every protein edge.
            node_position = {label: idx for idx, label in enumerate(sorted_node_labels)}

            edge_features = []
            edge_index = [[],[]]

            # edge_check[a][b] == 1 once an edge between local nodes a and b exists.
            edge_check = [[0 for x in sorted_nodelist] for y in sorted_nodelist]

            # Edges already present in the protein graph between two selected
            # atoms, stored in both directions.
            for node_a, node_b, edge_attrs in protein_graph.edges(data=True):
                if node_a not in node_position or node_b not in node_position:
                    continue

                n1 = node_position[node_a]
                n2 = node_position[node_b]

                edge_index[0].extend([n1,n2])
                edge_index[1].extend([n2,n1])

                edge_check[n1][n2] = 1
                edge_check[n2][n1] = 1

                edge_feature_vec = generate_edge(protein_edge_labels, edge_attrs['kind'], edge_attrs['distance'])
                edge_features.extend([edge_feature_vec, edge_feature_vec])

            # Every pair not yet connected gets a 'spatial' edge weighted by
            # the distance between the two atoms. edge_check is symmetric, so
            # visiting only n2 > n1 covers each unordered pair exactly once.
            n_nodes = len(edge_check)
            for n1 in range(n_nodes):
                for n2 in range(n1 + 1, n_nodes):
                    if edge_check[n1][n2] == 0:
                        edge_index[0].extend([n1,n2])
                        edge_index[1].extend([n2,n1])

                        edge_check[n1][n2] = 1
                        edge_check[n2][n1] = 1

                        # The last three entries of a node feature vector are its coordinates.
                        node_distance = get_distance(node_features[n1][-3:], node_features[n2][-3:])

                        edge_feature_vec = generate_edge(protein_edge_labels, ['spatial'], node_distance)
                        edge_features.extend([edge_feature_vec, edge_feature_vec])

            # Debug dump of the graph that was just built.
            for row in edge_index:
                print(row)

            for row in edge_features:
                print(row)

            for row in edge_check:
                print(row)

            # NOTE(review): debugging halt - the whole run stops here, right
            # after the first interaction point has been dumped, so only one
            # graph is ever built. Remove once the graphs are actually stored.
            sys.exit()

Output()

0


[4, 9, 5, 6, 6, 0, 0, 7, 7, 2, 2, 1, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 8, 0, 9, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 2, 3, 2, 4, 2, 5, 2, 6, 2, 8, 2, 9, 3, 4, 3, 5, 3, 6, 3, 7, 3, 8, 3, 9, 4, 5, 4, 6, 4, 7, 4, 8, 5, 7, 5, 8, 5, 9, 6, 7, 6, 8, 6, 9, 7, 8, 7, 9, 8, 9]
[9, 4, 6, 5, 0, 6, 7, 0, 2, 7, 1, 2, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 8, 0, 9, 0, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 3, 2, 4, 2, 5, 2, 6, 2, 8, 2, 9, 2, 4, 3, 5, 3, 6, 3, 7, 3, 8, 3, 9, 3, 5, 4, 6, 4, 7, 4, 8, 4, 7, 5, 8, 5, 9, 5, 7, 6, 8, 6, 9, 6, 8, 7, 9, 7, 9, 8]
[0, 0, 1, 0, 1, 0, 1.4613397962144188]
[0, 0, 1, 0, 1, 0, 1.4613397962144188]
[0, 0, 1, 0, 1, 0, 1.5322434532410312]
[0, 0, 1, 0, 1, 0, 1.5322434532410312]
[1, 0, 0, 0, 1, 0, 1.3309883545696437]
[1, 0, 0, 0, 1, 0, 1.3309883545696437]
[0, 0, 1, 0, 1, 0, 1.461198480700003]
[0, 0, 1, 0, 1, 0, 1.461198480700003]
[0, 0, 1, 0, 1, 0, 1.5276625936377437]
[0, 0, 1, 0, 1, 0, 1.5276625936377437]
[0, 0, 1, 0, 1, 0, 1.4137287575769253]
[0, 0, 1, 0, 1, 0, 1.4137287575

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
