In [1]:
import os
import dgl
import torch
import numpy as np
import gradio #as gr
import pandas as pd
import networkx as nx
import torch.nn as nn
from sklearn import metrics
from tqdm.notebook import tqdm
import torch.nn.functional as F
from dgl.data import DGLDataset
#torch.cuda.set_device(0)  

Using backend: pytorch


## Dataset

In [2]:
class PSE_eval(DGLDataset):
    """Per-drug protein-protein-interaction (PPI) graphs with PSE node features.

    For each drug, the PPI edge list is restricted to edges whose two endpoints
    are both targets of that drug. The resulting subgraph is stored as a DGL
    graph whose nodes carry the matching rows of the feature table under
    ``ndata['PSE']``; the drug's ``Name`` is stored as the graph label.

    Parameters
    ----------
    max_drugs : int or None, optional
        Number of leading rows of ``DrugID.csv`` to turn into graphs. Defaults
        to 15 (the value previously hard-coded); ``None`` processes every drug.
    """

    def __init__(self, max_drugs=15):
        # Assigned before super().__init__() because process() is invoked while
        # the base class is being constructed and needs to read this value.
        self.max_drugs = max_drugs
        super().__init__(name='PSE_eval')

    def process(self):
        """Read the CSV tables and build one graph (plus label) per drug."""
        features = pd.read_csv('../data/GNN-GSE_full_pkd_norm.csv', index_col='ProteinID', sep=',')
        edges = pd.read_csv('../data/GNN-PPI-net.csv', sep=',')
        dti = pd.read_csv('../data/GNN-DTI_full.csv', sep=',')
        drug_table = pd.read_csv('../data/DrugID.csv', sep=',')
        print('data loaded!')

        def drug2ppi(drug):
            """Return (edge frame, ordered protein ids) of the drug's PPI subgraph.

            The edge frame holds ``graph_id``, the local node ids ``src``/``dst``
            and the original protein ids ``src_prot``/``dst_prot``. The position
            of a protein in the returned list is its local node id.
            """
            genes = dti.loc[dti['DrugID'] == drug, 'ProteinID'].tolist()
            both_are_targets = edges['protein1'].isin(genes) & edges['protein2'].isin(genes)
            df = (edges.loc[both_are_targets, ['protein1', 'protein2']]
                       .rename(columns={'protein1': 'src_prot', 'protein2': 'dst_prot'}))
            df['graph_id'] = drug_table.loc[drug_table['DrugID'] == drug, 'GraphID'].tolist()[0]

            # Number the nodes over BOTH endpoints (sources first, then any
            # destination-only proteins, in order of first appearance). Using
            # the sources alone leaves destination-only proteins without an id.
            prot_ids = pd.unique(pd.concat([df['src_prot'], df['dst_prot']])).tolist()
            local_id = {prot: idx for idx, prot in enumerate(prot_ids)}
            df['src'] = df['src_prot'].map(local_id).astype('int64')
            df['dst'] = df['dst_prot'].map(local_id).astype('int64')
            return df[['graph_id', 'src', 'dst', 'src_prot', 'dst_prot']], prot_ids

        self.graphs = []
        self.labels = []
        # Drug-combination pairs are not populated here: the earlier pairing
        # code depended on a `drug_comb` table that this method does not load.
        self.comb_graphs = []
        self.comb_labels = []

        # For each drug (optionally capped at max_drugs)...
        for drug in tqdm(drug_table['DrugID'].tolist()[:self.max_drugs]):
            # Find the edges, the node ordering and the label.
            edges_of_id, prot_ids = drug2ppi(drug)
            src = edges_of_id['src'].to_numpy()
            dst = edges_of_id['dst'].to_numpy()
            label = drug_table.loc[drug_table['DrugID'] == drug, 'Name'].tolist()[0]

            g = dgl.graph((src, dst), num_nodes=len(prot_ids))

            # Row i of the feature matrix belongs to node i, i.e. prot_ids[i].
            # Rows are looked up by ProteinID, so the ids need not be 1..N and
            # the feature width follows the table instead of a fixed constant.
            node_feats = features.loc[prot_ids].to_numpy(dtype='float32')
            g.ndata['PSE'] = torch.tensor(node_feats)

            self.graphs.append(g)
            self.labels.append(label)

    def __getitem__(self, i):
        """Return the ``(graph, label)`` pair at position ``i``."""
        return self.graphs[i], self.labels[i]

    def __len__(self):
        """Return the number of graphs that were built."""
        return len(self.graphs)


In [None]:
# Build the evaluation dataset; constructing it prints 'data loaded!' and shows
# a progress bar while the per-drug graphs are created.
dataset = PSE_eval()
# Display the (graph, label) pair stored at index 2 as the cell output.
dataset[2]

data loaded!


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))

In [7]:
#model
from dgl.nn import GraphConv

class GCN(nn.Module):
    """Two-layer graph convolutional network with a mean-over-nodes readout.

    Parameters
    ----------
    in_feats : int
        Size of each input node feature vector.
    h_feats : int
        Size of the hidden representation produced by the first layer.
    num_classes : int
        Size of the per-graph output vector.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        # Attribute names double as the parameter prefixes in the state dict.
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Return one non-negative output row per graph in ``g``.

        Side effect: the second layer's node embeddings are written to
        ``g.ndata['h']`` so that the readout can average them.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        g.ndata['h'] = self.conv2(g, hidden)
        # Mean readout over nodes (dgl.max_nodes was tried as an alternative).
        graph_repr = dgl.mean_nodes(g, 'h')
        return F.relu(graph_repr)

In [8]:
# Specify a path
PATH = "entire_model2.pt"
conf = 'state_dict_model2.pt'

# Load
model = GCN(964,200,964)
model.load_state_dict(torch.load(conf))
model.eval()

GCN(
  (conv1): GraphConv(in=964, out=200, normalization=both, activation=None)
  (conv2): GraphConv(in=200, out=964, normalization=both, activation=None)
)

In [16]:
def predict(g1, g2):  # graph1, graph2
    """Predict a combined output vector for a pair of drug graphs.

    Each graph is passed through the module-level ``model`` using its
    ``ndata['PSE']`` features cast to float32. The two outputs are summed,
    passed through ``F.normalize`` (default arguments) and divided by 2.

    Parameters
    ----------
    g1, g2 : graphs carrying a 'PSE' node feature.

    Returns
    -------
    torch.Tensor
        The combined prediction, detached from autograd.
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred1 = model(g1, g1.ndata['PSE'].float())
        pred2 = model(g2, g2.ndata['PSE'].float())
        pred = F.normalize(pred1+pred2)/2
    return(pred)

In [21]:
# Feature table indexed by ProteinID; its column headers are the PSE names.
features = pd.read_csv('../data/GNN-GSE_full_pkd_norm.csv',index_col = 'ProteinID', sep=',')
PSE = features.columns.values.tolist()
# Column position -> PSE name. enumerate() replaces a PSE.index() lookup per
# column, which was quadratic in the number of columns.
PSE_dic = dict(enumerate(PSE))

In [24]:
def PSE_predict(g1,g2):
    """List the PSEs predicted for a pair of drug graphs.

    Parameters
    ----------
    g1, g2 : graphs accepted by ``predict``.

    Returns
    -------
    list of [str, float]
        One ``[PSE name, value * 100 rounded to 3 decimals]`` entry for every
        output position whose predicted value is not equal to 0, in column order.
    """
    pred = predict(g1,g2)
    pred_PSE_value = pred[0].tolist()  # first (only) row of the prediction

    return [[PSE_dic[idx], round(value*100,3)]
            for idx, value in enumerate(pred_PSE_value)
            if value != 0]
    

KeyError: 'Leukopenia'

In [49]:
pred = predict(dataset[1][0], dataset[3][0])

# Build the [PSE name, percentage] table from `pred` in this cell, so the
# display no longer relies on a `tmp` left over from an earlier kernel session.
tmp = [[PSE_dic[idx], round(value*100,3)]
       for idx, value in enumerate(pred[0].tolist())
       if value != 0]
tmp

[['Arthralgia', 5.506],
 ['Diarrhoea', 13.763],
 ['Headache', 10.054],
 ['Vomiting', 13.493],
 ['Malaise', 6.177],
 ['Back pain', 5.466],
 ['Oedema peripheral', 6.814],
 ['Death', 5.082],
 ['Pain', 12.297],
 ['Fatigue', 12.072],
 ['Urinary tract infection', 2.49],
 ['Fall', 7.576],
 ['Nausea', 17.14],
 ['Abdominal pain', 5.079],
 ['Anxiety', 8.639],
 ['Rash', 5.054],
 ['Chest pain', 7.452],
 ['Dyspnoea', 15.876],
 ['Asthenia', 10.12],
 ['Depression', 7.214],
 ['Weight decreased', 5.654],
 ['Dizziness', 10.562],
 ['Drug ineffective', 5.695],
 ['Dehydration', 3.961],
 ['Insomnia', 4.72],
 ['Anaemia', 10.294],
 ['Hypertension', 7.344],
 ['Pain in extremity', 5.999],
 ['Confusional state', 4.323],
 ['Pneumonia', 10.748],
 ['Renal failure', 4.539],
 ['Decreased appetite', 2.704],
 ['Renal failure acute', 5.579],
 ['Constipation', 2.729],
 ['Drug interaction', 1.991]]

In [46]:
# NOTE(review): `SE` is not assigned in any visible cell, so this fails on a
# fresh kernel. The recorded output is a list of booleans — presumably
# pred.ne(0)[0].tolist(); confirm and define it here before displaying.
SE

[True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False

In [None]:
# NOTE(review): this cell builds the same interface as the following cell and
# uses DTI_pred before the cell that defines it, so it cannot run on a fresh
# kernel in this position. The dropdown is labelled "Models" here.
model_BCE = model
model_Cosin = model2  # NOTE(review): `model2` is not assigned in any visible cell — confirm

gradio.Interface(DTI_pred, 
                 [gradio.inputs.Dropdown(label = "Models", choices = ['BindingDB', 'DAVIS', 'KIBA']),
                  gradio.inputs.Textbox(lines = 5, label = "Drug SMILES"),
                  gradio.inputs.Textbox(lines = 5, label = "Target Amino Acid Sequence")], 
                 gradio.outputs.Textbox(label = "Predicted Affinity")).launch(share=True)
# The address below was fused onto the launch() call, which made the cell a
# syntax error; it is kept here as a record of where the app was served:
# http://127.0.0.1:7860/

In [None]:

# Aliases for two trained models; neither alias is used later in this cell.
model_BCE = model
# NOTE(review): `model2` is not assigned in any visible cell — confirm where it
# is created, otherwise this line raises NameError on a fresh kernel.
model_Cosin = model2

def DTI_pred(data, drug, target):
    """Predict a drug-target affinity with the model chosen by ``data``.

    Parameters
    ----------
    data : str
        One of 'BindingDB', 'KIBA' or 'DAVIS'; selects which module-level
        model (``model_binding``, ``model_kiba``, ``model_davis``) is used.
    drug : str
        Drug input, passed to ``utils.data_process`` as ``X_drug``.
    target : str
        Target input, passed to ``utils.data_process`` as ``X_target``.

    Returns
    -------
    str
        The first predicted value, converted to a string.

    Raises
    ------
    ValueError
        If ``data`` is not one of the three supported names.
    """
    # Only the selected model's name is evaluated, so the other two do not
    # need to be loaded for a call to succeed.
    if data == 'BindingDB':
        model = model_binding
    elif data == 'KIBA':
        model = model_kiba
    elif data == 'DAVIS':
        model = model_davis
    else:
        # Previously an unknown name left `model` unbound and failed later
        # with an unrelated-looking error.
        raise ValueError(
            "Unknown training dataset %r; expected 'BindingDB', 'KIBA' or 'DAVIS'" % (data,)
        )

    # y is a placeholder label: only the encoded inputs are needed to predict.
    X_pred = utils.data_process(X_drug = [drug], X_target = [target], y = [0],
                                drug_encoding = 'MPNN', target_encoding = 'CNN', 
                                split_method='no_split')
    y_pred = model.predict(X_pred)
    return str(y_pred[0])

# Input widgets, in the order DTI_pred receives them: (data, drug, target).
dti_inputs = [
    gradio.inputs.Dropdown(label = "Training Dataset", choices = ['BindingDB', 'DAVIS', 'KIBA']),
    gradio.inputs.Textbox(lines = 5, label = "Drug SMILES"),
    gradio.inputs.Textbox(lines = 5, label = "Target Amino Acid Sequence"),
]
dti_output = gradio.outputs.Textbox(label = "Predicted Affinity")

# Wrap DTI_pred in a web form and start serving it; share=True is passed to launch().
gradio.Interface(DTI_pred, dti_inputs, dti_output).launch(share=True)