In [52]:
from __future__ import division
from __future__ import print_function
from operator import itemgetter
from itertools import combinations
import time
import os

import tensorflow.compat.v1 as tf

import numpy as np
import networkx as nx
import scipy.sparse as sp
from sklearn import metrics

from decagon.deep.optimizer import DecagonOptimizer
from decagon.deep.model import DecagonModel
from decagon.deep.minibatch import EdgeMinibatchIterator
from decagon.utility import rank_metrics, preprocessing

tf.disable_eager_execution()

########## scipy: version 1.9로 바꿔야함

#깔아야하는 packages
#networkx
#scikit-learn
#tensorflow

# Train on CPU (hide GPU) due to memory constraints
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Train on GPU
# os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
# os.environ["CUDA_VISIBLE_DEVICES"] = '0'
# config = tf.ConfigProto()
# config.gpu_options.allow_growth = True

np.random.seed(0)

In [53]:

###########################################################
#
# Functions
#
###########################################################


def get_accuracy_scores(edges_pos, edges_neg, edge_type):
    feed_dict.update({placeholders['dropout']: 0})
    feed_dict.update({placeholders['batch_edge_type_idx']: minibatch.edge_type2idx[edge_type]})
    feed_dict.update({placeholders['batch_row_edge_type']: edge_type[0]})
    feed_dict.update({placeholders['batch_col_edge_type']: edge_type[1]})
    rec = sess.run(opt.predictions, feed_dict=feed_dict)

    def sigmoid(x):
        return 1. / (1 + np.exp(-x))

    # Predict on test set of edges
    preds = []
    actual = []
    predicted = []
    edge_ind = 0
    for u, v in edges_pos[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 1, 'Problem 1'

        actual.append(edge_ind)
        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_neg = []
    for u, v in edges_neg[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds_neg.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 0, 'Problem 0'

        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_all = np.hstack([preds, preds_neg])
    preds_all = np.nan_to_num(preds_all)
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    predicted = list(zip(*sorted(predicted, reverse=True, key=itemgetter(0))))[1]

    roc_sc = metrics.roc_auc_score(labels_all, preds_all)
    aupr_sc = metrics.average_precision_score(labels_all, preds_all)
    apk_sc = rank_metrics.apk(actual, predicted, k=50)

    return roc_sc, aupr_sc, apk_sc


def construct_placeholders(edge_types):
    placeholders = {
        'batch': tf.placeholder(tf.int32, name='batch'),
        'batch_edge_type_idx': tf.placeholder(tf.int32, shape=(), name='batch_edge_type_idx'),
        'batch_row_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_row_edge_type'),
        'batch_col_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_col_edge_type'),
        'degrees': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
    }
    placeholders.update({
        'adj_mats_%d,%d,%d' % (i, j, k): tf.sparse_placeholder(tf.float32)
        for i, j in edge_types for k in range(edge_types[i,j])})
    placeholders.update({
        'feat_%d' % i: tf.sparse_placeholder(tf.float32)
        for i, _ in edge_types})
    return placeholders

In [54]:

###########################################################
#
# Load and preprocess data (This is a dummy toy example!)
#
###########################################################

####
# The following code uses artificially generated and very small networks.
# Expect less than excellent performance as these random networks do not have any interesting structure.
# The purpose of main.py is to show how to use the code!
#
# All preprocessed datasets used in the drug combination study are at: http://snap.stanford.edu/decagon:
# (1) Download datasets from http://snap.stanford.edu/decagon to your local machine.
# (2) Replace dummy toy datasets used here with the actual datasets you just downloaded.
# (3) Train & test the model.
####

val_test_size = 0.05
n_genes = 500
n_drugs = 400
n_drugdrug_rel_types = 3
gene_net = nx.planted_partition_graph(50, 10, 0.2, 0.05, seed=42)
print("gene_net")
print(type(gene_net))
print(gene_net)
print()


gene_adj = nx.adjacency_matrix(gene_net)
gene_degrees = np.array(gene_adj.sum(axis=0)).squeeze()

gene_drug_adj = sp.csr_matrix((10 * np.random.randn(n_genes, n_drugs) > 15).astype(int))
drug_gene_adj = gene_drug_adj.transpose(copy=True)

drug_drug_adj_list = []
tmp = np.dot(drug_gene_adj, gene_drug_adj)
for i in range(n_drugdrug_rel_types):
    mat = np.zeros((n_drugs, n_drugs))
    for d1, d2 in combinations(list(range(n_drugs)), 2):
        if tmp[d1, d2] == i + 4:
            mat[d1, d2] = mat[d2, d1] = 1.
    drug_drug_adj_list.append(sp.csr_matrix(mat))
    print("hmm")
    print(drug_drug_adj_list)
drug_degrees_list = [np.array(drug_adj.sum(axis=0)).squeeze() for drug_adj in drug_drug_adj_list]


# data representation
adj_mats_orig = {
    (0, 0): [gene_adj, gene_adj.transpose(copy=True)],
    (0, 1): [gene_drug_adj],
    (1, 0): [drug_gene_adj],
    (1, 1): drug_drug_adj_list + [x.transpose(copy=True) for x in drug_drug_adj_list],
}

print("gene_adj")
print(gene_adj)
print("")
print("gene_degrees")
print(gene_degrees)
print("")

degrees = {
    0: [gene_degrees, gene_degrees],
    1: drug_degrees_list + drug_degrees_list,
}

# featureless (genes)
gene_feat = sp.identity(n_genes)
gene_nonzero_feat, gene_num_feat = gene_feat.shape
gene_feat = preprocessing.sparse_to_tuple(gene_feat.tocoo())

# features (drugs)
drug_feat = sp.identity(n_drugs)
drug_nonzero_feat, drug_num_feat = drug_feat.shape
drug_feat = preprocessing.sparse_to_tuple(drug_feat.tocoo())

# data representation
num_feat = {
    0: gene_num_feat,
    1: drug_num_feat,
}
nonzero_feat = {
    0: gene_nonzero_feat,
    1: drug_nonzero_feat,
}
feat = {
    0: gene_feat,
    1: drug_feat,
}

edge_type2dim = {k: [adj.shape for adj in adjs] for k, adjs in adj_mats_orig.items()}
edge_type2decoder = {
    (0, 0): 'bilinear',
    (0, 1): 'bilinear',
    (1, 0): 'bilinear',
    (1, 1): 'dedicom',
}
print(adj_mats_orig)
edge_types = {k: len(v) for k, v in adj_mats_orig.items()}
print(edge_types)
num_edge_types = sum(edge_types.values())
print("Edge types:", "%d" % num_edge_types)


gene_net
<class 'networkx.classes.graph.Graph'>
Graph named 'stochastic_block_model' with 500 nodes and 6639 edges



  gene_adj = nx.adjacency_matrix(gene_net)


hmm
[<400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 17378 stored elements in Compressed Sparse Row format>]
hmm
[<400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 17378 stored elements in Compressed Sparse Row format>, <400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 7566 stored elements in Compressed Sparse Row format>]
hmm
[<400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 17378 stored elements in Compressed Sparse Row format>, <400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 7566 stored elements in Compressed Sparse Row format>, <400x400 sparse matrix of type '<class 'numpy.float64'>'
	with 2738 stored elements in Compressed Sparse Row format>]
gene_adj
  (0, 2)	1
  (0, 8)	1
  (0, 36)	1
  (0, 61)	1
  (0, 99)	1
  (0, 100)	1
  (0, 116)	1
  (0, 131)	1
  (0, 134)	1
  (0, 144)	1
  (0, 162)	1
  (0, 166)	1
  (0, 228)	1
  (0, 237)	1
  (0, 244)	1
  (0, 249)	1
  (0, 342)	1
  (0, 356)	1
  (0, 365)	1
  (0, 383)	1
  (0, 393)	1

In [55]:
print(type(gene_adj))
print(type(gene_drug_adj))
print(type(drug_drug_adj_list))
print("transposed gene_adj")
print(gene_adj.transpose(copy=True))

<class 'scipy.sparse._csr.csr_matrix'>
<class 'scipy.sparse._csr.csr_matrix'>
<class 'list'>
transposed gene_adj
  (2, 0)	1
  (8, 0)	1
  (36, 0)	1
  (61, 0)	1
  (99, 0)	1
  (100, 0)	1
  (116, 0)	1
  (131, 0)	1
  (134, 0)	1
  (144, 0)	1
  (162, 0)	1
  (166, 0)	1
  (228, 0)	1
  (237, 0)	1
  (244, 0)	1
  (249, 0)	1
  (342, 0)	1
  (356, 0)	1
  (365, 0)	1
  (383, 0)	1
  (393, 0)	1
  (418, 0)	1
  (435, 0)	1
  (485, 0)	1
  (492, 0)	1
  :	:
  (459, 498)	1
  (3, 499)	1
  (7, 499)	1
  (17, 499)	1
  (19, 499)	1
  (64, 499)	1
  (88, 499)	1
  (95, 499)	1
  (100, 499)	1
  (105, 499)	1
  (133, 499)	1
  (165, 499)	1
  (166, 499)	1
  (233, 499)	1
  (245, 499)	1
  (273, 499)	1
  (279, 499)	1
  (299, 499)	1
  (309, 499)	1
  (383, 499)	1
  (451, 499)	1
  (452, 499)	1
  (490, 499)	1
  (491, 499)	1
  (492, 499)	1


In [56]:
print([gene_adj, gene_adj.transpose(copy=True)])
print(type(gene_adj))
print(gene_adj[0,3])
print(gene_adj[0].shape)
print(gene_adj[0][0])
print(type(drug_drug_adj_list))

[<500x500 sparse matrix of type '<class 'numpy.int64'>'
	with 13278 stored elements in Compressed Sparse Row format>, <500x500 sparse matrix of type '<class 'numpy.int64'>'
	with 13278 stored elements in Compressed Sparse Column format>]
<class 'scipy.sparse._csr.csr_matrix'>
0
(1, 500)
  (0, 2)	1
  (0, 8)	1
  (0, 36)	1
  (0, 61)	1
  (0, 99)	1
  (0, 100)	1
  (0, 116)	1
  (0, 131)	1
  (0, 134)	1
  (0, 144)	1
  (0, 162)	1
  (0, 166)	1
  (0, 228)	1
  (0, 237)	1
  (0, 244)	1
  (0, 249)	1
  (0, 342)	1
  (0, 356)	1
  (0, 365)	1
  (0, 383)	1
  (0, 393)	1
  (0, 418)	1
  (0, 435)	1
  (0, 485)	1
  (0, 492)	1
  (0, 495)	1
<class 'list'>


In [57]:
import csv
f = open("/mnt/nas2/seogyeong/bio-decagon-ppi.csv")
reader = csv.reader(f)
# for line in reader:
    

In [None]:
import pandas as pd
df = pd.read_csv('/mnt/nas2/seogyeong/bio-decagon-ppi.csv', sep=',', header=0)
print(type(df.values))
print(df.values.T[0])
gene_1max = np.max( df.values.T[0] )
gene_2max = np.max( df.values.T[1] )
gene_max = gene_1max
if gene_max < gene_2max:
    gene_max = gene_2max
print(gene_max)

gene_gene_adj = sp.csr_matrix((gene_max, gene_max), dtype=np.int8)

a =  0

for line in reader:
    print(line)
    if a==0:
        a=1
        continue
    gene_gene_adj[int(line[0]), int(line[1])] = 1
print("hey")
print(gene_gene_adj)
# print(df)

<class 'numpy.ndarray'>
[114787 114787 114787 ...   5635   5636   8480]
110255169
['114785', '8331']
['114785', '8337']


  self._set_intXint(row, col, x.flat[0])


['114785', '8334']
['114785', '3054']
['114785', '171023']
['114785', '85235']
['114785', '1994']
['114785', '3013']
['114785', '7311']
['114785', '8930']
['114785', '7314']
['114785', '6233']
['114784', '25']
['114784', '4690']
['114784', '5335']
['114784', '2534']
['114784', '1398']
['114783', '7704']
['114783', '5980']
['114783', '5499']
['114783', '4609']
['221545', '55605']
['221545', '5347']
['221545', '1460']
['221545', '55294']
['114781', '9616']
['114781', '84893']
['114781', '121551']
['114781', '79791']
['114781', '339745']
['114781', '26260']
['114781', '51013']
['114781', '55964']
['114781', '5607']
['114781', '5887']
['114781', '129285']
['114781', '10130']
['114781', '26224']
['114781', '491']
['114781', '25841']
['114781', '8945']
['114781', '55294']
['114781', '11124']
['114781', '874']
['114781', '112936']
['114781', '114609']
['114781', '25827']
['114781', '26272']
['114781', '26270']
['114781', '90135']
['114781', '26232']
['114781', '162517']
['114781', '4738']
['1

In [None]:
print("1")
print(gene_gene_adj)
print("2")

In [None]:

###########################################################
#
# Settings and placeholders
#
###########################################################

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('neg_sample_size', 1, 'Negative sample size.')
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 64, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_float('dropout', 0.1, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('max_margin', 0.1, 'Max margin parameter in hinge loss')
flags.DEFINE_integer('batch_size', 512, 'minibatch size.')
flags.DEFINE_boolean('bias', True, 'Bias term.')
# Important -- Do not evaluate/print validation performance every iteration as it can take
# substantial amount of time
PRINT_PROGRESS_EVERY = 150

print("Defining placeholders")
placeholders = construct_placeholders(edge_types)
print(placeholders)
