In [1]:
from __future__ import division
from __future__ import print_function
from operator import itemgetter
from itertools import combinations
import time
import os

import tensorflow.compat.v1 as tf

import numpy as np
import networkx as nx
import scipy.sparse as sp
from sklearn import metrics

from decagon.deep.optimizer import DecagonOptimizer
from decagon.deep.model import DecagonModel
from decagon.deep.minibatch import EdgeMinibatchIterator
from decagon.utility import rank_metrics, preprocessing

tf.disable_eager_execution()

########## scipy: version 1.9로 바꿔야함

#깔아야하는 packages
#networkx
#scikit-learn
#tensorflow

# Train on CPU (hide GPU) due to memory constraints
# os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Train on GPU
os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

np.random.seed(0)

In [2]:

###########################################################
#
# Functions
#
###########################################################


def get_accuracy_scores(edges_pos, edges_neg, edge_type):
    feed_dict.update({placeholders['dropout']: 0})
    feed_dict.update({placeholders['batch_edge_type_idx']: minibatch.edge_type2idx[edge_type]})
    feed_dict.update({placeholders['batch_row_edge_type']: edge_type[0]})
    feed_dict.update({placeholders['batch_col_edge_type']: edge_type[1]})
    rec = sess.run(opt.predictions, feed_dict=feed_dict)

    def sigmoid(x):
        return 1. / (1 + np.exp(-x))

    # Predict on test set of edges
    preds = []
    actual = []
    predicted = []
    edge_ind = 0
    for u, v in edges_pos[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 1, 'Problem 1'

        actual.append(edge_ind)
        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_neg = []
    for u, v in edges_neg[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds_neg.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 0, 'Problem 0'

        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_all = np.hstack([preds, preds_neg])
    preds_all = np.nan_to_num(preds_all)
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    predicted = list(zip(*sorted(predicted, reverse=True, key=itemgetter(0))))[1]

    roc_sc = metrics.roc_auc_score(labels_all, preds_all)
    aupr_sc = metrics.average_precision_score(labels_all, preds_all)
    apk_sc = rank_metrics.apk(actual, predicted, k=50)

    return roc_sc, aupr_sc, apk_sc


def construct_placeholders(edge_types):
    placeholders = {
        'batch': tf.placeholder(tf.int32, name='batch'),
        'batch_edge_type_idx': tf.placeholder(tf.int32, shape=(), name='batch_edge_type_idx'),
        'batch_row_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_row_edge_type'),
        'batch_col_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_col_edge_type'),
        'degrees': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
    }
    placeholders.update({
        'adj_mats_%d,%d,%d' % (i, j, k): tf.sparse_placeholder(tf.float32)
        for i, j in edge_types for k in range(edge_types[i,j])})
    placeholders.update({
        'feat_%d' % i: tf.sparse_placeholder(tf.float32)
        for i, _ in edge_types})
    return placeholders

In [3]:

###########################################################
#
# Load and preprocess data (This is a dummy toy example!)
#
###########################################################

####
# The following code uses artificially generated and very small networks.
# Expect less than excellent performance as these random networks do not have any interesting structure.
# The purpose of main.py is to show how to use the code!
#
# All preprocessed datasets used in the drug combination study are at: http://snap.stanford.edu/decagon:
# (1) Download datasets from http://snap.stanford.edu/decagon to your local machine.
# (2) Replace dummy toy datasets used here with the actual datasets you just downloaded.
# (3) Train & test the model.
####

val_test_size = 0.05

In [4]:
import csv

# for line in reader:
    

In [5]:
import pandas as pd

In [6]:
gene_max = 0
drug_max = 0

df_ppi = pd.read_csv('/mnt/nas2/seogyeong/bio-decagon-ppi.csv', sep=',', header=0)
df_targets = pd.read_csv('/mnt/nas2/seogyeong/bio-decagon-targets-all.csv', sep=',', header=0)
df_combo = pd.read_csv('/mnt/nas2/seogyeong/bio-decagon-combo.csv', sep=',', header=0)

print(df_ppi.values.T[0].shape)
print(df_ppi.values.T[1].shape)
print(df_targets.values.T[1].shape)
temp = list(df_ppi.values.T[0])
temp.extend(list(df_ppi.values.T[1]))
temp.extend(list(df_targets.values.T[1]))

gene_set = set(temp)
# print(gene_set)


temp = list(df_combo.values.T[0])
temp.extend(list(df_combo.values.T[1]))
temp.extend(list(df_targets.values.T[0]))
drug_set = set(temp)
# print(drug_set)

print("numbers")
gene_n = len(gene_set)
drug_n = len(drug_set)
print(gene_n, drug_n)

print("max")
gene_max = max(gene_set)
drug_max = 0 
for drugs in drug_set:
    # print(drugs)
    # print(drugs[3:])
    if drug_max < int(drugs[3:]):
        drug_max = int(drugs[3:])
print(gene_max)
print(drug_max)



(715612,)
(715612,)
(131034,)
numbers
19122 2135
max
110255169
154677977


In [None]:

hashtable_gene = np.zeros(2* gene_n)
hashtable_drug = np.zeros(2* drug_n)

In [7]:
#make hash
count = 0
for genes in gene_set:
    n = genes % (2* gene_n)
    if count % 500 == 0:
        print(count, ", ",genes)
    count = count + 1
    while hashtable_gene[n] !=0:
        n = n+11
        if n >= 2* gene_n:
            n = n % 11
    hashtable_gene[n] = genes



0 ,  1
500 ,  6636
1000 ,  1233
1500 ,  1903
2000 ,  2651
2500 ,  3248
3000 ,  3890
3500 ,  4626
4000 ,  5241
4500 ,  5864
5000 ,  399694
5500 ,  7026
6000 ,  7620
6500 ,  8483
7000 ,  9087
7500 ,  9667
8000 ,  10211
8500 ,  10815
9000 ,  339559
9500 ,  79689
10000 ,  80312
10500 ,  114571
11000 ,  23066
11500 ,  51204
12000 ,  51725
12500 ,  85406
13000 ,  283643
13500 ,  284370
14000 ,  55626
14500 ,  23357
15000 ,  56903
15500 ,  57540
16000 ,  124056
16500 ,  26338
17000 ,  158293
17500 ,  126626
18000 ,  160857
18500 ,  129450
19000 ,  27333


In [8]:
print(hashtable_gene)

count = 0
for drugs in drug_set:
    
    if count % 500 == 0:
        print(count, ", ",drugs)
    count = count + 1
    
    n = int(drugs[3:]) % (2* drug_n)
    while hashtable_drug[n] !=0:
        n = n + 11
        if n >= 2* drug_n:
            n = n % 11
    hashtable_drug[n] = int(drugs[3:])


print(hashtable_drug)

[0. 1. 2. ... 0. 0. 0.]
0 ,  CID000065856
500 ,  CID000002232
1000 ,  CID000039764
1500 ,  CID100001775
2000 ,  CID006323490
[0.00000000e+00 1.00003355e+08 0.00000000e+00 ... 1.00003397e+08
 1.00024748e+08 1.00003399e+08]


In [9]:
def hash_gene(gene_id):
    n = gene_id % (2* gene_n)
    while hashtable_gene[n] != gene_id:
        n = n+11
        if n >= 2*gene_n:
            n = n % 11
    return n

def hash_drug(drug_id):
    n = int(drug_id[3:]) % (2* drug_n)
    while hashtable_drug[n] != int(drug_id[3:]):
        n = n+11
        if n >= 2* drug_n:
            n = n % 11
    return n

In [10]:
# protein-protein


gene_adj = sp.csr_matrix((2* gene_n, 2* gene_n), dtype=np.int8)

a =  0

f = open("/mnt/nas2/seogyeong/bio-decagon-ppi.csv")
reader = csv.reader(f)
count = 0

print(gene_adj)
print(gene_adj.shape)

for line in reader:
    #print(line)
    count = count + 1
    if a==0:
        a=1
        continue
    if count % 5000 == 0:
        print("count: ",count, ", ", line)
        break
        #여기지우기 꼭!
    gene_adj[hash_gene(int(line[0])), hash_gene(int(line[1]))] = 1
print("hey")
print(gene_adj)
gene_degrees = np.array(gene_adj.sum(axis=0)).squeeze()
print(gene_degrees)
# print(df)


(38244, 38244)


  self._set_intXint(row, col, x.flat[0])


count:  5000 ,  ['6790', '7324']
hey
  (57, 974)	1
  (57, 2795)	1
  (57, 3598)	1
  (57, 3914)	1
  (57, 4035)	1
  (57, 6272)	1
  (57, 6564)	1
  (57, 10482)	1
  (57, 10492)	1
  (57, 10988)	1
  (57, 23012)	1
  (63, 25)	1
  (63, 1398)	1
  (63, 2534)	1
  (63, 4690)	1
  (63, 5335)	1
  (64, 1994)	1
  (64, 3013)	1
  (64, 3054)	1
  (64, 3607)	1
  (64, 5928)	1
  (64, 6233)	1
  (64, 7311)	1
  (64, 7314)	1
  (64, 8314)	1
  :	:
  (36007, 5437)	1
  (36007, 5440)	1
  (36007, 5441)	1
  (36007, 6908)	1
  (36007, 6932)	1
  (36007, 9329)	1
  (36007, 9339)	1
  (36007, 9341)	1
  (36007, 9533)	1
  (36007, 10482)	1
  (36007, 10621)	1
  (36007, 10622)	1
  (36007, 11128)	1
  (36007, 11385)	1
  (36007, 12838)	1
  (36007, 13484)	1
  (36007, 15272)	1
  (36007, 15737)	1
  (36007, 17474)	1
  (36007, 17492)	1
  (36007, 17665)	1
  (36007, 18592)	1
  (36007, 20249)	1
  (36007, 23524)	1
  (36007, 27297)	1
[0 1 0 ... 0 0 0]


In [11]:
#protein-protein check
print("1")
print(gene_adj)
print("2")

f.close()

1
  (57, 974)	1
  (57, 2795)	1
  (57, 3598)	1
  (57, 3914)	1
  (57, 4035)	1
  (57, 6272)	1
  (57, 6564)	1
  (57, 10482)	1
  (57, 10492)	1
  (57, 10988)	1
  (57, 23012)	1
  (63, 25)	1
  (63, 1398)	1
  (63, 2534)	1
  (63, 4690)	1
  (63, 5335)	1
  (64, 1994)	1
  (64, 3013)	1
  (64, 3054)	1
  (64, 3607)	1
  (64, 5928)	1
  (64, 6233)	1
  (64, 7311)	1
  (64, 7314)	1
  (64, 8314)	1
  :	:
  (36007, 5437)	1
  (36007, 5440)	1
  (36007, 5441)	1
  (36007, 6908)	1
  (36007, 6932)	1
  (36007, 9329)	1
  (36007, 9339)	1
  (36007, 9341)	1
  (36007, 9533)	1
  (36007, 10482)	1
  (36007, 10621)	1
  (36007, 10622)	1
  (36007, 11128)	1
  (36007, 11385)	1
  (36007, 12838)	1
  (36007, 13484)	1
  (36007, 15272)	1
  (36007, 15737)	1
  (36007, 17474)	1
  (36007, 17492)	1
  (36007, 17665)	1
  (36007, 18592)	1
  (36007, 20249)	1
  (36007, 23524)	1
  (36007, 27297)	1
2


In [12]:

# gene_drug_adj = sp.csr_matrix((10 * np.random.randn(n_genes, n_drugs) > 15).astype(int))
# drug_gene_adj = gene_drug_adj.transpose(copy=True)

f = open("/mnt/nas2/seogyeong/bio-decagon-targets-all.csv")
reader = csv.reader(f)
gene_drug_adj = sp.csr_matrix((2* gene_n, 2* drug_n), dtype=np.int8)

a =  0
count = 0
for line in reader:
    #print(line)
    count = count + 1
    if a==0:
        a=1
        continue
    if count % 5000 == 0:
        print("count: ",count, ", ", line)
        break
        #여기지우기 꼭!
    gene_drug_adj[hash_gene(int(line[1])), hash_drug(line[0])] = 1
print("hey")
print(gene_drug_adj)
drug_gene_adj = gene_drug_adj.transpose(copy=True)
f.close()





count:  5000 ,  ['CID100003510', '1145']
hey
  (13, 271)	1
  (15, 271)	1
  (26, 271)	1
  (26, 2803)	1
  (30, 271)	1
  (31, 271)	1
  (32, 271)	1
  (34, 271)	1
  (35, 271)	1
  (36, 271)	1
  (37, 271)	1
  (38, 271)	1
  (39, 271)	1
  (48, 961)	1
  (49, 271)	1
  (50, 961)	1
  (54, 271)	1
  (58, 271)	1
  (59, 271)	1
  (60, 271)	1
  (69, 271)	1
  (69, 961)	1
  (70, 271)	1
  (71, 271)	1
  (72, 271)	1
  :	:
  (36574, 1184)	1
  (36581, 271)	1
  (36691, 271)	1
  (36732, 271)	1
  (36799, 271)	1
  (36898, 271)	1
  (36917, 271)	1
  (36940, 271)	1
  (36957, 271)	1
  (36967, 271)	1
  (36969, 271)	1
  (36974, 271)	1
  (36981, 271)	1
  (36993, 271)	1
  (37010, 271)	1
  (37114, 271)	1
  (37124, 271)	1
  (37124, 1498)	1
  (37124, 2072)	1
  (37187, 271)	1
  (37347, 271)	1
  (37538, 271)	1
  (37624, 271)	1
  (37685, 271)	1
  (37753, 271)	1


In [13]:
#drug_gene check
print("1")
print(gene_drug_adj)
print("2")

1
  (13, 271)	1
  (15, 271)	1
  (26, 271)	1
  (26, 2803)	1
  (30, 271)	1
  (31, 271)	1
  (32, 271)	1
  (34, 271)	1
  (35, 271)	1
  (36, 271)	1
  (37, 271)	1
  (38, 271)	1
  (39, 271)	1
  (48, 961)	1
  (49, 271)	1
  (50, 961)	1
  (54, 271)	1
  (58, 271)	1
  (59, 271)	1
  (60, 271)	1
  (69, 271)	1
  (69, 961)	1
  (70, 271)	1
  (71, 271)	1
  (72, 271)	1
  :	:
  (36574, 1184)	1
  (36581, 271)	1
  (36691, 271)	1
  (36732, 271)	1
  (36799, 271)	1
  (36898, 271)	1
  (36917, 271)	1
  (36940, 271)	1
  (36957, 271)	1
  (36967, 271)	1
  (36969, 271)	1
  (36974, 271)	1
  (36981, 271)	1
  (36993, 271)	1
  (37010, 271)	1
  (37114, 271)	1
  (37124, 271)	1
  (37124, 1498)	1
  (37124, 2072)	1
  (37187, 271)	1
  (37347, 271)	1
  (37538, 271)	1
  (37624, 271)	1
  (37685, 271)	1
  (37753, 271)	1
2


In [14]:
# drug_drug check


#drug_drug_adj_list = []
#tmp = np.dot(drug_gene_adj, gene_drug_adj)
#for i in range(n_drugdrug_rel_types):
#    mat = np.zeros((n_drugs, n_drugs))
#    for d1, d2 in combinations(list(range(n_drugs)), 2):
#        if tmp[d1, d2] == i + 4:
#            mat[d1, d2] = mat[d2, d1] = 1.
#    drug_drug_adj_list.append(sp.csr_matrix(mat))
#    print("hmm")
#    print(drug_drug_adj_list)
# drug_degrees_list = [np.array(drug_adj.sum(axis=0)).squeeze() for drug_adj in drug_drug_adj_list]


drug_drug_adj_list = []

# category
df = pd.read_csv('/mnt/nas2/seogyeong/bio-decagon-effectcategories.csv', sep=',', header=0)
print(type(df.values))
print(df.values.T[0])
category = df.values.T[0]

category_count = 0

for cat in category:
    print("category_count: ", category_count, ", ", cat)
    gene_drug_adj = sp.csr_matrix((2* drug_n, 2* drug_n), dtype=np.int8)
    f = open("/mnt/nas2/seogyeong/bio-decagon-combo.csv")
    reader = csv.reader(f)
    category_count = category_count + 1
    for line in reader:
        if line[2] == cat:
            gene_drug_adj[hash_drug(line[0]), hash_drug(line[1]) ] = 1
            gene_drug_adj[hash_drug(line[1]), hash_drug(line[0]) ] = 1
    drug_drug_adj_list.append(gene_drug_adj)
    f.close()
    
    if category_count == 10:
        break
        #여기지우기 꼭!
drug_degrees_list = [np.array(drug_adj.sum(axis=0)).squeeze() for drug_adj in drug_drug_adj_list]

print(drug_drug_adj_list)
print("GHHHHHHHHHHHHHHHHH")
print(drug_degrees_list)


<class 'numpy.ndarray'>
['C0017152' 'C0027858' 'C0041466' 'C0032807' 'C0033860' 'C0259749'
 'C0349231' 'C0027726' 'C0024437' 'C0024312' 'C0020635' 'C0002878'
 'C0003873' 'C0003872' 'C0002871' 'C0030446' 'C0002874' 'C0029429'
 'C0019693' 'C0038013' 'C0018916' 'C0038019' 'C0008449' 'C0003486'
 'C0009768' 'C0004030' 'C0002170' 'C0009763' 'C0031350' 'C0476089'
 'C1527407' 'C0017658' 'C0013295' 'C0013298' 'C0000889' 'C0038454'
 'C0019284' 'C0017574' 'C0011636' 'C0035258' 'C0011633' 'C0027051'
 'C0007642' 'C0025290' 'C0025297' 'C0042594' 'C0010043' 'C0242225'
 'C0034951' 'C0021345' 'C0025362' 'C0019212' 'C0019196' 'C0034494'
 'C0014130' 'C0023798' 'C0006271' 'C0040262' 'C0011849' 'C0006277'
 'C0016053' 'C0040558' 'C0020565' 'C0020445' 'C0023646' 'C0027086'
 'C0014553' 'C0014550' 'C0162529' 'C0022568' 'C0018099' 'C0022603'
 'C0034931' 'C0242172' 'C0034072' 'C0149756' 'C0031538' 'C0155540'
 'C0023891' 'C0023890' 'C0023892' 'C0023895' 'C0030486' 'C0022665'
 'C0014038' 'C0022667' 'C0007570' 'C00

  self._set_intXint(row, col, x.flat[0])


category_count:  1 ,  C0027858
category_count:  2 ,  C0041466
category_count:  3 ,  C0032807
category_count:  4 ,  C0033860
category_count:  5 ,  C0259749
category_count:  6 ,  C0349231
category_count:  7 ,  C0027726
category_count:  8 ,  C0024437
category_count:  9 ,  C0024312
[<4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 22140 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 866 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 20 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 416 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 7712 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 1500 stored elements in Compressed Sparse Row format>, <4270x4270 sparse mat

In [15]:
# data representation
adj_mats_orig = {
    (0, 0): [gene_adj, gene_adj.transpose(copy=True)],
    (0, 1): [gene_drug_adj],
    (1, 0): [drug_gene_adj],
    (1, 1): drug_drug_adj_list + [x.transpose(copy=True) for x in drug_drug_adj_list],
}


degrees = {
    0: [gene_degrees, gene_degrees],
    1: drug_degrees_list + drug_degrees_list,
}


In [16]:

# featureless (genes)
gene_feat = sp.identity(gene_max)
gene_nonzero_feat, gene_num_feat = gene_feat.shape
gene_feat = preprocessing.sparse_to_tuple(gene_feat.tocoo())

# features (drugs)
drug_feat = sp.identity(drug_max)
drug_nonzero_feat, drug_num_feat = drug_feat.shape
drug_feat = preprocessing.sparse_to_tuple(drug_feat.tocoo())

# data representation
num_feat = {
    0: gene_num_feat,
    1: drug_num_feat,
}
nonzero_feat = {
    0: gene_nonzero_feat,
    1: drug_nonzero_feat,
}
feat = {
    0: gene_feat,
    1: drug_feat,
}

edge_type2dim = {k: [adj.shape for adj in adjs] for k, adjs in adj_mats_orig.items()}
edge_type2decoder = {
    (0, 0): 'bilinear',
    (0, 1): 'bilinear',
    (1, 0): 'bilinear',
    (1, 1): 'dedicom',
}
print(adj_mats_orig)
edge_types = {k: len(v) for k, v in adj_mats_orig.items()}
print(edge_types)
num_edge_types = sum(edge_types.values())
print("Edge types:", "%d" % num_edge_types)


{(0, 0): [<38244x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Row format>, <38244x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Column format>], (0, 1): [<4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 4654 stored elements in Compressed Sparse Row format>], (1, 0): [<4270x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Column format>], (1, 1): [<4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 22140 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 866 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 20 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 416 stored elements in Compressed Sparse Row format>, <4270x4270 spars

In [17]:

###########################################################
#
# Settings and placeholders
#
###########################################################

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('neg_sample_size', 1, 'Negative sample size.')
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 64, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_float('dropout', 0.1, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('max_margin', 0.1, 'Max margin parameter in hinge loss')
flags.DEFINE_integer('batch_size', 512, 'minibatch size.')
flags.DEFINE_boolean('bias', True, 'Bias term.')
flags.DEFINE_string("f", "", "kernel")
# Important -- Do not evaluate/print validation performance every iteration as it can take
# substantial amount of time
PRINT_PROGRESS_EVERY = 150

print("Defining placeholders")
placeholders = construct_placeholders(edge_types)
print(placeholders)


Defining placeholders
{'batch': <tf.Tensor 'batch:0' shape=<unknown> dtype=int32>, 'batch_edge_type_idx': <tf.Tensor 'batch_edge_type_idx:0' shape=() dtype=int32>, 'batch_row_edge_type': <tf.Tensor 'batch_row_edge_type:0' shape=() dtype=int32>, 'batch_col_edge_type': <tf.Tensor 'batch_col_edge_type:0' shape=() dtype=int32>, 'degrees': <tf.Tensor 'Placeholder:0' shape=<unknown> dtype=int32>, 'dropout': <tf.Tensor 'PlaceholderWithDefault:0' shape=() dtype=float32>, 'adj_mats_0,0,0': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f95f9b49b20>, 'adj_mats_0,0,1': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f95f9b490a0>, 'adj_mats_0,1,0': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f95f9bb0490>, 'adj_mats_1,0,0': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f95f9bb0520>, 'adj_mats_1,1,0': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f95f9bb77f0>, 'adj_mats_1,1,1': <tensorfl

In [18]:

###########################################################
#
# Create minibatch iterator, model and optimizer
#
###########################################################

# I changed decagon/deep/minibatch code (def mask_test_edges)


print("Create minibatch iterator")
print("adj_mats_orig")
print(adj_mats_orig)
print("feat")
print(feat)
print("edge_types")
print("FLAGS.batch_size")
print(FLAGS.batch_size)
print("val_test_size")
print(val_test_size)

minibatch = EdgeMinibatchIterator(
    adj_mats=adj_mats_orig,
    feat=feat,
    edge_types=edge_types,
    batch_size=FLAGS.batch_size,
    val_test_size=val_test_size
)

print("Create model")
model = DecagonModel(
    placeholders=placeholders,
    num_feat=num_feat,
    nonzero_feat=nonzero_feat,
    edge_types=edge_types,
    decoders=edge_type2decoder,
)

print("Create optimizer")
with tf.name_scope('optimizer'):
    opt = DecagonOptimizer(
        embeddings=model.embeddings,
        latent_inters=model.latent_inters,
        latent_varies=model.latent_varies,
        degrees=degrees,
        edge_types=edge_types,
        edge_type2dim=edge_type2dim,
        placeholders=placeholders,
        batch_size=FLAGS.batch_size,
        margin=FLAGS.max_margin
    )

print("Initialize session")
sess = tf.Session()
sess.run(tf.global_variables_initializer())
feed_dict = {}


Create minibatch iterator
Create minibatch iterator
adj_mats_orig
{(0, 0): [<38244x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Row format>, <38244x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Column format>], (0, 1): [<4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 4654 stored elements in Compressed Sparse Row format>], (1, 0): [<4270x38244 sparse matrix of type '<class 'numpy.int8'>'
	with 4998 stored elements in Compressed Sparse Column format>], (1, 1): [<4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 22140 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 866 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 20 stored elements in Compressed Sparse Row format>, <4270x4270 sparse matrix of type '<class 'numpy.int8'>'
	with 416 

  rowdegree_mat_inv = sp.diags(np.nan_to_num(np.power(rowsum, -0.5)).flatten())
  coldegree_mat_inv = sp.diags(np.nan_to_num(np.power(colsum, -0.5)).flatten())


Constructing test edges= 1000/1107
Constructing val edges= 0000/1107
Constructing val edges= 1000/1107
Train edges= 19926
Val edges= 1107
Test edges= 1107
k:  1
Minibatch edge type: (1, 1, 1)
Constructing test edges= 0000/0043
Constructing val edges= 0000/0043
Train edges= 0780
Val edges= 0043
Test edges= 0043
k:  2
Minibatch edge type: (1, 1, 2)
Constructing test edges= 0000/0010
Constructing val edges= 0000/0010
Train edges= 0000
Val edges= 0010
Test edges= 0010
k:  3
Minibatch edge type: (1, 1, 3)
Constructing test edges= 0000/0020
Constructing val edges= 0000/0020
Train edges= 0376
Val edges= 0020
Test edges= 0020
k:  4
Minibatch edge type: (1, 1, 4)
Constructing test edges= 0000/0385
Constructing val edges= 0000/0385
Train edges= 6942
Val edges= 0385
Test edges= 0385
k:  5
Minibatch edge type: (1, 1, 5)
Constructing test edges= 0000/0075
Constructing val edges= 0000/0075
Train edges= 1350
Val edges= 0075
Test edges= 0075
k:  6
Minibatch edge type: (1, 1, 6)
Constructing test edges



Initialize session


2022-08-30 06:15:03.735886: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-30 06:15:03.742516: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2022-08-30 06:16:58.397652: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (mklcpu) ran out of memory trying to allocate 36.88GiB (rounded to 39597562112)requested by op decagonmodel/graphconvolutionsparsemulti_4_vars/weights_4/Adam/Initializer/zeros
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows

ResourceExhaustedError: Graph execution error:

Detected at node 'decagonmodel/graphconvolutionsparsemulti_4_vars/random_uniform_12/RandomUniform' defined at (most recent call last):
    File "/root/anaconda3/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/root/anaconda3/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/root/anaconda3/lib/python3.9/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 677, in start
      self.io_loop.start()
    File "/root/anaconda3/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/root/anaconda3/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/root/anaconda3/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/root/anaconda3/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 471, in dispatch_queue
      await self.process_one()
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 460, in process_one
      await dispatch(*args)
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 367, in dispatch_shell
      await result
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 662, in execute_request
      reply_content = await reply_content
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 360, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_19239/2623286357.py", line 32, in <cell line: 32>
      model = DecagonModel(
    File "/root/decagon/decagon/deep/model.py", line 62, in __init__
      self.build()
    File "/root/decagon/decagon/deep/model.py", line 36, in build
      self._build()
    File "/root/decagon/decagon/deep/model.py", line 67, in _build
      self.hidden1[i].append(GraphConvolutionSparseMulti(
    File "/root/decagon/decagon/deep/layers.py", line 82, in __init__
      self.vars['weights_%d' % k] = inits.weight_variable_glorot(
    File "/root/decagon/decagon/deep/inits.py", line 10, in weight_variable_glorot
      initial = tf.random_uniform([input_dim, output_dim], minval=-init_range,
Node: 'decagonmodel/graphconvolutionsparsemulti_4_vars/random_uniform_12/RandomUniform'
OOM when allocating tensor with shape[154677977,64] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node decagonmodel/graphconvolutionsparsemulti_4_vars/random_uniform_12/RandomUniform}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

Original stack trace for 'decagonmodel/graphconvolutionsparsemulti_4_vars/random_uniform_12/RandomUniform':
  File "/root/anaconda3/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/anaconda3/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/root/anaconda3/lib/python3.9/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 677, in start
    self.io_loop.start()
  File "/root/anaconda3/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/root/anaconda3/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
    self._run_once()
  File "/root/anaconda3/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
    handle._run()
  File "/root/anaconda3/lib/python3.9/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 471, in dispatch_queue
    await self.process_one()
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 460, in process_one
    await dispatch(*args)
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 367, in dispatch_shell
    await result
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 662, in execute_request
    reply_content = await reply_content
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 360, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/root/anaconda3/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
    result = self._run_cell(
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
    return runner(coro)
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/root/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_19239/2623286357.py", line 32, in <cell line: 32>
    model = DecagonModel(
  File "/root/decagon/decagon/deep/model.py", line 62, in __init__
    self.build()
  File "/root/decagon/decagon/deep/model.py", line 36, in build
    self._build()
  File "/root/decagon/decagon/deep/model.py", line 67, in _build
    self.hidden1[i].append(GraphConvolutionSparseMulti(
  File "/root/decagon/decagon/deep/layers.py", line 82, in __init__
    self.vars['weights_%d' % k] = inits.weight_variable_glorot(
  File "/root/decagon/decagon/deep/inits.py", line 10, in weight_variable_glorot
    initial = tf.random_uniform([input_dim, output_dim], minval=-init_range,
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py", line 1082, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/ops/random_ops.py", line 309, in random_uniform
    result = gen_random_ops.random_uniform(
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/ops/gen_random_ops.py", line 737, in random_uniform
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/framework/op_def_library.py", line 740, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 3776, in _create_op_internal
    ret = Operation(
  File "/root/anaconda3/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 2175, in __init__
    self._traceback = tf_stack.extract_stack_for_node(self._c_op)


In [None]:

###########################################################
#
# Train model
#
###########################################################

print("Train model")
for epoch in range(FLAGS.epochs):

    minibatch.shuffle()
    itr = 0
    while not minibatch.end():
        # Construct feed dictionary
        feed_dict = minibatch.next_minibatch_feed_dict(placeholders=placeholders)
        feed_dict = minibatch.update_feed_dict(
            feed_dict=feed_dict,
            dropout=FLAGS.dropout,
            placeholders=placeholders)

        t = time.time()

        # Training step: run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.batch_edge_type_idx], feed_dict=feed_dict)
        train_cost = outs[1]
        batch_edge_type = outs[2]

        if itr % PRINT_PROGRESS_EVERY == 0:
            val_auc, val_auprc, val_apk = get_accuracy_scores(
                minibatch.val_edges, minibatch.val_edges_false,
                minibatch.idx2edge_type[minibatch.current_edge_type_idx])

            print("Epoch:", "%04d" % (epoch + 1), "Iter:", "%04d" % (itr + 1), "Edge:", "%04d" % batch_edge_type,
                  "train_loss=", "{:.5f}".format(train_cost),
                  "val_roc=", "{:.5f}".format(val_auc), "val_auprc=", "{:.5f}".format(val_auprc),
                  "val_apk=", "{:.5f}".format(val_apk), "time=", "{:.5f}".format(time.time() - t))

        itr += 1

print("Optimization finished!")

for et in range(num_edge_types):
    roc_score, auprc_score, apk_score = get_accuracy_scores(
        minibatch.test_edges, minibatch.test_edges_false, minibatch.idx2edge_type[et])
    print("Edge type=", "[%02d, %02d, %02d]" % minibatch.idx2edge_type[et])
    print("Edge type:", "%04d" % et, "Test AUROC score", "{:.5f}".format(roc_score))
    print("Edge type:", "%04d" % et, "Test AUPRC score", "{:.5f}".format(auprc_score))
    print("Edge type:", "%04d" % et, "Test AP@k score", "{:.5f}".format(apk_score))
    print()
