In [34]:
import tensorflow as tf
import numpy as np
import math
import pandas as pd
from pylab import *
from data import *
from sklearn.model_selection import KFold
from sklearn.metrics import auc,roc_auc_score,precision_recall_curve

In [63]:
class NeuRanks():
    """Feed-forward scorer for (drug, target) index pairs, built with TensorFlow 1.x graph APIs.

    Every drug index and every target index owns a learned embedding row.
    The element-wise product of a pair's two embeddings is passed through
    ReLU hidden layers and a single linear output unit.  The training loss
    is the mean squared error against the labels plus L2 weight decay and
    two penalties that pull the embedding inner products toward the
    similarity matrices ``S_d`` and ``S_t``.

    Constructing an instance builds the graph, opens a session and
    initialises all variables; call :meth:`close` when the model is no
    longer needed.
    """

    def __init__(self,
                 drugs_num = None, # number of rows in the drug embedding table
                 targets_num = None, # number of rows in the target embedding table
                 S_d = None, # drug-drug similarity matrix
                 S_t = None, # target-target similarity matrix
                 batch_size = 64, # mini-batch size used by train()
                 embedding_size = 64, # embedding dimension
                 hidden_size = None, # hidden layer widths; None means [32, 16]
                 learning_rate = 1e-3, # Adam learning rate
                 lamda_regularizer = 1e-5, # L2 weight-decay coefficient
                 lamda_regularizer_d = 0.1, # weight of the drug-similarity penalty
                 lamda_regularizer_t = 0.1 # weight of the target-similarity penalty
                ):
        self.drugs_num = drugs_num
        self.targets_num = targets_num
        self.S_d = S_d
        self.S_t = S_t
        self.batch_size = batch_size
        self.embedding_size = embedding_size
        # A fresh list per instance: avoids sharing one mutable default
        # object between every model that relies on the default.
        self.hidden_size = [32, 16] if hidden_size is None else list(hidden_size)
        self.learning_rate = learning_rate
        self.lamda_regularizer = lamda_regularizer
        self.lamda_regularizer_d= lamda_regularizer_d
        self.lamda_regularizer_t = lamda_regularizer_t

        # Loss of every mini-batch seen so far (grows across train() calls).
        self.train_loss_records = []
        self.build_graph()


    def build_graph(self):
        """Create the graph, the optimiser and an initialised session.

        Note that ``self.S_d`` and ``self.S_t`` are replaced by float32
        tensors living in ``self.graph``.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            #tf.set_random_seed(-1)
            # _________ input data _________
            self.drugs_inputs = tf.placeholder(tf.int32, shape = [None], name='drugs_inputs')
            self.targets_inputs = tf.placeholder(tf.int32, shape = [None], name='targets_inputs')
            self.train_labels = tf.placeholder(tf.float32, shape = [None], name='train_labels')
            self.S_d = tf.convert_to_tensor(self.S_d, tf.float32)
            self.S_t = tf.convert_to_tensor(self.S_t, tf.float32)

            # _________ variables _________
            self.weights = self._initialize_weights()

            # _________ train _____________
            self.y_ = self.inference(drugs_inputs=self.drugs_inputs, targets_inputs=self.targets_inputs)
            self.loss_train = self.loss_function(true_labels=self.train_labels,
                                                 predicted_labels=tf.reshape(self.y_,shape=[-1]),
                                                 lamda_regularizer=self.lamda_regularizer,
                                                 lamda_regularizer_d = self.lamda_regularizer_d,
                                                 lamda_regularizer_t = self.lamda_regularizer_t)
            self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate,beta1=0.9, beta2=0.999, epsilon=1e-08).minimize(self.loss_train)

            # _________ prediction _____________
            # Same placeholders and same weights as the training forward
            # pass, so the existing output tensor is reused instead of
            # adding a second identical copy of the network to the graph.
            self.predictions = self.y_

            # variable initialisation
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = self._init_session()
            self.sess.run(init)


    def _init_session(self):
        """Return a new session whose GPU memory allocation grows on demand."""
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        return tf.Session(config=config)


    def close(self):
        """Close the TensorFlow session owned by this model."""
        self.sess.close()


    def _initialize_weights(self):
        """Create all trainable variables and return them in a dict.

        Keys: ``embedding_drugs``, ``embedding_targets``, one
        ``weight_<i>``/``bias_<i>`` pair per entry of ``self.hidden_size``,
        and ``weight_n``/``bias_n`` for the output unit.
        """
        all_weights = dict()

        # -----embedding layer------
        all_weights['embedding_drugs'] = tf.Variable(tf.random_normal([self.drugs_num, self.embedding_size], 0, 0.1),name='embedding_drugs')
        all_weights['embedding_targets'] = tf.Variable(tf.random_normal([self.targets_num, self.embedding_size], 0, 0.1),name='embedding_targets')

        # ------hidden layers------
        # Layer i maps layer_dims[i] -> layer_dims[i + 1]; the output layer
        # always consumes the width of the last layer that really exists.
        layer_dims = [self.embedding_size] + list(self.hidden_size)
        for i, (n_in, n_out) in enumerate(zip(layer_dims[:-1], layer_dims[1:])):
            all_weights['weight_%d' % i] = tf.Variable(tf.random_normal([n_in, n_out], 0.0, 0.1), name='weight_%d' % i)
            all_weights['bias_%d' % i] = tf.Variable(tf.zeros([n_out]), name='bias_%d' % i)

        # ------output layer-----
        all_weights['weight_n'] = tf.Variable(tf.random_normal([layer_dims[-1], 1], 0, 0.1), name='weight_n')
        all_weights['bias_n'] = tf.Variable(tf.zeros([1]), name='bias_n')

        return all_weights


    def train(self, data_sequence):
        """Run one pass over ``data_sequence`` in mini-batches.

        ``data_sequence`` is a sequence of ``[drug, target, label]`` rows.
        It is shuffled IN PLACE before batching.  Returns
        ``self.train_loss_records``, the list of per-batch losses
        accumulated over every call made on this instance.
        """
        train_size = len(data_sequence)
        np.random.shuffle(data_sequence)
        batch_size = self.batch_size
        total_batch = math.ceil(train_size/batch_size)

        # Convert once instead of once per batch.
        data_array = np.array(data_sequence)
        for batch in range(total_batch):
            start = batch*batch_size
            end = min(start+batch_size, train_size)
            batch_array = data_array[start:end]
            X = batch_array[:,:2] # drug index, target index
            y = batch_array[:,-1] # label

            loss_val=self.fit(X=X, y=y)
            self.train_loss_records.append(loss_val)

        return self.train_loss_records


    # forward pass of the network
    def inference(self, drugs_inputs, targets_inputs):
        """Return the raw (un-squashed) score tensor for index tensors of drugs and targets."""
        embed_drugs = tf.reshape(tf.nn.embedding_lookup(self.weights['embedding_drugs'], drugs_inputs),
                                 shape=[-1, self.embedding_size])
        embed_targets = tf.reshape(tf.nn.embedding_lookup(self.weights['embedding_targets'], targets_inputs),
                                 shape=[-1, self.embedding_size])

        # The pair representation is the element-wise product of embeddings.
        hidden = embed_targets*embed_drugs
        for i in range(len(self.hidden_size)):
            hidden = tf.nn.relu(tf.matmul(hidden, self.weights['weight_%d' % i]) + self.weights['bias_%d' % i])
        y_ = tf.matmul(hidden,self.weights['weight_n']) + self.weights['bias_n']
        return y_


    def fit(self, X, y):
        """Run one optimiser step on a batch and return its loss value.

        X: array whose column 0 holds drug indices and column 1 target indices.
        y: labels for the rows of X.
        """
        feed_dict = {self.drugs_inputs: X[:,0], self.targets_inputs: X[:,1],self.train_labels:y}
        loss, opt = self.sess.run([self.loss_train,self.train_op], feed_dict=feed_dict)
        return loss


    def loss_function(self, true_labels, predicted_labels,lamda_regularizer=1e-5, lamda_regularizer_d=1e-6, lamda_regularizer_t=1e-6):
        """Build the training loss tensor.

        loss = MSE(labels, predictions)
             + L2 decay on the embeddings and all weight matrices
             + lamda_regularizer_d * MSE(S_d, E_d E_d^T)
             + lamda_regularizer_t * MSE(S_t, E_t E_t^T)
        where E_d / E_t are the drug / target embedding tables.
        """
        mse = tf.losses.mean_squared_error(true_labels, predicted_labels)

        regularizer_1 = tf.contrib.layers.l2_regularizer(lamda_regularizer)
        decayed = [self.weights['embedding_drugs'], self.weights['embedding_targets']]
        decayed += [self.weights['weight_%d' % i] for i in range(len(self.hidden_size))]
        decayed.append(self.weights['weight_n'])
        regularization_1 = regularizer_1(decayed[0])
        for weight in decayed[1:]:
            regularization_1 = regularization_1 + regularizer_1(weight)

        # Entry (i, j) of E E^T is the inner product of embeddings i and j,
        # i.e. the same matrix the tiled element-wise product produced, but
        # without materialising two (N*N, embedding_size) tensors.
        sim_d = tf.matmul(self.weights['embedding_drugs'], self.weights['embedding_drugs'], transpose_b=True)
        s_score = tf.reshape(sim_d, shape=[-1])
        s_true = tf.reshape(self.S_d, shape=[-1])
        regularization_2 = lamda_regularizer_d * tf.losses.mean_squared_error(s_true, s_score)

        sim_t = tf.matmul(self.weights['embedding_targets'], self.weights['embedding_targets'], transpose_b=True)
        s_score = tf.reshape(sim_t, shape=[-1])
        s_true = tf.reshape(self.S_t, shape=[-1])
        regularization_3 = lamda_regularizer_t * tf.losses.mean_squared_error(s_true, s_score)

        cost = mse + regularization_1 + regularization_2 + regularization_3
        return cost


    def evaluate(self, X, labels):
        """Score the pairs in ``X`` and return ``(auc_score, aupr_score)``.

        X: array whose column 0 holds drug indices and column 1 target indices.
        labels: ground-truth labels for the rows of X.
        The second value is the area under the precision-recall curve.
        """
        drugs_inputs = X[:,0]
        targets_inputs = X[:,1]
        feed_dict = {self.drugs_inputs: drugs_inputs, self.targets_inputs: targets_inputs}
        score = self.sess.run([self.predictions], feed_dict=feed_dict)
        y_pred = np.reshape(score,(-1))

        auc_score = roc_auc_score(labels, y_pred)
        precision, recall, pr_thresholds = precision_recall_curve(labels, y_pred)
        aupr_score = auc(recall, precision)
        return auc_score, aupr_score

In [3]:
def generate_data(train_mat, sample_size=4):
    """Build training rows with negative sampling.

    For every entry ``train_mat[d, t] > 0`` one positive row ``[d, t, 1]``
    is emitted, followed by ``sample_size`` negative rows ``[d, t', 0]``
    where ``t'`` is drawn uniformly, with replacement, from the targets
    whose entry in row ``d`` is (numerically) zero.

    A row that has no zero entry contributes only its positive rows; the
    previous rejection loop never terminated in that case.

    Returns a list of ``[drug, target, label]`` lists.
    """
    data = []
    drugs_num, targets_num = train_mat.shape
    for d in range(drugs_num):
        row = train_mat[d, :]
        positive_targets = np.where(row > 0)[0]  # targets with a positive entry for this drug
        negative_targets = np.where(np.abs(row) < 1e-6)[0]  # candidates for negative sampling
        can_sample = sample_size > 0 and negative_targets.size > 0

        for target0 in positive_targets:
            data.append([d, target0, 1])
            if not can_sample:
                continue
            sampled = np.random.choice(negative_targets, size=sample_size)
            data.extend([d, target1, 0] for target1 in sampled)
    return data

### train for drug-target pairs

**Enzymes:**

- AUC

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,8] #隐层节点数目

learning_rate = 0.005 #学习率

lamda_regularizer = 1e-5 #正则项系数

lamda_regularizer_d = 0.1 #正则项系数 

lamda_regularizer_t = 0.1 #正则项系数

if loss_records[-1] < 0.05 and len(mode_list) > 2 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='auc')

- AUPR

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,16] #隐层节点数目

learning_rate = 0.0005 #学习率

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 1. #正则项系数 

lamda_regularizer_t = 1. #正则项系数 

if loss_records[-1] < 0.1 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

**Ion Channels:**

- AUC

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,8] #隐层节点数目

learning_rate = 0.0003 #学习率

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 0.01 #正则项系数 

lamda_regularizer_t = 0.01 #正则项系数

if loss_records[-1] < 0.04 and len(mode_list) > 10 and (mode_list[-2] < mode_list[-3] > mode_list[-1]):

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='auc')

####################################################################################################

- AUPR

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,16] #隐层节点数目

learning_rate = 0.001 #学习率

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 1e-4 #正则项系数

lamda_regularizer_t = 1e-4 #正则项系数 

if loss_records[-1] < 0.03 and len(mode_list) > 10 and (mode_list[-2] < mode_list[-3] > mode_list[-1]):

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

**GPCRs:**

- AUC

embedding_size = 8 # 用户的嵌入空间维度

hidden_size = [4,4] #隐层节点数目

learning_rate = 0.001 #学习率

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 0.5 #正则项系数 

lamda_regularizer_t = 0.5 #正则项系数 

if loss_records[-1] < 0.04 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='auc')

####################################################################################################

embedding_size = 16 # 用户的嵌入空间维度

hidden_size = [8,8] #隐层节点数目

learning_rate = 0.001 #学习率

lamda_regularizer = 1e-5 #正则项系数

lamda_regularizer_d = 0.1 #正则项系数 

lamda_regularizer_t = 0.1 #正则项系数

if loss_records[-1] < 0.02 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

####################################################################################################

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,16] #隐层节点数目

learning_rate = 0.001 #学习率

lamda_regularizer = 1e-3 #正则项系数

lamda_regularizer_d = 1.0 #正则项系数

lamda_regularizer_t = 1.0 #正则项系数

if loss_records[-1] < 0.13 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

####################################################################################################

embedding_size = 64 # 用户的嵌入空间维度

hidden_size = [32,32] #隐层节点数目

learning_rate = 0.001 #学习率

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 1e-1 #正则项系数

lamda_regularizer_t = 1e-1 #正则项系数

if loss_records[-1] < 0.03 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

- AUPR

embedding_size = 32 # 用户的嵌入空间维度

hidden_size = [16,16] #隐层节点数目

learning_rate = 0.001 #学习率 0.0005

lamda_regularizer = 1e-4 #正则项系数

lamda_regularizer_d = 0.1 #正则项系数 

lamda_regularizer_t = 0.1 #正则项系数 

if loss_records[-1] < 0.04 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

**Nuclear Receptors:**

- AUPR

embedding_size = 64 # 用户的嵌入空间维度

hidden_size = [32,16] #隐层节点数目

learning_rate = 0.005 #学习率

lamda_regularizer = 1e-5 #正则项系数

lamda_regularizer_d = 0.1 #正则项系数 

lamda_regularizer_t = 0.1 #正则项系数

sample_size = 4

epochs  = 256

STOP CONDITION: if loss_records[-1] < 0.01 and len(mode_list) > 3 and (mode_list[-2] < mode_list[-3] > mode_list[-1]):

train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='aupr')

In [57]:
def train(data_list, S_d, S_t, drugs_num, targets_num, mode='auc'):
    """10-fold cross-validation over individual drug-target pairs.

    Every (drug, target) cell of the interaction matrix is one instance;
    the folds partition those cells.  A fresh ``NeuRanks`` model is trained
    per fold and the mean AUC / AUPR over the folds is printed.

    mode: ``'auc'`` monitors AUC for early stopping, anything else
    monitors AUPR.

    NOTE(review): the early-stopping check uses metrics computed on the
    fold's test instances.
    """
    batch_size = 256 # mini-batch size
    embedding_size = 32 # embedding dimension
    hidden_size = [16, 8] # hidden layer widths
    learning_rate = 0.005 # learning rate
    lamda_regularizer = 1e-5 # L2 weight-decay coefficient
    lamda_regularizer_d = 0.1 # weight of the drug-similarity penalty
    lamda_regularizer_t = 0.1 # weight of the target-similarity penalty
    sample_size = 4
    epochs  = 16
    cv = 10

    # k-fold split
    kf = KFold(n_splits=cv, shuffle=True)
    data_mat = sequence2mat(sequence=data_list, N=drugs_num, M=targets_num)

    # One [drug, target, label] row per matrix cell, converted to an array
    # once instead of on every use inside the fold loop.
    instances = np.array([[d, t, data_mat[d, t]]
                          for d in range(drugs_num)
                          for t in range(targets_num)])
    cv_auc_list, cv_aupr_list = [],[]
    for train_ids, test_ids in kf.split(instances):
        train_list = instances[train_ids]
        test_instances = instances[test_ids]
        test_list, test_labels = test_instances[:,:2], test_instances[:,-1]
        train_mat = sequence2mat(sequence=train_list, N=drugs_num, M=targets_num)# training interaction matrix

        # build the model
        model = NeuRanks(drugs_num = drugs_num,
                         targets_num = targets_num,
                         S_d = S_d,
                         S_t = S_t,
                         batch_size = batch_size,
                         embedding_size = embedding_size,
                         hidden_size = hidden_size,
                         learning_rate = learning_rate,
                         lamda_regularizer=lamda_regularizer,
                         lamda_regularizer_d = lamda_regularizer_d,
                         lamda_regularizer_t = lamda_regularizer_t)
        try:
            auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
            print('Init: AUC = %.4f, AUPR=%.4f' %(auc_score, aupr_score))

            auc_list, aupr_list = [auc_score], [aupr_score]
            # Alias of the monitored list, so later appends are visible through it.
            mode_list = auc_list if mode=='auc' else aupr_list
            for epoch in range(epochs):
                data_sequence = generate_data(train_mat=train_mat, sample_size=sample_size)
                loss_records = model.train(data_sequence=data_sequence)
                auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
                auc_list.append(auc_score)
                aupr_list.append(aupr_score)
                print('epoch=%d, loss=%.4f, AUC=%.4f, AUPR=%.4f' %(epoch,loss_records[-1],auc_score, aupr_score))

                # Stop once the loss is low and the monitored metric two
                # epochs ago beats both later values; report that epoch.
                if loss_records[-1] < 0.05 and len(mode_list) > 2 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:
                    cv_auc, cv_aupr = auc_list[-3], aupr_list[-3]
                    break
                cv_auc, cv_aupr = auc_score, aupr_score
        finally:
            # Each fold owns its own graph and session; release it so
            # sessions do not pile up across folds and repeats.
            model.sess.close()
        cv_auc_list.append(cv_auc)
        cv_aupr_list.append(cv_aupr)

    print('AUC=%.4f, AUPR=%.4f' %(np.mean(cv_auc_list),np.mean(cv_aupr_list)))

In [60]:
if __name__ == '__main__':
    # Load one benchmark data set and run three repetitions of train().
    data_name = 'Enzyme'# Enzyme, Ion Channel, GPCR, Nuclear Receptor
    data_dir = 'datasets/'+ data_name + '.txt'
    drugs_num, targets_num, data_list, drug_ids_dict, target_ids_dict = load_data(file_dir=data_dir)
    print(data_name + ': N=%d, M=%d' %(drugs_num, targets_num))

    # File-name prefix of the similarity matrices for each data set.
    simmat_prefixes = {
        'Enzyme': 'e',
        'Ion Channel': 'ic',
        'GPCR': 'gpcr',
        'Nuclear Receptor': 'nr',
    }
    if data_name not in simmat_prefixes:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('unknown data_name %r; expected one of %s'
                         % (data_name, sorted(simmat_prefixes)))
    prefix = simmat_prefixes[data_name]
    dg_dir = 'datasets/' + prefix + '_simmat_dg.txt'
    dc_dir = 'datasets/' + prefix + '_simmat_dc.txt'

    data_dg = pd.read_table(dg_dir,sep="\t", header=0, index_col=0)
    data_dc = pd.read_table(dc_dir,sep="\t", header=0, index_col=0)
    # NOTE(review): the "dg" matrix is passed as S_d and the "dc" matrix as
    # S_t; confirm this matches the row/column convention of load_data.
    S_d = data_dg.values
    S_t = data_dc.values

    for repeat in range(3):
        train(data_list=data_list, S_d=S_d, S_t=S_t, drugs_num=drugs_num, targets_num=targets_num, mode='auc')
        print('---------------------------------------------------------------------------------------------')

Enzyme: N=664, M=445




Init: AUC = 0.4933, AUPR=0.0102
epoch=0, loss=0.1639, AUC=0.7352, AUPR=0.0723
epoch=1, loss=0.0442, AUC=0.9132, AUPR=0.5875
epoch=2, loss=0.0213, AUC=0.9440, AUPR=0.6493
epoch=3, loss=0.0220, AUC=0.9216, AUPR=0.6621
epoch=4, loss=0.0104, AUC=0.8999, AUPR=0.6492
Init: AUC = 0.4711, AUPR=0.0091
epoch=0, loss=0.1691, AUC=0.8258, AUPR=0.1296
epoch=1, loss=0.0649, AUC=0.9182, AUPR=0.5497
epoch=2, loss=0.0683, AUC=0.9429, AUPR=0.6539
epoch=3, loss=0.0247, AUC=0.9414, AUPR=0.6529
epoch=4, loss=0.0160, AUC=0.9001, AUPR=0.6543
Init: AUC = 0.5047, AUPR=0.0095
epoch=0, loss=0.1419, AUC=0.7742, AUPR=0.1263
epoch=1, loss=0.0644, AUC=0.9191, AUPR=0.5702
epoch=2, loss=0.0404, AUC=0.9382, AUPR=0.6281
epoch=3, loss=0.0223, AUC=0.9279, AUPR=0.6412
epoch=4, loss=0.0207, AUC=0.9022, AUPR=0.6734
Init: AUC = 0.5258, AUPR=0.0100
epoch=0, loss=0.1501, AUC=0.8057, AUPR=0.0997
epoch=1, loss=0.0893, AUC=0.9299, AUPR=0.4860
epoch=2, loss=0.0571, AUC=0.9586, AUPR=0.6799
epoch=3, loss=0.0294, AUC=0.9497, AUPR=0.671

Init: AUC = 0.4796, AUPR=0.0088
epoch=0, loss=0.1300, AUC=0.7940, AUPR=0.1035
epoch=1, loss=0.0809, AUC=0.9296, AUPR=0.5884
epoch=2, loss=0.0328, AUC=0.9532, AUPR=0.6919
epoch=3, loss=0.0166, AUC=0.9200, AUPR=0.6908
epoch=4, loss=0.0258, AUC=0.8935, AUPR=0.6892
AUC=0.9475, AUPR=0.6734
---------------------------------------------------------------------------------------------



### train for new drugs

In [55]:
def train(data_list, S_d, S_t, drugs_num, targets_num, mode='auc'):
    """10-fold cross-validation over drugs ("new drug" setting).

    The folds partition the drug indices: every pair of a held-out drug
    goes to the test set, every pair of the remaining drugs to the
    training set.  A fresh ``NeuRanks`` model is trained per fold and the
    mean AUC / AUPR over the folds is printed.

    mode: ``'auc'`` monitors AUC for early stopping, anything else
    monitors AUPR.

    NOTE(review): the early-stopping check uses metrics computed on the
    fold's test instances.
    """
    batch_size = 256 # mini-batch size
    embedding_size = 64 # embedding dimension
    hidden_size = [32,32] # hidden layer widths
    learning_rate = 0.001 # learning rate
    lamda_regularizer = 1e-4 # L2 weight-decay coefficient
    lamda_regularizer_d = 1e-1 # weight of the drug-similarity penalty
    lamda_regularizer_t = 1e-1 # weight of the target-similarity penalty
    sample_size = 4
    epochs  = 256
    cv = 10

    # k-fold split
    kf = KFold(n_splits=cv, shuffle=True)
    data_mat = sequence2mat(sequence=data_list, N=drugs_num, M=targets_num)

    cv_auc_list, cv_aupr_list = [],[]
    for train_ids, test_ids in kf.split(range(drugs_num)):
        train_list = np.array([[d,t,data_mat[d,t]] for d in train_ids for t in range(targets_num)])
        # Converted once, then sliced into inputs and labels.
        test_instances = np.array([[d,t,data_mat[d,t]] for d in test_ids for t in range(targets_num)])
        test_list, test_labels = test_instances[:,:2], test_instances[:,-1]
        train_mat = sequence2mat(sequence=train_list, N=drugs_num, M=targets_num)# training interaction matrix

        # build the model
        model = NeuRanks(drugs_num = drugs_num,
                         targets_num = targets_num,
                         S_d = S_d,
                         S_t = S_t,
                         batch_size = batch_size,
                         embedding_size = embedding_size,
                         hidden_size = hidden_size,
                         learning_rate = learning_rate,
                         lamda_regularizer=lamda_regularizer,
                         lamda_regularizer_d = lamda_regularizer_d,
                         lamda_regularizer_t = lamda_regularizer_t)
        try:
            auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
            print('Init: AUC = %.4f, AUPR=%.4f' %(auc_score, aupr_score))

            auc_list, aupr_list = [auc_score], [aupr_score]
            # Alias of the monitored list, so later appends are visible through it.
            mode_list = auc_list if mode=='auc' else aupr_list
            for epoch in range(epochs):
                data_sequence = generate_data(train_mat=train_mat, sample_size=sample_size)
                loss_records = model.train(data_sequence=data_sequence)
                auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
                auc_list.append(auc_score)
                aupr_list.append(aupr_score)
                print('epoch=%d, loss=%.4f, AUC=%.4f, AUPR=%.4f' %(epoch,loss_records[-1],auc_score, aupr_score))

                # Stop once the loss is low and the monitored metric two
                # epochs ago beats both later values; report that epoch.
                if loss_records[-1] < 0.03 and len(mode_list) > 10 and mode_list[-3] > mode_list[-1] and mode_list[-3] > mode_list[-2]:
                    cv_auc, cv_aupr = auc_list[-3], aupr_list[-3]
                    break
                cv_auc, cv_aupr = auc_score, aupr_score
        finally:
            # Each fold owns its own graph and session; release it so
            # sessions do not pile up across folds and repeats.
            model.sess.close()
        cv_auc_list.append(cv_auc)
        cv_aupr_list.append(cv_aupr)

    print('AUC=%.4f, AUPR=%.4f' %(np.mean(cv_auc_list),np.mean(cv_aupr_list)))

### train for new targets

In [71]:
def train(data_list, S_d, S_t, drugs_num, targets_num, mode='auc'):
    """10-fold cross-validation over targets ("new target" setting).

    The folds partition the target indices: every pair of a held-out
    target goes to the test set, every pair of the remaining targets to
    the training set.  A fresh ``NeuRanks`` model is trained per fold for
    the full number of epochs and the mean AUC / AUPR over the folds is
    printed.

    mode: kept for signature compatibility with the other ``train``
    variants; this variant has no early stopping, so it is not used.
    """
    batch_size = 64 # mini-batch size
    embedding_size = 64 # embedding dimension
    hidden_size = [32,16] # hidden layer widths
    learning_rate = 1e-3 # learning rate
    lamda_regularizer = 1e-5 # L2 weight-decay coefficient
    lamda_regularizer_d = 1e-1 # weight of the drug-similarity penalty
    lamda_regularizer_t = 1e-1 # weight of the target-similarity penalty
    sample_size = 4
    epochs  = 40
    cv = 10

    # k-fold split
    kf = KFold(n_splits=cv, shuffle=True)
    data_mat = sequence2mat(sequence=data_list, N=drugs_num, M=targets_num)

    cv_auc_list, cv_aupr_list = [],[]
    for train_ids, test_ids in kf.split(range(targets_num)):
        train_list = np.array([[d,t,data_mat[d,t]] for t in train_ids for d in range(drugs_num)])
        # Converted once, then sliced into inputs and labels.
        test_instances = np.array([[d,t,data_mat[d,t]] for t in test_ids for d in range(drugs_num)])
        test_list, test_labels = test_instances[:,:2], test_instances[:,-1]
        train_mat = sequence2mat(sequence=train_list, N=drugs_num, M=targets_num)# training interaction matrix

        # build the model
        model = NeuRanks(drugs_num = drugs_num,
                         targets_num = targets_num,
                         S_d = S_d,
                         S_t = S_t,
                         batch_size = batch_size,
                         embedding_size = embedding_size,
                         hidden_size = hidden_size,
                         learning_rate = learning_rate,
                         lamda_regularizer=lamda_regularizer,
                         lamda_regularizer_d = lamda_regularizer_d,
                         lamda_regularizer_t = lamda_regularizer_t)
        try:
            auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
            print('Init: AUC = %.4f, AUPR=%.4f' %(auc_score, aupr_score))

            auc_list, aupr_list = [auc_score], [aupr_score]
            for epoch in range(epochs):
                data_sequence = generate_data(train_mat=train_mat, sample_size=sample_size)
                loss_records = model.train(data_sequence=data_sequence)
                auc_score, aupr_score = model.evaluate(X=test_list, labels=test_labels)
                auc_list.append(auc_score)
                aupr_list.append(aupr_score)
                print('epoch=%d, loss=%.4f, AUC=%.4f, AUPR=%.4f' %(epoch,loss_records[-1],auc_score, aupr_score))

                # No early stopping here: the metrics of the last epoch
                # that ran are the ones reported for the fold.
                cv_auc, cv_aupr = auc_score, aupr_score
        finally:
            # Each fold owns its own graph and session; release it so
            # sessions do not pile up across folds and repeats.
            model.sess.close()
        cv_auc_list.append(cv_auc)
        cv_aupr_list.append(cv_aupr)

    print('AUC=%.4f, AUPR=%.4f' %(np.mean(cv_auc_list),np.mean(cv_aupr_list)))