In [1]:
#!/usr/bin/env python

import pandas as pd
import networkx as nx
from networkx.algorithms.bipartite.matrix import biadjacency_matrix
import numpy as np
from sklearn.metrics import precision_recall_curve, auc
from sklearn.preprocessing import normalize
import random
from sklearn import metrics
import time
from sklearn.decomposition import NMF
from scipy import sparse
# from libnmf.gnmf import GNMF

import numpy.linalg as LA

from scipy import stats

import matplotlib.pyplot as plt
# import math
from scipy.linalg import inv

# from sklearn.metrics import confusion_matrix

from sklearn.linear_model import LinearRegression

# import seaborn as sns
from scipy import optimize
# from sklearn.metrics import r2_score
# from scipy.interpolate import make_interp_spline
# from sklearn.feature_selection import chi2
# import loess.loess_1d as l1d

# import tensorflow as tf

import multiprocessing
from joblib import Parallel, delayed
from math import sqrt
from sklearn.utils import parallel_backend

from itertools import product

from sklearn.metrics import roc_auc_score
from scipy.spatial.distance import cdist

# Fix the seeds of Python's `random` module and of NumPy's legacy global RNG.
# NOTE: GRNMF() below resets both seeds to the same value every time it is called.
random.seed(1949) # for dataset split
np.random.seed(1949) # for matrix initialization

In [2]:
def option(str):
    """Select the active method by storing ``str`` in the module-level ``methodOption``.

    The functions in this file that branch on ``methodOption`` compare it
    against "GRNMF1", "GRNMF2" and "GRNMF1&2"; no validation is done here.
    """
    # Writing through globals() is equivalent to `global methodOption; methodOption = str`.
    globals()["methodOption"] = str

In [3]:
def GRNMF(bipart_graph, component, WMK, lmd, max_iter, tolerance=1/1000000):
    """Graph-regularised non-negative matrix factorisation by multiplicative updates.

    Minimises ``||X - U V^T||_F^2 + lmd * trace(V^T L V)`` where ``L = D - W`` and
    ``D`` is the diagonal matrix of row sums of ``W``.

    Parameters
    ----------
    bipart_graph : array-like, shape (m, n)
        Matrix X to factorise.
    component : int
        Number of latent features k.
    WMK : array-like, shape (n, n)
        Weight-matrix kernel W over the columns of X (ndarray or DataFrame).
    lmd : float
        Regularisation weight of the graph term.
    max_iter : int
        Maximum number of update sweeps.
    tolerance : float, optional
        The loop stops as soon as the objective decrease of one sweep is
        smaller than ``tolerance * initial_objective``.

    Returns
    -------
    tuple
        ``(U, V, U @ V.T)`` with ``U`` of shape (m, k) and ``V`` of shape (n, k).
        A tuple is returned for every ``max_iter``; with ``max_iter <= 0`` it
        holds the (seeded) random initialisation.

    Notes
    -----
    The global NumPy and ``random`` seeds are reset to 1949 on every call, so
    repeated calls start from the same initial factors.
    """
    np.random.seed(1949)
    random.seed(1949)

    # Plain float ndarrays throughout: avoids deprecated np.matrix and the
    # mixed matrix/DataFrame arithmetic that a DataFrame kernel used to trigger.
    X = np.asarray(bipart_graph, dtype=float)
    W = np.asarray(WMK, dtype=float)
    m, n = X.shape
    k = component

    degree = W.sum(axis=1)          # diagonal of D
    L = np.diag(degree) - W         # graph Laplacian

    # Initialize U & V
    U = np.random.random((m, k))
    V = np.random.random((n, k))

    eps = 2**-8                     # keeps the update denominators away from zero

    def objective():
        reconstruction = LA.norm(X - np.dot(U, V.T))**2
        smoothness = lmd * np.trace(np.dot(np.dot(V.T, L), V))
        return reconstruction + smoothness

    initial_obj = objective()
    previous_obj = initial_obj

    for i in range(max_iter):
        # Both factors of the U update are evaluated before U is modified.
        # U (V^T V) equals (U V^T) V up to rounding but never forms an m x n product.
        numer_U = np.dot(X, V)
        denom_U = np.dot(U, np.dot(V.T, V)) + eps
        U *= numer_U
        U /= denom_U

        # D V is a row-wise scaling of V, so no dense n x n product is needed.
        numer_V = np.dot(X.T, U) + lmd * np.dot(W, V)
        denom_V = np.dot(V, np.dot(U.T, U)) + lmd * degree[:, None] * V + eps
        V *= numer_V
        V /= denom_V

        current_obj = objective()
        obj_drop = previous_obj - current_obj
        previous_obj = current_obj

        if obj_drop < (initial_obj * tolerance):
            print("Converged in iteration: ", i, "ObjDiff: ", obj_drop, "Obj: ", current_obj)
            break
    else:
        # Reached when the loop ran out of sweeps, including max_iter <= 0.
        print("Has not converged, reach the maximum iteration")

    return (U, V, np.dot(U, V.T))

In [4]:
def graph_normalization(graph):
    """Clip negatives to zero and scale each entry by its row and column sums.

    Entry (i, j) of the result is ``g_ij / sqrt(colsum_j + 1e-13) / sqrt(rowsum_i + 1e-13)``
    where the sums are taken over the clipped matrix. The input is not modified.
    """
    clipped = np.array(graph.copy())
    clipped[clipped < 0] = 0

    tiny = 0.0000000000001  # guards against division by zero for empty rows/columns
    col_scale = np.sqrt(clipped.sum(axis=0) + tiny)
    row_scale = np.sqrt(clipped.sum(axis=1) + tiny)

    # Column scaling first, then row scaling, matching the original operation order.
    by_column = clipped / col_scale
    return by_column / row_scale[:, np.newaxis]

In [5]:
def KernelRegression(matrix,feature_matrix1,feature_matrix2,idx_train,idx_test,l1,l2,s):
    """Fill the ``idx_test`` columns of ``matrix`` by Gaussian-kernel regression.

    A Gaussian kernel ``exp(-d^2 / s^2)`` (``d`` = Euclidean distance between
    feature rows) is built on the training items, the weights
    ``W = (K K + l1 I)^-1 K Y`` are fitted with ``Y = matrix[:, idx_train].T``,
    and the test columns are predicted as ``(k_new W).T``.

    The kernel is chosen by the module-level ``methodOption``:
    "GRNMF1" uses ``feature_matrix1``, "GRNMF2" uses ``feature_matrix2`` and
    "GRNMF1&2" uses the equal-weight average of both kernels.

    Parameters
    ----------
    matrix : ndarray, shape (r, n)
        Values per item (column); only the ``idx_train`` columns are read.
    feature_matrix1, feature_matrix2 : array-like, shape (n, d1) / (n, d2)
        Per-item feature rows, indexed by ``idx_train`` / ``idx_test``.
    idx_train, idx_test : sequence of int
        Column indices of the known and the to-be-predicted items.
    l1 : float
        Ridge weight added to the diagonal of ``K K``.
    l2 : float
        Accepted for interface compatibility; not used by any active method.
    s : float
        Kernel width sigma.

    Returns
    -------
    ndarray
        Float copy of ``matrix`` with the ``idx_test`` columns replaced.

    Raises
    ------
    ValueError
        If ``methodOption`` is not one of the supported methods (previously
        this surfaced as an UnboundLocalError on ``y_new``).
    """
    method = methodOption
    supported = ("GRNMF1", "GRNMF2", "GRNMF1&2")
    if method not in supported:
        raise ValueError(
            "unsupported methodOption %r; expected one of %s" % (method, ", ".join(supported))
        )

    sigma = s
    lmd1 = l1

    def gaussian_kernels(features):
        # Returns (train x train kernel, test x train kernel) for one feature matrix.
        train_rows = np.asarray(features[idx_train, :], dtype=float)
        test_rows = np.asarray(features[idx_test, :], dtype=float)
        k_train = np.exp(-cdist(train_rows, train_rows)**2 / sigma**2)
        k_test = np.exp(-cdist(test_rows, train_rows)**2 / sigma**2)
        return k_train, k_test

    if method == "GRNMF1":
        K, kernel_new = gaussian_kernels(feature_matrix1)
    elif method == "GRNMF2":
        K, kernel_new = gaussian_kernels(feature_matrix2)
    else:  # "GRNMF1&2": fixed equal weights for the two kernels
        c1 = 0.5
        c2 = 0.5
        K1, kernel1 = gaussian_kernels(feature_matrix1)
        K2, kernel2 = gaussian_kernels(feature_matrix2)
        K = c1*K1 + c2*K2
        kernel_new = c1*kernel1 + c2*kernel2

    n = len(idx_train)  # number of known items
    Y = np.asarray(matrix[:, idx_train], dtype=float).T

    # Solve (K K + l1 I) W = K Y directly instead of forming the explicit inverse.
    W = np.linalg.solve(K.dot(K) + lmd1 * np.eye(n), K.dot(Y))
    y_new = kernel_new.dot(W)

    matrix_new = matrix.copy().astype(float)
    matrix_new[:, idx_test] = y_new.T
    return matrix_new

In [6]:
def Adaptive(matrix,feature_matrix1,feature_matrix2,idx_train,idx_test,l1,l2,s,k):
    """Predict the ``idx_test`` columns of ``matrix`` via GRNMF plus kernel regression.

    Steps:
      1. Build a Gaussian kernel ``exp(-d^2 / s^2)`` over the training rows of
         the feature matrix selected by the module-level ``methodOption``
         ("GRNMF1" -> ``feature_matrix1``, "GRNMF2" -> ``feature_matrix2``,
         "GRNMF1&2" -> equal-weight average of both).
      2. Factorise ``matrix[:, idx_train]`` with ``GRNMF`` (``k`` components,
         regularisation ``l2``, at most 10000 iterations).
      3. Place the learned ``V`` rows at ``idx_train`` in an otherwise zero
         (n, k) array and pass its transpose to ``KernelRegression`` to fill
         the ``idx_test`` columns.
      4. Return ``U @ Vpreds``.

    Parameters
    ----------
    matrix : ndarray, shape (m, n)
    feature_matrix1, feature_matrix2 : array-like with one row per column of ``matrix``
    idx_train, idx_test : sequence of int
        Column indices used for training / to be predicted.
    l1 : float
        Forwarded to ``KernelRegression``.
    l2 : float
        Regularisation weight for ``GRNMF``; also forwarded to ``KernelRegression``.
    s : float
        Kernel width sigma.
    k : int
        Number of latent components.

    Returns
    -------
    ndarray, shape (m, n)

    Raises
    ------
    ValueError
        If ``methodOption`` is not a supported method (previously this
        surfaced as an UnboundLocalError on ``U``).
    """
    method = methodOption
    supported = ("GRNMF1", "GRNMF2", "GRNMF1&2")
    if method not in supported:
        raise ValueError(
            "unsupported methodOption %r; expected one of %s" % (method, ", ".join(supported))
        )

    sigma = s
    lmd2 = l2

    def train_kernel(features):
        # Gaussian similarity between the training rows of one feature matrix.
        rows = np.asarray(features[idx_train, :], dtype=float)
        return np.exp(-cdist(rows, rows)**2 / sigma**2)

    if method == "GRNMF1":
        WMK = train_kernel(feature_matrix1)
    elif method == "GRNMF2":
        WMK = train_kernel(feature_matrix2)
    else:  # "GRNMF1&2": fixed equal weights for the two kernels
        c1 = 0.5
        c2 = 0.5
        WMK = c1*train_kernel(feature_matrix1) + c2*train_kernel(feature_matrix2)

    y = matrix[:, idx_train].copy()
    U, V, _ = GRNMF(y, component=k, WMK=WMK, lmd=lmd2, max_iter=10000)

    # Latent factors for every column; rows outside idx_train stay zero
    # until KernelRegression overwrites the idx_test ones.
    m, n = matrix.shape
    Vout = np.zeros((n, k))
    Vout[idx_train, :] = V

    Vpreds = KernelRegression(Vout.T,feature_matrix1,feature_matrix2,idx_train,idx_test,l1,l2,s)

    preds = U.dot(Vpreds)

    return preds



In [7]:
def FeaturePreprocess(df_all, drug_nodes):
    """Return a feature array whose rows follow the order of ``drug_nodes``.

    Rows of ``df_all`` whose index label appears in ``drug_nodes`` are kept;
    labels in ``drug_nodes`` that have no row in ``df_all`` get an all-zero row.
    """
    present_labels = np.intersect1d(df_all.index, drug_nodes)
    known = df_all.loc[present_labels]

    # Zero-filled rows for every requested label without features.
    missing_labels = np.setdiff1d(drug_nodes, known.index.tolist())
    n_missing = len(missing_labels)
    n_features = known.shape[1]
    filler = pd.DataFrame(np.zeros(n_missing * n_features).reshape(n_missing, n_features))
    filler.index = missing_labels
    filler.columns = known.columns

    combined = pd.concat([known, filler], axis = 0)
    return np.array(combined.loc[drug_nodes])

In [8]:
def network_preprocess(dir, drug_nodes_order):
    """Load a space-separated weighted edge list and return its weight matrix.

    ``dir`` is the path handed to ``pd.read_csv``; the file is read without a
    header as three columns (two endpoints and a weight). The edges are
    inserted into an undirected ``nx.Graph`` and the result is the 'weight'
    attribute matrix with rows and columns ordered by ``drug_nodes_order``,
    converted to a NumPy array.
    """
    edges = pd.read_csv(
        dir,
        sep =" ",
        names =["left_side","right_side","similairity"],
        header=None,
    )

    print("Side effect graph information loading...")

    # One (u, v, weight) triple per row of the file.
    similarity_graph = nx.Graph()
    similarity_graph.add_weighted_edges_from(
        zip(edges["left_side"], edges["right_side"], edges["similairity"])
    )

    return np.array(
        nx.attr_matrix(similarity_graph, edge_attr='weight', rc_order=drug_nodes_order)
    )

In [9]:
def pr_roc_curve(Ground_Truth, score):
    """Confusion counts and PR/ROC curve points over score thresholds.

    The thresholds are the distinct scores of the positive entries
    (``Ground_Truth > 0``), taken in descending order. At each threshold every
    entry whose score is >= the threshold counts as predicted positive. Among
    predicted entries, those with ``Ground_Truth == 1`` count as TP and those
    with ``Ground_Truth == 0`` as FP (likewise FN / TN among the rest). Two
    end points are added: "nothing predicted" first and "everything
    predicted" last.

    Parameters
    ----------
    Ground_Truth : 1-D ndarray
        Labels, expected to be 0/1.
    score : 1-D ndarray
        Prediction scores, same length as ``Ground_Truth``.
        NOTE(review): scores are assumed to contain no NaN.

    Returns
    -------
    Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN
        Arrays of length ``n_thresholds + 2`` except ``F1``, which is the
        maximum F1 over all points. ``Recall`` is the same array as ``TPR``.
        Precision / FPR are 0 where their denominator is 0. If there is no
        positive entry, every returned item is a one-element array.
    """
    order = np.argsort(-score)
    Pre = score[order].copy()
    Gro = Ground_Truth[order].copy()
    ind = np.flatnonzero(Gro > 0)
    thresholdList = np.unique(Pre[ind])

    if thresholdList.size == 0:
        # No positive entry: degenerate single-point result.
        Precision = np.array([0])
        Recall = np.array([0])
        TPR = np.array([0])
        FPR = np.array([0])
        F1 = np.array([0])
        TP = np.array([0])
        FP = np.array([np.count_nonzero(Pre != 0)])
        TN = np.array([np.count_nonzero(Pre == 0)])
        FN = np.array([0])
        return Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN

    thresholdList = -np.sort(-thresholdList)    # descending
    AC_P = len(ind)
    AC_N = len(Gro) - AC_P
    N_thresholdList = len(thresholdList)

    # Pre is sorted descending, so -Pre is ascending and the last position
    # holding a given threshold is found by binary search instead of a full
    # scan per threshold.
    LOC = np.searchsorted(-Pre, -thresholdList, side="right") - 1

    # Running counts of positives / negatives along the ranking.
    cum_pos = np.cumsum(Gro == 1)
    cum_neg = np.cumsum(Gro == 0)

    TP = cum_pos[LOC].astype(float)
    FP = cum_neg[LOC].astype(float)
    FN = cum_pos[-1] - TP
    TN = cum_neg[-1] - FP

    # End points: nothing predicted / everything predicted.
    TP = np.hstack([0, TP, AC_P])
    FP = np.hstack([0, FP, AC_N])
    TN = np.hstack([AC_N, TN, 0])
    FN = np.hstack([AC_P, FN, 0])

    def ratio_or_zero(numerator, denominator):
        # Element-wise numerator / denominator, 0 where the denominator is 0.
        result = np.zeros(N_thresholdList + 2)
        defined = denominator != 0
        result[defined] = numerator[defined] / denominator[defined]
        return result

    FPR = ratio_or_zero(FP, TN + FP)
    TPR = TP / (TP + FN)

    Recall = TPR
    Precision = ratio_or_zero(TP, TP + FP)

    # Small constant avoids 0/0 at the first point, where precision and recall are both 0.
    F1 = (2 * (Precision * Recall) / (Precision + Recall + 0.0000000000001)).max()

    return Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN


In [10]:
def FmaxSmin(Ground_Truth_mat, score_mat, target_idx, TP, FP, TN, FN):
    """Information-content weighted Fmax and Smin over the ``target_idx`` columns.

    Each row gets a weight ``ic = -log2((row_sum + 1) / (n_other + 2))`` computed
    from the columns NOT in ``target_idx``. Thresholds are the distinct scores
    of the positive target entries (descending); an entry is predicted positive
    when its score is strictly greater than the threshold. Two end points are
    added ("nothing predicted" / "everything predicted").

    Parameters
    ----------
    Ground_Truth_mat, score_mat : ndarray, shape (m, n)
    target_idx : sequence of int
        Columns to evaluate.
    TP, FP, TN, FN :
        Unused; kept in the signature for existing callers.

    Returns
    -------
    (Fmax, Smin)
        Both taken only over points where at least one row has a prediction.

    Raises
    ------
    ValueError
        From ``np.hstack`` when the target columns contain no positive entry.
    """
    n_rows, n_cols = Ground_Truth_mat.shape

    truth_flat = (Ground_Truth_mat[:, target_idx].copy()).ravel()
    score_flat = (score_mat[:, target_idx].copy()).ravel()

    # Row weights from the columns outside the evaluation set.
    other_cols = np.setdiff1d(np.arange(n_cols), target_idx)
    other_truth = Ground_Truth_mat[:, other_cols].copy()
    ic = - np.log2((other_truth.sum(axis=1) + 1)/(other_truth.shape[1] + 2))

    # Distinct scores of the positive entries, highest first.
    ranking = np.argsort(-score_flat)
    ranked_scores = score_flat[ranking].copy()
    ranked_truth = truth_flat[ranking].copy()
    positive_pos = np.hstack(np.where(ranked_truth > 0))
    cutoffs = np.unique(ranked_scores[positive_pos].copy())
    cutoffs = np.hstack(cutoffs.copy())     # raises ValueError when there are no positives
    cutoffs = -np.sort(-np.unique(cutoffs))
    n_cut = len(cutoffs)

    def ic_total(per_entry):
        # Builtin sum keeps the left-to-right accumulation of the weighted row counts.
        return sum(ic*(per_entry.sum(axis = 1)))

    rows_predicted = np.zeros(n_cut)
    fn_weight = np.zeros(n_cut)
    fp_weight = np.zeros(n_cut)
    tp_weight = np.zeros(n_cut)

    Obs = Ground_Truth_mat[:, target_idx]
    for pos, cut in enumerate(cutoffs):
        Pred = (score_mat[:, target_idx] > cut)
        rows_predicted[pos] = sum((Pred.sum(axis = 1)) > 0)
        difference = Pred - Obs
        overlap = Pred + Obs

        fn_weight[pos] = ic_total(difference < 0)
        fp_weight[pos] = ic_total(difference > 0)
        tp_weight[pos] = ic_total(overlap == 2)

    # End points: nothing predicted first, everything predicted last.
    all_positive_weight = ic_total(Obs)
    all_negative_weight = ic_total(1 - Obs)

    n_e = np.hstack([0, rows_predicted, n_rows])
    icI_FN = np.hstack([all_positive_weight, fn_weight, 0])
    icI_FP = np.hstack([0, fp_weight, all_negative_weight])
    icI_TP = np.hstack([0, tp_weight, all_positive_weight])

    # Weighted precision (0 where nothing is predicted) and recall.
    pr = np.zeros(n_cut + 2)
    predicted_weight = icI_TP + icI_FP
    has_prediction = ~(predicted_weight == 0)
    pr[has_prediction] = icI_TP[has_prediction] / predicted_weight[has_prediction]
    rc = icI_TP / (icI_TP + icI_FN)

    # Remaining uncertainty / misinformation, averaged over rows with a prediction.
    ru = np.zeros(n_cut + 2)
    mi = np.zeros(n_cut + 2)
    nonempty = ~(n_e == 0)
    ru[nonempty] = icI_FN[nonempty] / n_e[nonempty]
    mi[nonempty] = icI_FP[nonempty] / n_e[nonempty]

    valid = n_e > 0
    F = 2 * pr * rc / (pr + rc + 0.0000000000001)
    Fmax = max(F[valid])
    S = np.sqrt(ru**2 + mi**2)
    Smin = min(S[valid])

    return Fmax, Smin

In [11]:
def fold(IDX1,IDX2,feature_matrix1,feature_matrix2,matrix,l1,l2,s,k):
    """Run one evaluation fold and report AUC-PR / AUC-ROC over all target entries.

    Parameters
    ----------
    IDX1 : sequence of int
        Target column indices that are evaluated.
    IDX2 : sequence of int
        Column indices that are masked (set to 0) before training.
    feature_matrix1, feature_matrix2, l1, l2, s, k :
        Forwarded unchanged to ``Adaptive``.
    matrix : ndarray, shape (m, n)
        Ground-truth matrix; it is copied, never modified.

    Returns
    -------
    tuple of 27 values
        Only ``pr_auc_all`` (position 7) and ``roc_auc_all`` (position 8) are
        computed; every other metric is a placeholder fixed at 0.
    """
    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx, mask_idx = IDX1, IDX2
    Ground_Truth = matrix.copy()

    # Training copy with every masked column zeroed out.
    masked_matrix = matrix.copy()
    for masked_col in mask_idx:
        masked_matrix[:, masked_col] = 0

    m, n = masked_matrix.shape
    existing_drug_idx = np.setdiff1d(list(range(n)), mask_idx)

    # Per-row mean over the unmasked columns, written into the masked columns.
    # This baseline is built but not evaluated in this function.
    row_mean = (Ground_Truth.copy()[:, existing_drug_idx]).mean(axis=1)
    score_mean = masked_matrix.copy().astype(float)
    score_mean[:, mask_idx] = row_mean[:, np.newaxis]

    print(methodOption + ' starts:')
    prediction = Adaptive(
        matrix=masked_matrix,
        feature_matrix1=feature_matrix1,
        feature_matrix2=feature_matrix2,
        idx_train=existing_drug_idx,
        idx_test=target_idx,
        l1=l1, l2=l2, s=s, k=k,
    )
    print(methodOption + ' ends:')

    score = prediction.copy()

    # Unused, but drawing it advances the global NumPy RNG exactly as before.
    random_score = np.random.rand(m,n)

    # Placeholders for metrics that are not computed here.
    pr_auc_all_mean = roc_auc_all_mean = F1_mean = 0
    pr_auc_per_drug_mean = roc_auc_per_drug_mean = 0
    fmax_mean = smin_mean = 0
    pr_auc_per_drug = roc_auc_per_drug = 0
    fmax = smin = F1 = 0
    pr_auc_all_rd = roc_auc_all_rd = 0
    pr_auc_per_drug_rd = roc_auc_per_drug_rd = 0
    fmax_rd = smin_rd = F1_rd = 0
    roc_auc_topn_mean = roc_auc_topn_rd = roc_auc_topn = 0
    pr_auc_topn_mean = pr_auc_topn_rd = pr_auc_topn = 0

    target_truth = Ground_Truth[:, target_idx]
    print("proportion of ground truth:", sum(target_truth.ravel())/(target_truth.shape[0]*target_truth.shape[1]))

    print('---evaluation---')

    prec, recall, threshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    pr_auc_all = auc(recall, prec)
    roc_auc_all = roc_auc_score(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())

    # (baseline label, baseline value, model label, model value), one group per separator.
    report = (
        ("AUC-PR mean:", pr_auc_all_mean, "AUC-PR all:", pr_auc_all),
        ("AUC-ROC mean:", roc_auc_all_mean, "AUC-ROC all:", roc_auc_all),
        ("AUC-PR per drug mean:", pr_auc_per_drug_mean, "AUC-PR per drug:", pr_auc_per_drug),
        ("AUC-ROC per drug mean:", roc_auc_per_drug_mean, "AUC-ROC per drug:", roc_auc_per_drug),
        ("AUC-ROC top N mean:", roc_auc_topn_mean, "AUC-ROC top N:", roc_auc_topn),
        ("AUC-PR top N mean:", pr_auc_topn_mean, "AUC-PR top N:", pr_auc_topn),
        ("F1 mean:", F1_mean, "F1:", F1),
        ("Fmax_mean", fmax_mean, "Fmax", fmax),
        ("Smin_mean", smin_mean, "Smin", smin),
    )
    for baseline_label, baseline_value, model_label, model_value in report:
        print("-----")
        print(baseline_label, baseline_value)
        print(model_label, model_value)

    return pr_auc_all_mean, roc_auc_all_mean, F1_mean, pr_auc_per_drug_mean, roc_auc_per_drug_mean, fmax_mean, smin_mean, \
        pr_auc_all, roc_auc_all, pr_auc_per_drug, roc_auc_per_drug, fmax, smin, F1, \
            pr_auc_all_rd, roc_auc_all_rd, pr_auc_per_drug_rd, roc_auc_per_drug_rd, fmax_rd, smin_rd, F1_rd, \
                roc_auc_topn_mean, roc_auc_topn_rd, roc_auc_topn, pr_auc_topn_mean, pr_auc_topn_rd, pr_auc_topn


In [12]:
def innerfold(IDX1,IDX2,feature_matrix1,feature_matrix2,matrix,l1,l2,s,k):
    """Run one inner fold and report AUC-PR over all target entries.

    Parameters
    ----------
    IDX1 : sequence of int
        Target column indices that are evaluated.
    IDX2 : sequence of int
        Column indices that are masked (set to 0) before training.
    feature_matrix1, feature_matrix2, l1, l2, s, k :
        Forwarded unchanged to ``Adaptive``.
    matrix : ndarray, shape (m, n)
        Ground-truth matrix; it is copied, never modified.

    Returns
    -------
    tuple of 27 values
        Only ``pr_auc_all`` (position 7) is computed; every other metric,
        including ``roc_auc_all``, is a placeholder fixed at 0.
    """
    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx, mask_idx = IDX1, IDX2
    Ground_Truth = matrix.copy()

    # Training copy with every masked column zeroed out.
    masked_matrix = matrix.copy()
    for masked_col in mask_idx:
        masked_matrix[:, masked_col] = 0

    m, n = masked_matrix.shape
    existing_drug_idx = np.setdiff1d(list(range(n)), mask_idx)

    # Per-row mean over the unmasked columns, written into the masked columns.
    # This baseline is built but not evaluated in this function.
    row_mean = (Ground_Truth.copy()[:, existing_drug_idx]).mean(axis=1)
    score_mean = masked_matrix.copy().astype(float)
    score_mean[:, mask_idx] = row_mean[:, np.newaxis]

    print(methodOption + ' starts:')
    prediction = Adaptive(
        matrix=masked_matrix,
        feature_matrix1=feature_matrix1,
        feature_matrix2=feature_matrix2,
        idx_train=existing_drug_idx,
        idx_test=target_idx,
        l1=l1, l2=l2, s=s, k=k,
    )
    print(methodOption + ' ends:')

    score = prediction.copy()

    # Unused, but drawing it advances the global NumPy RNG exactly as before.
    random_score = np.random.rand(m,n)

    target_truth = Ground_Truth[:, target_idx]
    print("proportion of ground truth:", sum(target_truth.ravel())/(target_truth.shape[0]*target_truth.shape[1]))

    print('---evaluation---')

    # Placeholders for metrics that are not computed here.
    pr_auc_all_mean = roc_auc_all_mean = F1_mean = 0
    pr_auc_per_drug_mean = roc_auc_per_drug_mean = 0
    fmax_mean = smin_mean = 0
    pr_auc_per_drug = roc_auc_per_drug = 0
    fmax = smin = F1 = 0
    pr_auc_all_rd = roc_auc_all_rd = 0
    pr_auc_per_drug_rd = roc_auc_per_drug_rd = 0
    fmax_rd = smin_rd = F1_rd = 0
    roc_auc_topn_mean = roc_auc_topn_rd = roc_auc_topn = 0
    pr_auc_topn_mean = pr_auc_topn_rd = pr_auc_topn = 0
    roc_auc_all = 0

    prec, recall, threshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    pr_auc_all = auc(recall, prec)

    for line in ("-----", ("AUC-PR all:", pr_auc_all), "-----"):
        if isinstance(line, tuple):
            print(*line)
        else:
            print(line)

    return pr_auc_all_mean, roc_auc_all_mean, F1_mean, pr_auc_per_drug_mean, roc_auc_per_drug_mean, fmax_mean, smin_mean, \
        pr_auc_all, roc_auc_all, pr_auc_per_drug, roc_auc_per_drug, fmax, smin, F1, \
            pr_auc_all_rd, roc_auc_all_rd, pr_auc_per_drug_rd, roc_auc_per_drug_rd, fmax_rd, smin_rd, F1_rd, \
                roc_auc_topn_mean, roc_auc_topn_rd, roc_auc_topn, pr_auc_topn_mean, pr_auc_topn_rd, pr_auc_topn


In [13]:
def plotfold(IDX1,IDX2,feature_matrix1,feature_matrix2,matrix,l1,l2,s,k):
    """Score one fold with ``Adaptive`` and return its PR and ROC curve points.

    The columns listed in ``IDX2`` are set to 0 in a copy of ``matrix``; the
    masked copy is handed to ``Adaptive`` and the scores it returns are
    compared with the unmasked ``matrix`` on the columns listed in ``IDX1``,
    pooling all entries of those columns.

    Only the pooled AUC-PR and AUC-ROC are computed. Every other metric in the
    printed report (baseline "mean" values, per-drug, top-N, F1, Fmax, Smin)
    is never computed in this function and is printed as the constant 0.

    Parameters
    ----------
    IDX1 : sequence of int
        Column indices that are evaluated.
    IDX2 : sequence of int
        Column indices that are masked (zeroed) before fitting.
    feature_matrix1, feature_matrix2
        Forwarded unchanged to ``Adaptive``.
    matrix : 2-D array
        Ground-truth matrix; it is not modified.
        NOTE(review): presumably side effects (rows) x drugs (columns) --
        confirm against the caller.
    l1, l2, s, k
        Hyper-parameters forwarded unchanged to ``Adaptive``.

    Returns
    -------
    Out1 : pandas.DataFrame
        Rows are precision, recall and PR thresholds (in that order).
    Out2 : pandas.DataFrame
        Rows are false-positive rate, true-positive rate and ROC thresholds.

    Notes
    -----
    Reads the module-level ``methodOption`` for the start/end log lines.
    """
    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx = IDX1
    mask_idx = IDX2
    Ground_Truth = matrix.copy()

    # Hide every masked column with a single vectorised assignment.
    masked_matrix = matrix.copy()
    masked_matrix[:, np.asarray(mask_idx, dtype=int)] = 0

    m, n = masked_matrix.shape
    # Columns that stay visible to the model.
    existing_drug_idx = np.setdiff1d(np.arange(n), mask_idx)

    print(methodOption + ' starts:')
    score = Adaptive(matrix=masked_matrix,
                     feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2,
                     idx_train=existing_drug_idx, idx_test=target_idx,
                     l1=l1, l2=l2, s=s, k=k)
    print(methodOption + ' ends:')

    # The random-score baseline is not evaluated, but the draw is kept so the
    # global NumPy random stream advances exactly as it did before.
    np.random.rand(m, n)

    truth_target = Ground_Truth[:, target_idx]
    score_target = score[:, target_idx]

    # ndarray.sum/size instead of the builtin sum, which iterates in Python.
    print("proportion of ground truth:", truth_target.sum() / truth_target.size)

    print('---evaluation---')

    y_true = truth_target.ravel()
    y_score = score_target.ravel()
    prec, recall, prthreshold = precision_recall_curve(y_true, y_score)
    pr_auc_all = auc(recall, prec)
    fpr, tpr, rocthreshold = metrics.roc_curve(y_true, y_score)
    roc_auc_all = auc(fpr, tpr)

    # Placeholder printed for every metric this function does not compute.
    not_computed = 0
    report = (
        ("AUC-PR mean:", not_computed, "AUC-PR all:", pr_auc_all),
        ("AUC-ROC mean:", not_computed, "AUC-ROC all:", roc_auc_all),
        ("AUC-PR per drug mean:", not_computed, "AUC-PR per drug:", not_computed),
        ("AUC-ROC per drug mean:", not_computed, "AUC-ROC per drug:", not_computed),
        ("AUC-ROC top N mean:", not_computed, "AUC-ROC top N:", not_computed),
        ("AUC-PR top N mean:", not_computed, "AUC-PR top N:", not_computed),
        ("F1 mean:", not_computed, "F1:", not_computed),
        ("Fmax_mean", not_computed, "Fmax", not_computed),
        ("Smin_mean", not_computed, "Smin", not_computed),
    )
    for baseline_label, baseline_value, model_label, model_value in report:
        print("-----")
        print(baseline_label, baseline_value)
        print(model_label, model_value)

    Out1 = pd.DataFrame([prec, recall, prthreshold])
    Out2 = pd.DataFrame([fpr, tpr, rocthreshold])
    return Out1, Out2

In [14]:
def setvar_tune(size):
    """(Re)create the module-level ``ALL_*`` tuning accumulators as zeros.

    Binds 27 module globals -- nine metrics, each in a ``_mean``, a ``_rd``
    and an unsuffixed variant -- to independent ``np.zeros(size)`` arrays.
    Any previous contents are discarded.

    Parameters
    ----------
    size : int or tuple of int
        Shape passed to ``np.zeros``; one slot per hyper-parameter setting.
    """
    metric_names = (
        "AUCPR_all", "AUROC_all",
        "AUCPR_per_drug", "AUROC_per_drug",
        "AUCPR_topn", "AUROC_topn",
        "F1", "Fmax", "Smin",
    )
    module_state = globals()
    for variant in ("_mean", "_rd", ""):
        for metric in metric_names:
            # Each name gets its own array so slots never alias each other.
            module_state["ALL_" + metric + variant] = np.zeros(size)

In [15]:
def setvar_cv(FOLDS):
    """(Re)create the module-level per-fold metric arrays as zeros.

    Binds 27 module globals -- nine metrics, each in a ``_mean``, an
    unsuffixed and a ``_rd`` variant -- to independent ``np.zeros(FOLDS)``
    arrays. Any previous contents are discarded.

    Parameters
    ----------
    FOLDS : int or tuple of int
        Shape passed to ``np.zeros``; one slot per cross-validation fold.
    """
    metric_names = (
        "AUC_roc_all", "AUC_pr_all",
        "AUC_roc_per_drug", "AUC_pr_per_drug",
        "AUC_roc_topn", "AUC_pr_topn",
        "Fmax", "Smin", "F1",
    )
    module_state = globals()
    for variant in ("_mean", "", "_rd"):
        for metric in metric_names:
            # Each name gets its own array so folds never alias each other.
            module_state[metric + variant] = np.zeros(FOLDS)

In [16]:
def asgvar_tune(idx, results):
    """Write one 27-element result sequence into slot ``idx`` of the ``ALL_*`` arrays.

    Parameters
    ----------
    idx : int
        Position of the hyper-parameter setting inside every ``ALL_*`` array.
    results : sequence
        Indexed from 0 to 26; element ``p`` is stored in the p-th array of
        ``targets`` below.

    The ``ALL_*`` arrays are module globals and must already exist.
    """
    # Position in this tuple == index read from ``results``.
    targets = (
        ALL_AUCPR_all_mean,        # 0
        ALL_AUROC_all_mean,        # 1
        ALL_F1_mean,               # 2
        ALL_AUCPR_per_drug_mean,   # 3
        ALL_AUROC_per_drug_mean,   # 4
        ALL_Fmax_mean,             # 5
        ALL_Smin_mean,             # 6
        ALL_AUCPR_all,             # 7
        ALL_AUROC_all,             # 8
        ALL_AUCPR_per_drug,        # 9
        ALL_AUROC_per_drug,        # 10
        ALL_Fmax,                  # 11
        ALL_Smin,                  # 12
        ALL_F1,                    # 13
        ALL_AUCPR_all_rd,          # 14
        ALL_AUROC_all_rd,          # 15
        ALL_AUCPR_per_drug_rd,     # 16
        ALL_AUROC_per_drug_rd,     # 17
        ALL_Fmax_rd,               # 18
        ALL_Smin_rd,               # 19
        ALL_F1_rd,                 # 20
        ALL_AUROC_topn_mean,       # 21
        ALL_AUROC_topn_rd,         # 22
        ALL_AUROC_topn,            # 23
        ALL_AUCPR_topn_mean,       # 24
        ALL_AUCPR_topn_rd,         # 25
        ALL_AUCPR_topn,            # 26
    )
    for position, store in enumerate(targets):
        store[idx] = results[position]

In [17]:
def asgvar_cv(f, results):
    """Write one 27-element result sequence into slot ``f`` of the per-fold arrays.

    Parameters
    ----------
    f : int
        Index of the slot (fold) to fill in every per-fold array.
    results : sequence
        Indexed from 0 to 26; element ``p`` is stored in the p-th array of
        ``targets`` below.

    The per-fold arrays are module globals and must already exist.
    """
    # Position in this tuple == index read from ``results``.
    targets = (
        AUC_pr_all_mean,         # 0
        AUC_roc_all_mean,        # 1
        F1_mean,                 # 2
        AUC_pr_per_drug_mean,    # 3
        AUC_roc_per_drug_mean,   # 4
        Fmax_mean,               # 5
        Smin_mean,               # 6
        AUC_pr_all,              # 7
        AUC_roc_all,             # 8
        AUC_pr_per_drug,         # 9
        AUC_roc_per_drug,        # 10
        Fmax,                    # 11
        Smin,                    # 12
        F1,                      # 13
        AUC_pr_all_rd,           # 14
        AUC_roc_all_rd,          # 15
        AUC_pr_per_drug_rd,      # 16
        AUC_roc_per_drug_rd,     # 17
        Fmax_rd,                 # 18
        Smin_rd,                 # 19
        F1_rd,                   # 20
        AUC_roc_topn_mean,       # 21
        AUC_roc_topn_rd,         # 22
        AUC_roc_topn,            # 23
        AUC_pr_topn_mean,        # 24
        AUC_pr_topn_rd,          # 25
        AUC_pr_topn,             # 26
    )
    for position, store in enumerate(targets):
        store[f] = results[position]

In [18]:
def tuning_plot(tuneVar, tune):
    """Plot every tuning metric against the swept hyper-parameter values.

    Nine figures are shown, one per metric, in this order: AUROC_all,
    AUPRC_all, AUROC_per_drug, AUPRC_per_drug, AUROC_topn, AUPRC_topn, Fmax,
    Smin, F1. Each figure draws the module-level ``ALL_<metric>`` array in
    navy and ``ALL_<metric>_mean`` in red.

    Parameters
    ----------
    tuneVar : sequence
        X values, one per slot of the ``ALL_*`` arrays.
    tune : {"C", "lmd", "component"} or None
        Which hyper-parameter was swept; it only selects the axis label
        ("C", "lambda" or "k") used in the x label and title suffix.
        ``None`` or any other value produces no plot.

    Returns
    -------
    None
    """
    if tune is None:
        return

    axis_labels = {"C": "C", "lmd": "lambda", "component": "k"}
    # Non-string or unknown modes fall through silently.
    if not isinstance(tune, str) or tune not in axis_labels:
        return
    axis_label = axis_labels[tune]

    # (model series, baseline series, y-axis label, title stem)
    panels = (
        (ALL_AUROC_all, ALL_AUROC_all_mean, "AUROC", "AUROC_all"),
        (ALL_AUCPR_all, ALL_AUCPR_all_mean, "AUPRC", "AUPRC_all"),
        (ALL_AUROC_per_drug, ALL_AUROC_per_drug_mean, "AUROC", "AUROC_per_drug"),
        (ALL_AUCPR_per_drug, ALL_AUCPR_per_drug_mean, "AUPRC", "AUPRC_per_drug"),
        (ALL_AUROC_topn, ALL_AUROC_topn_mean, "AUROC", "AUROC_topn"),
        (ALL_AUCPR_topn, ALL_AUCPR_topn_mean, "AUPRC", "AUPRC_topn"),
        (ALL_Fmax, ALL_Fmax_mean, "Fmax", "Fmax"),
        (ALL_Smin, ALL_Smin_mean, "Smin", "Smin"),
        (ALL_F1, ALL_F1_mean, "F1", "F1"),
    )
    for model_series, baseline_series, y_label, title_stem in panels:
        plt.figure()
        plt.plot(tuneVar, model_series, color='navy', linewidth=0.5)
        plt.plot(tuneVar, baseline_series, color='red', linewidth=0.5)
        plt.xlabel(axis_label)
        plt.ylabel(y_label)
        plt.title(title_stem + '-' + axis_label)
        plt.show()

In [19]:
def tuning_results(tuneVar):
    """Report the swept value whose entry in ``ALL_AUCPR_all`` is largest.

    Parameters
    ----------
    tuneVar : sequence
        Candidate settings, aligned slot-for-slot with the module-level
        ``ALL_AUCPR_all`` array.

    Returns
    -------
    tuple
        ``(best setting, its ALL_AUCPR_all value)``; both are also printed.
    """
    # np.argmax returns the first position of the maximum.
    winner = np.argmax(ALL_AUCPR_all)
    best_setting, best_score = tuneVar[winner], ALL_AUCPR_all[winner]

    print("best hyperpar: ", best_setting)
    print("AUPRC: ", best_score)

    return best_setting, best_score

In [20]:
def setvar_besttune(innerfolds):
    """(Re)create the two module-level trackers filled by ``asg_besttune``.

    ``besttunevalue`` becomes a zero array (best metric value per slot) and
    ``besttunevar`` a plain list of zeros of the same length (the value of
    the best variable per slot; a list so a slot can hold any object).

    Parameters
    ----------
    innerfolds : int
        Number of slots to allocate.
    """
    module_state = globals()
    module_state["besttunevalue"] = np.zeros(innerfolds)
    module_state["besttunevar"] = np.zeros(innerfolds).tolist()

In [21]:
def asg_besttune(f, value, var):
    """Record the best metric ``value`` and its setting ``var`` in slot ``f``.

    Writes into the module-level ``besttunevalue`` and ``besttunevar``
    containers, which must already exist.
    """
    for store, entry in ((besttunevalue, value), (besttunevar, var)):
        store[f] = entry

In [22]:
def besttune():
    """Return ``(value, var)`` for the slot with the largest ``besttunevalue``.

    Reads the module-level ``besttunevalue`` and ``besttunevar`` containers;
    on ties ``np.argmax`` selects the first maximal slot.
    """
    winner = np.argmax(besttunevalue)
    return besttunevalue[winner], besttunevar[winner]

In [23]:
def cv_results():
    """Print mean and standard deviation of every per-fold metric array.

    Three groups of nine module-level arrays are reported in turn: the
    ``*_mean`` arrays, the unsuffixed arrays and the ``*_rd`` arrays. A
    separator line follows the first two groups.

    Returns
    -------
    results_mean : numpy.ndarray
        The nine ``*_mean`` arrays stacked in reporting order.
    results : numpy.ndarray
        The nine unsuffixed arrays stacked in reporting order.

    The ``*_rd`` arrays are printed only; they are not returned.
    """
    def report(rows):
        # One line per metric: label, mean, spacer, std label, std.
        for label, values in rows:
            print(label, values.mean(), " ", "Standard Deviation:", values.std())

    baseline_rows = (
        ("Mean AUC_pr_all_mean:", AUC_pr_all_mean),
        ("Mean AUC_roc_all_mean", AUC_roc_all_mean),
        ("Mean AUC_pr_per_drug_mean", AUC_pr_per_drug_mean),
        ("Mean AUC_roc_per_drug_mean", AUC_roc_per_drug_mean),
        ("Mean AUC_pr_topn_mean", AUC_pr_topn_mean),
        ("Mean AUC_roc_topn_mean", AUC_roc_topn_mean),
        ("Mean Fmax_mean", Fmax_mean),
        ("Mean Smin_mean", Smin_mean),
        ("Mean F1_mean", F1_mean),
    )
    model_rows = (
        ("Mean AUC_pr_all", AUC_pr_all),
        ("Mean AUC_roc_all", AUC_roc_all),
        ("Mean AUC_pr_per_drug", AUC_pr_per_drug),
        ("Mean AUC_roc_per_drug", AUC_roc_per_drug),
        ("Mean AUC_pr_topn", AUC_pr_topn),
        ("Mean AUC_roc_topn", AUC_roc_topn),
        ("Mean Fmax", Fmax),
        ("Mean Smin", Smin),
        ("Mean F1", F1),
    )
    random_rows = (
        ("Mean AUC_pr_all_rd", AUC_pr_all_rd),
        ("Mean AUC_roc_all_rd", AUC_roc_all_rd),
        ("Mean AUC_pr_per_drug_rd", AUC_pr_per_drug_rd),
        ("Mean AUC_roc_per_drug_rd", AUC_roc_per_drug_rd),
        ("Mean AUC_pr_topn_rd", AUC_pr_topn_rd),
        ("Mean AUC_roc_topn_rd", AUC_roc_topn_rd),
        ("Mean Fmax_rd", Fmax_rd),
        ("Mean Smin_rd", Smin_rd),
        ("Mean F1_rd", F1_rd),
    )

    report(baseline_rows)
    print("-----------")
    results_mean = np.array([values for _, values in baseline_rows])

    report(model_rows)
    print("-----------")
    results = np.array([values for _, values in model_rows])

    report(random_rows)
    return results_mean, results

In [24]:
def tuning_loop(innermatrix, idx_train_inner, idx_test_inner, feature_matrix_inner1, feature_matrix_inner2, hyperparList, i):
    """Evaluate hyper-parameter setting ``i`` and store its metrics in slot ``i``.

    ``hyperparList[i]`` is unpacked as ``(l1, l2, s, k)`` and forwarded to
    ``innerfold``. ``idx_test_inner`` is passed to ``innerfold`` twice, as
    its first two positional arguments. The returned metrics are stored
    through ``asgvar_tune``.

    ``idx_train_inner`` is accepted but not used by this function.
    """
    lmd_kr, lmd_gnmf, sigma, k_value = hyperparList[i]

    print('target size:', len(idx_test_inner))
    fold_metrics = innerfold(
        idx_test_inner,
        idx_test_inner,
        feature_matrix1=feature_matrix_inner1,
        feature_matrix2=feature_matrix_inner2,
        matrix=innermatrix,
        l1=lmd_kr,
        l2=lmd_gnmf,
        s=sigma,
        k=k_value,
    )
    asgvar_tune(i, results=fold_metrics)
    print("------ lmdKR: ", lmd_kr, "lmdGNMF: ", lmd_gnmf, "sigma: ", sigma, "k: ", k_value, "------")

In [25]:
def _hyperpar_grid(method_option, mode):
    """Return the hyper-parameter search grid for a tuning mode and method.

    Parameters
    ----------
    method_option : str
        One of "GRNMF1", "GRNMF2", "GRNMF1&2".
    mode : str
        "nested_cv" or "cv".

    Returns
    -------
    list of tuple
        Cartesian product of (lmd1, lmd2, sigma, comp) values, in that order.

    Raises
    ------
    ValueError
        If no grid is defined for the given (mode, method_option) pair.
    """
    # (mode, method) -> (lmd1 exponent range, sigma exponent range, component range)
    # lmd1 and sigma are powers of ten; components advance in steps of 5.
    ranges = {
        ("nested_cv", "GRNMF1"): ((-3, 1), (0, 3), (5, 25)),
        ("nested_cv", "GRNMF2"): ((-4, 2), (0, 5), (5, 30)),
        ("nested_cv", "GRNMF1&2"): ((-3, 1), (0, 3), (10, 45)),
        ("cv", "GRNMF1"): ((-3, 1), (0, 3), (5, 40)),
        ("cv", "GRNMF2"): ((-3, 2), (0, 4), (5, 30)),
        ("cv", "GRNMF1&2"): ((-3, 2), (0, 3), (5, 40)),
    }
    if (mode, method_option) not in ranges:
        raise ValueError(
            "No hyper-parameter grid defined for method_option=%r in %r mode; "
            "expected one of 'GRNMF1', 'GRNMF2', 'GRNMF1&2'." % (method_option, mode)
        )
    (lmd1_lo, lmd1_hi), (sigma_lo, sigma_hi), (comp_lo, comp_hi) = ranges[(mode, method_option)]

    lmd1 = (10**np.arange(lmd1_lo, lmd1_hi, 1, dtype=float)).tolist()
    lmd2 = (np.arange(0, 1, 1, dtype=float)).tolist()  # single value: 0.0
    sigma = (10**np.arange(sigma_lo, sigma_hi, 1, dtype=float)).tolist()
    comp = np.arange(comp_lo, comp_hi, 5, dtype=int).tolist()
    return list(product(lmd1, lmd2, sigma, comp))


def main(method_option,normalization=True,Validation=False,sets="intersect", l1=0.5, l2=0, s=0.5, k=20):
    """Load the drug / side-effect data and run one experiment mode.

    Parameters
    ----------
    method_option : str
        Method name stored globally through ``option``; the tuning modes
        ("nested_cv", "cv") require "GRNMF1", "GRNMF2" or "GRNMF1&2".
    normalization : bool
        Currently unused (kept for interface compatibility).
    Validation : str or False
        Experiment mode:
        * "nested_cv"  - 5 outer folds, 4 inner tuning folds; returns
          ``(out_mean, out)`` from ``cv_results()``.
        * "cv"         - 5-fold hyper-parameter search only; returns None.
        * "Validation" - single run of ``fold`` on the held-out validation
          drugs with the given ``l1, l2, s, k``; returns None.
        * "plot"       - same split, calls ``plotfold``; returns ``(pr, roc)``.
        Any other value (including the default ``False``) loads the data,
        builds the splits and returns None.
    sets : str
        "intersect" keeps drugs with a non-zero row in both feature matrices,
        "union" keeps drugs with a non-zero row in at least one; any other
        value applies no filtering.
    l1, l2, s, k :
        Fixed hyper-parameters used by the "Validation" and "plot" modes
        (printed as lmdKR, lmdGRNMF, sigma and k).

    Raises
    ------
    ValueError
        In the tuning modes, if ``method_option`` has no search grid.
    """
    random.seed(1949) # for dataset split
    np.random.seed(1949) # for matrix initialization
    option(method_option)

    # The grid is loop-invariant, so build it once; this also rejects an
    # unknown method before any expensive loading happens.
    if Validation in ("nested_cv", "cv"):
        hyperparList = _hyperpar_grid(method_option, Validation)

    df = pd.read_csv("data/side-effect-and-drug_name_upper.tsv",sep = "\t")
    drug_name = df["drugbank_name"]
    side_effect = df["side_effect_name"]

    ## making bipartite graph: side effects (bipartite=0) -> drugs (bipartite=1) ##
    B = nx.DiGraph()
    B.add_nodes_from(side_effect,bipartite = 0)
    B.add_nodes_from(drug_name,bipartite = 1)
    B.add_edges_from(zip(side_effect, drug_name))
    drug_nodes = sorted(n for n, d in B.nodes(data=True) if d['bipartite']==1)
    side_effect_nodes = sorted(n for n, d in B.nodes(data=True) if d['bipartite']==0)

    ### Bi-adjacency matrix: rows are side effects, columns are drugs ###
    matrix_all = biadjacency_matrix(B, row_order = side_effect_nodes, column_order = drug_nodes)
    # .toarray() works for both scipy sparse matrices and sparse arrays
    # (the `.A` shortcut is not available on sparse arrays in recent SciPy).
    matrix_all = matrix_all.toarray()
    n_all = matrix_all.shape[1] # number of drugs

    ### Setting validation set / training set / testing set ###
    validate_sz = int(0.25 * n_all)
    IDX_all = list(range(n_all))
    random.shuffle(IDX_all)
    IDX_validate = sorted(IDX_all[0:validate_sz])
    print("first few validation set idx:")
    print(IDX_validate[0:10])
    IDX_validate_diff = np.setdiff1d(IDX_all, IDX_validate)
    matrix = matrix_all[:, IDX_validate_diff].copy()

    df1 = pd.read_csv("data/intersection_DGIdb_mat.tsv",sep = "\t")
    df2 = pd.read_csv("data/intersection_Fingerprint_mat.tsv",sep = "\t")
    featureMat1_all = FeaturePreprocess(df1, drug_nodes=drug_nodes)
    featureMat2_all = FeaturePreprocess(df2, drug_nodes=drug_nodes)
    featureMat1 = featureMat1_all[IDX_validate_diff, :].copy()
    featureMat2 = featureMat2_all[IDX_validate_diff, :].copy()

    if sets == "union":
        # keep drugs that have a non-zero row in at least one feature matrix
        non_zero_idx_union = np.hstack(np.where(~((featureMat1.sum(1) == 0) & (featureMat2.sum(1) == 0))))
        matrix = matrix[:, non_zero_idx_union].copy()
        featureMat1 = featureMat1[non_zero_idx_union, :].copy()
        featureMat2 = featureMat2[non_zero_idx_union, :].copy()
    elif sets == "intersect":
        # keep drugs that have a non-zero row in both feature matrices
        non_zero_idx_intersect = np.hstack(np.where(~(featureMat1.sum(1) == 0) & ~(featureMat2.sum(1) == 0)))
        non_zero_idx_intersect_all = np.hstack(np.where(~(featureMat1_all.sum(1) == 0) & ~(featureMat2_all.sum(1) == 0)))

        matrix_all = matrix_all[:, non_zero_idx_intersect_all].copy()
        featureMat1_all = featureMat1_all[non_zero_idx_intersect_all, :].copy()
        featureMat2_all = featureMat2_all[non_zero_idx_intersect_all, :].copy()

        matrix = matrix[:, non_zero_idx_intersect].copy()
        featureMat1 = featureMat1[non_zero_idx_intersect, :].copy()
        featureMat2 = featureMat2[non_zero_idx_intersect, :].copy()

        # Restrict both index sets to the retained drugs (still original numbering) ...
        IDX_validate = np.setdiff1d(non_zero_idx_intersect_all, IDX_validate_diff)
        IDX_validate_diff = np.setdiff1d(non_zero_idx_intersect_all, IDX_validate)

        drug_nodes_intersect_all = np.array(drug_nodes)[non_zero_idx_intersect_all]
        drug_nodes_intersect_validate_diff = np.array(drug_nodes)[IDX_validate_diff]
        drug_nodes_intersect_validate = np.array(drug_nodes)[IDX_validate]

        # ... then translate them, via drug names, into positions within the
        # filtered matrices. flatnonzero always yields an integer index array,
        # even when nothing matches.
        IDX_validate = np.flatnonzero(np.isin(drug_nodes_intersect_all, drug_nodes_intersect_validate))
        IDX_validate_diff = np.flatnonzero(np.isin(drug_nodes_intersect_all, drug_nodes_intersect_validate_diff))

    n = matrix.shape[1] # number of drugs available for cross-validation

    # Reseed so the fold assignment does not depend on the work done above.
    random.seed(1949) # for dataset split
    np.random.seed(1949) # for matrix initialization

    FOLDS = 5
    innerFOLDS = 4
    setvar_cv(FOLDS)

    # Outer folds: shuffled positions over all n drugs.
    sz = n
    IDX = list(range(sz))
    fsz = int(sz/FOLDS)
    random.shuffle(IDX)
    IDX = np.array(IDX)

    # Inner folds: shuffled positions over the sz - fsz training drugs of an
    # outer fold (same permutation reused for every outer fold).
    innersz = sz - fsz
    innerIDX = list(range(innersz))
    random.shuffle(innerIDX)
    innerIDX = np.array(innerIDX)
    innerfsz = int(innersz / innerFOLDS)

    if Validation == "nested_cv":
        for f in range(FOLDS):
            offset = fsz*f
            idx_test = IDX[offset:offset + fsz]
            idx_train = IDX[np.setdiff1d(np.arange(len(IDX)), np.arange(offset,offset + fsz))]
            print("Fold:",f)

            innermatrix = matrix[:, idx_train]
            innerfeatureMat1 = featureMat1[idx_train, :].copy()
            innerfeatureMat2 = featureMat2[idx_train, :].copy()

            setvar_besttune(innerFOLDS)

            for innerf in range(innerFOLDS):
                inneroffset = innerf*innerfsz
                idx_test_inner = innerIDX[inneroffset:inneroffset + innerfsz]
                idx_train_inner = innerIDX[np.array(np.setdiff1d(np.arange(len(idx_train)), np.arange(inneroffset,inneroffset + innerfsz)))]

                print("Inner Fold:", innerf)

                setvar_tune(len(hyperparList))

                with parallel_backend('threading'):
                    Parallel(n_jobs=5)(
                        delayed(tuning_loop)(
                            innermatrix = innermatrix,
                            idx_train_inner = idx_train_inner,
                            idx_test_inner = idx_test_inner,
                            feature_matrix_inner1 = innerfeatureMat1,
                            feature_matrix_inner2 = innerfeatureMat2,
                            hyperparList = hyperparList,
                            i = i,
                        )
                        for i in range(len(hyperparList))
                    )

                hyperpars, evalValue = tuning_results(tuneVar=hyperparList)
                asg_besttune(innerf, value=evalValue, var=hyperpars)

            _, bestHyperPars = besttune()

            print("--- tuning end ---")
            l1, l2, s, k = bestHyperPars
            idx_target = idx_test
            print('target size:', len(idx_target))

            print("------ lmdKR: ", l1, "lmdGNMF: ", l2, "sigma: ", s, "k: ", k, "------")

            results = fold(idx_target,idx_test,featureMat1,featureMat2,matrix,l1=l1,l2=l2,s=s,k=k)
            asgvar_cv(f=f, results=results)

        out_mean, out = cv_results()
        return out_mean, out

    elif Validation == "cv":
        setvar_besttune(FOLDS)

        for f in range(FOLDS):
            offset = fsz*f
            idx_test = IDX[offset:offset + fsz]
            idx_train = IDX[np.setdiff1d(np.arange(len(IDX)), np.arange(offset,offset + fsz))]

            print("Fold:", f)

            setvar_tune(len(hyperparList))

            with parallel_backend('threading'):
                Parallel(n_jobs=5)(
                    delayed(tuning_loop)(
                        innermatrix = matrix,
                        idx_train_inner = idx_train,
                        idx_test_inner = idx_test,
                        feature_matrix_inner1 = featureMat1,
                        feature_matrix_inner2 = featureMat2,
                        hyperparList = hyperparList,
                        i = i,
                    )
                    for i in range(len(hyperparList))
                )

            hyperpars, evalValue = tuning_results(tuneVar=hyperparList)
            asg_besttune(f, value=evalValue, var=hyperpars)

        print("--- tuning end ---")
        # Called for its side effects; the result is not returned in this mode.
        besttune()

    elif Validation == "Validation":
        # Evaluate on the held-out validation drugs using the full matrices.
        idx_test = IDX_validate
        idx_target = idx_test
        print('target size:', len(idx_target))
        print("------ lmdKR: ", l1, "lmdGRNMF: ", l2, "sigma: ", s, "k: ", k, "------")
        fold(idx_target,idx_test,featureMat1_all,featureMat2_all,matrix_all,l1=l1,l2=l2,s=s,k=k)
        return

    elif Validation == "plot":
        # Same split as "Validation", but returns the curve data from plotfold.
        idx_test = IDX_validate
        idx_target = idx_test
        print('target size:', len(idx_target))
        print("------ lmdKR: ", l1, "lmdGRNMF: ", l2, "sigma: ", s, "k: ", k, "------")
        pr, roc = plotfold(idx_target,idx_test,featureMat1_all,featureMat2_all,matrix_all,l1=l1,l2=l2,s=s,k=k)
        return pr, roc

In [154]:
# Nested cross-validation for GRNMF1; in this mode main() returns the
# (out_mean, out) pair produced by cv_results().
results_GRNMF1_mean, results_GRNMF1 = main(method_option="GRNMF1",Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]

First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size:target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size:target size: 11

In [26]:
# Nested cross-validation for GRNMF2; in this mode main() returns the
# (out_mean, out) pair produced by cv_results().
results_GRNMF2_mean, results_GRNMF2 = main(method_option="GRNMF2",Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115target size: 115

First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
GRNMF2 starts:
GRNMF2 starts:
GRNMF2 starts:
GRNMF2 starts:
GRNMF2 starts:
Converged in iteration:  37 ObjDiff:  49.41347033004786 Obj:  25234.70296760566
Converged in iterat

In [166]:
# Nested cross-validation for the combined GRNMF1&2 method; in this mode
# main() returns the (out_mean, out) pair produced by cv_results().
results_GRNMF1_2_mean, results_GRNMF1_2 = main(method_option="GRNMF1&2",Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
GRNMF1&2 starts:
GRNMF1&2 starts:
GRNMF1&2 starts:
GRNMF1&2 starts:
GRNMF1&2 starts:
Converged in iteration:  37 ObjDiff:  49.41347033004786 Obj:  25234.70296760566
Converged

In [157]:
# 5-fold hyper-parameter search for GRNMF1 (main() returns None in "cv" mode).
# The trailing numbers match the l1, s, k passed to the GRNMF1 "Validation"
# run later in the notebook -- presumably the values selected by this search.
main(method_option="GRNMF1",Validation="cv") # 0.01 10 20

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
target size: 115First few target index:target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
 target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]

First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
[384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [158]:
# 5-fold hyper-parameter search for GRNMF2 (main() returns None in "cv" mode).
# The trailing numbers match the l1, s, k passed to the GRNMF2 "Validation"
# run later in the notebook -- presumably the values selected by this search.
main(method_option="GRNMF2",Validation="cv") # 0.1 10 20

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few mask index:target size: 115 [384 293 396 263  31 466 141 431  32 337]

target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [159]:
# 5-fold hyper-parameter search for GRNMF1&2 (main() returns None in "cv" mode).
# The trailing numbers match the l1, s, k passed to the GRNMF1&2 "Validation"
# run later in the notebook -- presumably the values selected by this search.
main(method_option="GRNMF1&2",Validation="cv") # 0.01 10 25

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [162]:
# Single run of GRNMF1 on the held-out validation drugs with fixed
# hyper-parameters; main() calls fold() and returns None in this mode.
main(method_option="GRNMF1", Validation="Validation", l1=0.01, l2=0, s=10, k=20)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.01 lmdGRNMF:  0 sigma:  10 k:  20 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF1 starts:
Converged in iteration:  33 ObjDiff:  82.66115328598971 Obj:  43535.147895502996
GRNMF1 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.40797219094123977
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9112659568021647
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [163]:
# Single run of GRNMF2 on the held-out validation drugs with fixed
# hyper-parameters; main() calls fold() and returns None in this mode.
main(method_option="GRNMF2", Validation="Validation", l1=0.1, l2=0, s=10, k=20) 

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.1 lmdGRNMF:  0 sigma:  10 k:  20 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF2 starts:
Converged in iteration:  33 ObjDiff:  82.66115328598971 Obj:  43535.147895502996
GRNMF2 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.3738913225650598
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9066122269309099
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [164]:
# Single run of GRNMF1&2 on the held-out validation drugs with fixed
# hyper-parameters; main() calls fold() and returns None in this mode.
main(method_option="GRNMF1&2", Validation="Validation", l1=0.01, l2=0, s=10, k=25) 

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.01 lmdGRNMF:  0 sigma:  10 k:  25 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF1&2 starts:
Converged in iteration:  29 ObjDiff:  129.4298518763535 Obj:  42700.51214593004
GRNMF1&2 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.4147354246029788
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9116486151924379
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [55]:
# "plot" mode: main() returns the (pr, roc) pair from plotfold() for GRNMF1.
# Both tables are transposed and written as CSV without the index
# (assumes a Figs/ directory already exists -- TODO confirm).
KRNMF1_pr, KRNMF1_roc = \
    main(method_option="GRNMF1", Validation="plot", l1=0.01, l2=0, s=10, k=20)          
KRNMF1_pr.T.to_csv("Figs/KRNMF1_pr.csv", index=False)
KRNMF1_roc.T.to_csv("Figs/KRNMF1_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.01 lmdGRNMF:  0 sigma:  10 k:  20 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF1 starts:
Converged in iteration:  33 ObjDiff:  82.66115328598971 Obj:  43535.147895502996
GRNMF1 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.40797219094123977
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9112659568021648
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [56]:
# "plot" mode: main() returns the (pr, roc) pair from plotfold() for GRNMF2.
# Both tables are transposed and written as CSV without the index
# (assumes a Figs/ directory already exists -- TODO confirm).
KRNMF2_pr, KRNMF2_roc = \
    main(method_option="GRNMF2", Validation="plot", l1=0.1, l2=0, s=10, k=20)
KRNMF2_pr.T.to_csv("Figs/KRNMF2_pr.csv", index=False)
KRNMF2_roc.T.to_csv("Figs/KRNMF2_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.1 lmdGRNMF:  0 sigma:  10 k:  20 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF2 starts:
Converged in iteration:  33 ObjDiff:  82.66115328598971 Obj:  43535.147895502996
GRNMF2 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.37389132256505975
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9066122269309099
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [57]:
# "plot" mode: main() returns the (pr, roc) pair from plotfold() for GRNMF1&2.
# Both tables are transposed and written as CSV without the index
# (assumes a Figs/ directory already exists -- TODO confirm).
KRNMF12_pr, KRNMF12_roc = \
    main(method_option="GRNMF1&2", Validation="plot", l1=0.01, l2=0, s=10, k=25)
KRNMF12_pr.T.to_csv("Figs/KRNMF12_pr.csv", index=False)
KRNMF12_roc.T.to_csv("Figs/KRNMF12_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ lmdKR:  0.01 lmdGRNMF:  0 sigma:  10 k:  25 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
GRNMF1&2 starts:
Converged in iteration:  29 ObjDiff:  129.4298518763535 Obj:  42700.51214593004
GRNMF1&2 ends:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.4147354246029788
-----
AUC-ROC mean: 0
AUC-ROC all: 0.9116486151924379
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0
