In [1]:
#!/usr/bin/env python

import pandas as pd
import networkx as nx
from networkx.algorithms.bipartite.matrix import biadjacency_matrix
import numpy as np
from sklearn.metrics import precision_recall_curve, auc
from sklearn.preprocessing import normalize
import random
from sklearn import metrics
import time
from sklearn.decomposition import NMF
from scipy import sparse
# from libnmf.gnmf import GNMF

import numpy.linalg as LA

from scipy import stats

import matplotlib.pyplot as plt
# import math
# from scipy.linalg import logm

# from sklearn.metrics import confusion_matrix

from sklearn.linear_model import LinearRegression

# import seaborn as sns
from scipy import optimize
# from sklearn.metrics import r2_score
# from scipy.interpolate import make_interp_spline
# from sklearn.feature_selection import chi2
# import loess.loess_1d as l1d

# import tensorflow as tf

import multiprocessing
from joblib import Parallel, delayed
from math import sqrt
from sklearn.utils import parallel_backend

from itertools import product

from sklearn.metrics import roc_auc_score
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import Ridge

random.seed(1949) # for dataset split
np.random.seed(1949) # for matrix initialization

In [2]:
def RLN(feature_matrix, idx_train, idx_test):
    X = feature_matrix[idx_train, :]
    X_new = feature_matrix[idx_test, :]

    neigh = NearestNeighbors(n_neighbors = 200)
    neigh.fit(X)
    # N = neigh.kneighbors(X, 200, return_distance=False)
    # X_knn = X[N]
    # N_new = neigh.kneighbors(X_new, 200, return_distance=False)
    # X_knn_new = X[N_new]
    W = np.zeros([len(idx_train), len(idx_train)])
    W_new = np.zeros([len(idx_test), len(idx_train)])
    clf = Ridge(alpha=1)

    N = neigh.kneighbors(X, 200, return_distance=False)
    for i in range(len(idx_train)):
        # print("test")
        X_knn = X[N[i], :]
        clf.fit(X_knn.T, X[i, :])
        W[i, N] = clf.coef_

    N_new = neigh.kneighbors(X_new, 200, return_distance=False)
    for i in range(len(idx_test)):
        X_knn_new = X[N_new[i], :]
        clf.fit(X_knn_new.T, X_new[i, :])
        W_new[i, N_new[i]] = clf.coef_

    return W, W_new

In [3]:
def LNSM(matrix, feature_matrix, alpha, idx_train, idx_test):
    Y_0 = (matrix[:, idx_train].copy()).T

    W, W_new = RLN(feature_matrix=feature_matrix, idx_train=idx_train, idx_test=idx_test)

    max_iter = 1000
    Y_t1 = Y_0.copy()
    cost_t1 = alpha * np.trace(np.dot(np.dot(Y_t1.T, 1 - W), Y_t1)) + (1 - alpha)*LA.norm(Y_t1 - Y_0)**2

    for i in range(max_iter):
        Y_t2 = alpha * np.dot(W, Y_t1) + (1 - alpha) * Y_0

        cost_t2 = alpha * np.trace(np.dot(np.dot(Y_t2.T, 1 - W), Y_t2)) + (1 - alpha)*LA.norm(Y_t2 - Y_0)**2
        # print("---")
        # print(alpha)
        # print(cost_t2)
        Y_t1 = Y_t2.copy()
        cost_t1 = cost_t2

        if (cost_t2 - cost_t1) < (cost_t1 / 10000):
            print("LNSM converged")
            break
        if i == (max_iter - 1):
            print("maximum iteration reached")

    Y = Y_t2.copy()

    
    # Y = (1 - alpha) * np.dot(np.linalg.pinv(1 - alpha * W), Y_0)
    Y_new = np.dot(W_new, Y)
    matrix_new = matrix.copy().astype(float)
    matrix_new[:, idx_test] = Y_new.T
    # print(sum(sum(matrix_new[:, idx_test])))
    return matrix_new

In [4]:
def option(str):
    global methodOption
    methodOption = str

In [5]:
def option2(str):
    global validationOption
    validationOption = str

In [6]:
def network_preprocess(dir, drug_nodes_order):
    col_names = ["left_side","right_side","similairity"]
    df_drugs_sim = pd.read_csv(dir, sep =" ", names =col_names, header=None)

    source =df_drugs_sim["left_side"]
    destination = df_drugs_sim["right_side"]
    similarity = df_drugs_sim["similairity"]

    ###Drugs similarity Network###
    edge_list = zip(source,destination,similarity) # integrate 3 variables into tuple
    #print edge_list
    print("Side effect graph information loading...")
    G = nx.Graph() # creat a graph
    G.add_weighted_edges_from(edge_list) # add weighted edge from edge_list

    weight_matrix = nx.attr_matrix(G, edge_attr='weight', rc_order=drug_nodes_order)
    weight_matrix = np.array(weight_matrix)

    return weight_matrix

In [7]:
def LNSM_SMI(matrix, feature_matrix1, feature_matrix2, alpha, idx_train, idx_test):
    Y_0 = (matrix[:, idx_train].copy()).T
    # W1 = WMK1[idx_train, :][:, idx_train].copy()
    # W2 = WMK2[idx_train, :][:, idx_train].copy()
    # W1_new = WMK1[idx_test, :][:, idx_train].copy()
    # W2_new = WMK2[idx_test, :][:, idx_train].copy()
    
    W1, W1_new = RLN(feature_matrix=feature_matrix1, idx_train=idx_train, idx_test=idx_test)    
    W2, W2_new = RLN(feature_matrix=feature_matrix2, idx_train=idx_train, idx_test=idx_test)
    


    # Y = (1 - alpha) * np.linalg.inv(1 - alpha * W)
    c1 = np.trace(np.dot(np.dot(Y_0.T, 1 - W1), Y_0))
    c2 = np.trace(np.dot(np.dot(Y_0.T, 1 - W2), Y_0))
    cmax = max(c1, c2)
    theta1 = (cmax - c1) / ((cmax - c1) + (cmax - c2))
    theta2 = (cmax - c2) / ((cmax - c1) + (cmax - c2))

    Y_new = np.dot((np.dot(theta1, W1_new) + np.dot(theta2, W2_new)), Y_0)
    matrix_new = matrix.copy().astype(float)
    matrix_new[:, idx_test] = Y_new.T
    # print(sum(sum(matrix_new[:, idx_test])))
    return matrix_new

In [8]:
def LNSM_CMI(matrix, feature_matrix1, feature_matrix2, alpha, idx_train, idx_test):
    Y_0 = (matrix[:, idx_train].copy()).T
    # W1 = WMK1[idx_train, :][:, idx_train].copy()
    # W2 = WMK2[idx_train, :][:, idx_train].copy()
    # W1_new = WMK1[idx_test, :][:, idx_train].copy()
    # W2_new = WMK2[idx_test, :][:, idx_train].copy()
    W1, W1_new = RLN(feature_matrix=feature_matrix1, idx_train=idx_train, idx_test=idx_test)    
    W2, W2_new = RLN(feature_matrix=feature_matrix2, idx_train=idx_train, idx_test=idx_test)


    max_iter = 1000
    Y_t1 = Y_0.copy()
    cost_t1 = alpha * np.trace(np.dot(np.dot(Y_t1.T, 1 - W1), Y_t1)) + (1 - alpha)*LA.norm(Y_t1 - Y_0)**2

    for i in range(max_iter):
        Y_t2 = alpha * np.dot(W1, Y_t1) + (1 - alpha) * Y_0

        cost_t2 = alpha * np.trace(np.dot(np.dot(Y_t2.T, 1 - W1), Y_t2)) + (1 - alpha)*LA.norm(Y_t2 - Y_0)**2
        # print("---")
        # print(alpha)
        Y_t1 = Y_t2.copy()
        cost_t1 = cost_t2

        if (cost_t2 - cost_t1) < (cost_t1 / 10000):
            print("LNSM converged")
            break
        if i == (max_iter - 1):
            print("maximum iteration reached")

    Y1 = Y_t2.copy()

    Y_t1 = Y_0.copy()
    cost_t1 = alpha * np.trace(np.dot(np.dot(Y_t1.T, 1 - W2), Y_t1)) + (1 - alpha)*LA.norm(Y_t1 - Y_0)**2

    for i in range(max_iter):
        Y_t2 = alpha * np.dot(W2, Y_t1) + (1 - alpha) * Y_0

        cost_t2 = alpha * np.trace(np.dot(np.dot(Y_t2.T, 1 - W2), Y_t2)) + (1 - alpha)*LA.norm(Y_t2 - Y_0)**2
        # print("---")
        # print(alpha)
        # print(cost_t2)
        Y_t1 = Y_t2.copy()
        cost_t1 = cost_t2

        if (cost_t2 - cost_t1) < (cost_t1 / 10000):
            print("LNSM converged")
            break
        if i == (max_iter - 1):
            print("maximum iteration reached")

    Y2 = Y_t2.copy()



    # Y1 = (1 - alpha) * np.dot(np.linalg.pinv(1 - alpha * W1), Y_0)
    # Y2 = (1 - alpha) * np.dot(np.linalg.pinv(1 - alpha * W2), Y_0)
    Y1_new = np.dot(W1_new, Y1)
    Y2_new = np.dot(W2_new, Y2)

    cost1 = alpha * np.trace(np.dot(np.dot(Y1.T, 1 - W1), Y1)) + (1 - alpha)*LA.norm(Y1 - Y_0)**2
    cost2 = alpha * np.trace(np.dot(np.dot(Y2.T, 1 - W2), Y2)) + (1 - alpha)*LA.norm(Y2 - Y_0)**2
    costmax = max(cost1, cost2)
    theta1 = (costmax - cost1) / ((costmax - cost1) + (costmax - cost2))
    theta2 = (costmax - cost2) / ((costmax - cost1) + (costmax - cost2))
    # print(cost1)
    # print(cost2)
    # print(costmax - cost1)
    # print(costmax - cost2)

    Y_new = theta1 * Y1_new + theta2 * Y2_new
    matrix_new = matrix.copy().astype(float)
    matrix_new[:, idx_test] = Y_new.T
    # print(sum(sum(matrix_new[:, idx_test])))
    return matrix_new

In [9]:
def FeaturePreprocess(df_all, drug_nodes):
    
    drug_nodes_df = np.intersect1d(df_all.index, drug_nodes)
    df = df_all.loc[drug_nodes_df]
    _, q = df.shape
    drug_nodes_diff = np.setdiff1d(drug_nodes, (df.index).tolist())
    n = len(drug_nodes_diff)
    df_diff = pd.DataFrame(np.zeros(n*q).reshape(n,q))
    df_diff.index = drug_nodes_diff
    df_diff.columns = df.columns
    df_all = pd.concat([df, df_diff], axis = 0)
    featureMat = df_all.loc[drug_nodes]
    return np.array(featureMat)

In [10]:
def network_combination(W1, W2, T, train_idx):

    W1_vector = W1[train_idx, :][:, train_idx].ravel()
    W2_vector = W2[train_idx, :][:, train_idx].ravel()
    # W1_vector_sq = W1_vector**2
    # W2_vector_sq = W2_vector**2

    # W3_vector = W3[train_idx, :][:, train_idx].ravel()
    T_vector = T[train_idx, :][:, train_idx].ravel()
    lrModel = LinearRegression()
    # x = np.array([W1_vector, W2_vector, W1_vector_sq, W2_vector_sq]).T
    x = np.array([W1_vector, W2_vector]).T
    y = T_vector
    lrModel.fit(x,y)
    print("network coefs")
    print(lrModel.coef_[0])
    print(lrModel.coef_[1])
    # print(lrModel.coef_[2])
    # print(lrModel.coef_[3])

    print("intercept")
    print(lrModel.intercept_)
    # W = lrModel.intercept_ + lrModel.coef_[0]*W1 + lrModel.coef_[1]*W2 + lrModel.coef_[2]*W1**2 + lrModel.coef_[3]*W2**2
    W = lrModel.intercept_ + lrModel.coef_[0]*W1 + lrModel.coef_[1]*W2
    return W

In [11]:
def pr_roc_curve(Ground_Truth, score):

    recall_fix = 0.2
    sort_idx = np.argsort(-score)
    Pre = score[sort_idx].copy()
    Gro = Ground_Truth[sort_idx].copy()
    ind = np.hstack(np.where(Gro > 0))
    thresholdList = np.unique(Pre[ind].copy())
    # print(thresholdList)
    if thresholdList.size == 0:
        Precision = np.array([0])
        Recall = np.array([0])
        TPR = np.array([0])
        FPR = np.array([0])
        F1 = np.array([0])
        TP = np.array([0])
        FP = np.array([sum(Pre != 0)])
        TN = np.array([sum(Pre == 0)])
        FN = np.array([0])
        return Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN
    # thresholdList = np.hstack([min(Pre), thresholdList.copy(), max(Pre)])
    thresholdList = np.hstack(thresholdList.copy())
    thresholdList = np.unique(thresholdList)
    thresholdList = -np.sort(-thresholdList)
    AC_P = len(ind)
    AC_N = len(Gro) - AC_P
    N_thresholdList = len(thresholdList)

    
    TP = np.zeros(N_thresholdList)
    TN = np.zeros(N_thresholdList)
    FP = np.zeros(N_thresholdList)
    FN = np.zeros(N_thresholdList)


    N_pre = len(Pre)


    LOC = np.zeros(N_thresholdList)
    for i in range(len(thresholdList)):
        if np.isin(thresholdList[i], Pre):
            locs = np.hstack(np.where(thresholdList[i] == Pre))
            LOC[i] = locs[-1]
    LOC = LOC.astype(int)


    for i in range(len(LOC)):
        if i == 0:
            loc1 = 0
            loc2 = LOC[i]
            I = np.arange(loc1, loc2 + 1)
            J = np.arange(loc2 + 1, N_pre)
            TP[i] = len(np.hstack(np.where(Gro[I] == 1)))
            FP[i] = len(np.hstack(np.where(Gro[I] == 0)))

            TN[i] = len(np.hstack(np.where(Gro[J] == 0)))
            FN[i] = len(np.hstack(np.where(Gro[J] == 1)))
        else:
            loc1 = LOC[i - 1]
            loc2 = LOC[i]

            I = np.arange(loc1 + 1, loc2 + 1)

            TP[i] = TP[i - 1] + len(np.hstack(np.where(Gro[I] == 1)))
            FP[i] = FP[i - 1] + len(np.hstack(np.where(Gro[I] == 0)))

            TN[i] = TN[i - 1] - len(np.hstack(np.where(Gro[I] == 0)))
            FN[i] = FN[i - 1] - len(np.hstack(np.where(Gro[I] == 1)))

    TP = np.hstack([0, TP, AC_P])
    FP = np.hstack([0, FP, AC_N])
    TN = np.hstack([AC_N, TN, 0])
    FN = np.hstack([AC_P, FN, 0])

    # FPR = FP / AC_N
    # TPR = TP / AC_P
    FPR = np.zeros(N_thresholdList + 2)
    for i in range(N_thresholdList + 2):
        if TN[i] + FP[i] == 0 :
            FPR[i] = 0
        else:
            FPR[i] = FP[i] / (TN[i] + FP[i])

    # FPR = FP / (TN + FP)
    TPR = TP / (TP + FN)

    # Recall = TP / (TP + FN)
    Recall = TPR
    Precision = np.zeros(N_thresholdList + 2)
    # Precision = np.zeros(N_thresholdList)
    for i in range(N_thresholdList + 2):
        if TP[i] + FP[i] == 0 :
            Precision[i] = 0
        else:
            Precision[i] = TP[i] / (TP[i] + FP[i])
    # if (TP + FP== 0):
    #     Precision = np.zeros(N_thresholdList)
    # else:
    #     Precision = TP / (TP + FP)
    # jac=TP / (TP + FN + FP)
    # JAC = max(jac)
    # t = 2 / (1 / Recall + 1 / Precision)
    # score_F1 = max(t)

    # print('---TPR FPR')

    # print(TPR)
    # print(FPR)

    # print('---FP TN')

    # print(FP)
    # print(TN)

    # print('thlist')
    # print(thresholdList)
    # print(Pre)

    F1 = (2 * (Precision * Recall) / (Precision + Recall + 0.0000000000001)).max()



    return Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN


In [12]:
def FmaxSmin(Ground_Truth_mat, score_mat, target_idx, TP, FP, TN, FN):
    m,n = Ground_Truth_mat.shape


    Ground_Truth = (Ground_Truth_mat[:, target_idx].copy()).ravel()
    score = (score_mat[:, target_idx].copy()).ravel()
    existing_drug_idx = np.setdiff1d(np.arange(n), target_idx)
    likelihood_obs = Ground_Truth_mat[:, existing_drug_idx].copy()
    ic = - np.log2((likelihood_obs.sum(axis=1) + 1)/(likelihood_obs.shape[1] + 2))

    sort_idx = np.argsort(-score)
    Pre = score[sort_idx].copy()
    Gro = Ground_Truth[sort_idx].copy()
    ind = np.hstack(np.where(Gro > 0))
    thresholdList = np.unique(Pre[ind].copy())
    # thresholdList = np.hstack([min(Pre), thresholdList.copy(), max(Pre)])
    thresholdList = np.hstack(thresholdList.copy())
    thresholdList = np.unique(thresholdList)
    thresholdList = -np.sort(-thresholdList)
    AC_P = len(ind)
    AC_N = len(Gro) - AC_P
    N_thresholdList = len(thresholdList)

    
    # TP = np.zeros(N_thresholdList)
    # TN = np.zeros(N_thresholdList)
    # FP = np.zeros(N_thresholdList)
    # FN = np.zeros(N_thresholdList)

    # m_t = np.zeros(N_thresholdList)
    n_e = np.zeros(N_thresholdList)


    icI_FN = np.zeros(N_thresholdList)
    icI_FP = np.zeros(N_thresholdList)

    icI_TP = np.zeros(N_thresholdList)

    Obs = Ground_Truth_mat[:, target_idx]
    for i in range(N_thresholdList):
        Pred = (score_mat[:, target_idx] > thresholdList[i])
        n_e[i] = sum((Pred.sum(axis = 1)) > 0)
        Result = Pred - Obs
        Result2 = Pred + Obs
        FP_col = (Result > 0)
        FN_col = (Result < 0)
        TP_col = (Result2 == 2)

        icI_FN[i] = sum(ic*(FN_col.sum(axis = 1)))
        icI_FP[i] = sum(ic*(FP_col.sum(axis = 1)))
        icI_TP[i] = sum(ic*(TP_col.sum(axis = 1)))

    n_e = np.hstack([0, n_e, m])

    AllN_icI_FN = sum(ic*(Obs.sum(axis = 1)))
    AllP_icI_FP = sum(ic*((1 - Obs).sum(axis = 1)))
    AllP_icI_TP = AllN_icI_FN


    icI_FN = np.hstack([AllN_icI_FN, icI_FN, 0])
    icI_FP = np.hstack([0, icI_FP, AllP_icI_FP])
    icI_TP = np.hstack([0, icI_TP, AllP_icI_TP])

    # FPR = FP / AC_N
    # TPR = TP / AC_P
    # FPR = FP / (TN + FP)
    # TPR = TP / (TP + FN)
    

    # Recall = TP / (TP + FN)
    # Recall = TPR
    Precision = np.zeros(N_thresholdList + 2)
    pr = np.zeros(N_thresholdList + 2)
    rc = np.zeros(N_thresholdList + 2)
    ru = np.zeros(N_thresholdList + 2)
    mi = np.zeros(N_thresholdList + 2)
    # Precision = np.zeros(N_thresholdList)
    for i in range(N_thresholdList + 2):
        if icI_TP[i] + icI_FP[i] == 0 :
            Precision[i] = 0
        else:
            Precision[i] = icI_TP[i] / (icI_TP[i] + icI_FP[i])

    pr = Precision
    rc = icI_TP / (icI_TP + icI_FN)

    for i in range(N_thresholdList + 2):
        if n_e[i] == 0 :
            # pr[i] = 0
            # rc[i] = 0
            ru[i] = 0
            mi[i] = 0
        else:
            # pr[i] = Precision[i] / n_e[i]
            # rc[i] = Recall[i] / n_e[i]

            ru[i] = icI_FN[i] / n_e[i]
            mi[i] = icI_FP[i] / n_e[i]
    

    # for i in range(N_thresholdList + 2):
    #     if n_e[i] == 0 :
    #         pr[i] = 0
    #         rc[i] = 0
    #         ru[i] = 0
    #         mi[i] = 0
    #     else:
    #         pr[i] = Precision[i]
    #         rc[i] = Recall[i]
    #         ru[i] = icI_FN[i]
    #         mi[i] = icI_FP[i]

    F = 2 * pr * rc / (pr + rc + 0.0000000000001)
    Fmax = max(F[n_e > 0])
    S = np.sqrt(ru**2 + mi**2)
    Smin = min(S[n_e > 0])

    return Fmax, Smin

In [13]:
def fold(IDX1,IDX2,feature_matrix1,feature_matrix2,alpha,matrix):
    # IDX1 target index, need to be evaluated
    # IDX2 test index, masked

    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx = IDX1
    mask_idx = IDX2
    Ground_Truth = matrix.copy()
    side_effects_drug_relation_copy = matrix.copy()

    # target_idx = IDX2
    ### making all the links to predict as 0 ###############    
    for i in range(len(mask_idx)):
        side_effects_drug_relation_copy[:, mask_idx[i]] = 0
    
    m,n = side_effects_drug_relation_copy.shape

    drug_idx = list(range(n))
    existing_drug_idx = np.setdiff1d(drug_idx, mask_idx)
    
    # calculate the mean for each drug
    mean_side_effect_score = (Ground_Truth.copy()[:, existing_drug_idx]).mean(axis=1)
    score_mean = side_effects_drug_relation_copy.copy().astype(float)

    # Set the prediction into mean
    for i in range(m):
        score_mean[i, mask_idx] =  mean_side_effect_score[i]
    print('LNSM starts:')
    if methodOption == "LNSM_CMI":
        side_effects_drug_relation_fact = LNSM_CMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_SMI":
        side_effects_drug_relation_fact = LNSM_SMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_WMK1":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix1, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)
    elif methodOption == "LNSM_WMK2":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)
    # side_effects_drug_relation_fact = perform_matrix_reconstruction(side_effects_drug_relation_copy, Gamma, weight_matrix, lmd, update_normalization)

    # Set the out put of GNMF as prediction score
    score = side_effects_drug_relation_fact.copy()

    # Random score
    random_score = np.random.rand(m,n)
    
    pr_auc_all_mean = 0
    roc_auc_all_mean = 0
    F1_mean = 0
    pr_auc_per_drug_mean = 0
    roc_auc_per_drug_mean = 0
    fmax_mean = 0
    smin_mean = 0
    pr_auc_per_drug = 0
    roc_auc_per_drug = 0
    fmax = 0
    smin = 0
    F1 = 0
    pr_auc_all_rd = 0
    roc_auc_all_rd = 0
    pr_auc_per_drug_rd = 0
    roc_auc_per_drug_rd = 0
    fmax_rd = 0
    smin_rd = 0
    F1_rd = 0
    roc_auc_topn_rd = 0
    pr_auc_topn_rd = 0
    pr_auc_all = 0
    roc_auc_all = 0
    roc_auc_topn_mean = 0
    roc_auc_topn = 0
    pr_auc_topn_mean = 0
    pr_auc_topn = 0


    print("proportion of ground truth:", sum(Ground_Truth[:, target_idx].ravel())/(Ground_Truth[:, target_idx].shape[0]*Ground_Truth[:, target_idx].shape[1]))

    print('---evaluation---')

    prec, recall, threshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    pr_auc_all = auc(recall, prec)
    roc_auc_all = roc_auc_score(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())

    print("-----")

    print("AUC-PR mean:", pr_auc_all_mean)
    # print("AUC-PR rd:", pr_auc_all_rd)
    print("AUC-PR all:", pr_auc_all)

    print("-----")

    print("AUC-ROC mean:", roc_auc_all_mean)
    # print("AUC-ROC rd:", roc_auc_all_rd)
    print("AUC-ROC all:", roc_auc_all)

    print("-----")

    print("AUC-PR per drug mean:", pr_auc_per_drug_mean)
    # print("AUC-PR per drug rd:", pr_auc_per_drug_rd)
    print("AUC-PR per drug:", pr_auc_per_drug)

    print("-----")

    print("AUC-ROC per drug mean:", roc_auc_per_drug_mean)
    # print("AUC-ROC per drug rd:", roc_auc_per_drug_rd)
    print("AUC-ROC per drug:", roc_auc_per_drug)

    print("-----")

    print("AUC-ROC top N mean:", roc_auc_topn_mean)
    # print("AUC-ROC top N rd:", roc_auc_topn_rd)
    print("AUC-ROC top N:", roc_auc_topn)

    print("-----")

    print("AUC-PR top N mean:", pr_auc_topn_mean)
    # print("AUC-PR top N rd:", pr_auc_topn_rd)
    print("AUC-PR top N:", pr_auc_topn)
    
    print("-----")

    print("F1 mean:", F1_mean)
    # print("F1 rd:", F1_rd)
    print("F1:", F1)

    print("-----")

    print("Fmax_mean", fmax_mean)
    # print("Fmax_rd", fmax_rd)
    print("Fmax", fmax)

    print("-----")

    print("Smin_mean", smin_mean)
    # print("Smin_rd", smin_rd)
    print("Smin", smin)



    return pr_auc_all_mean, roc_auc_all_mean, F1_mean, pr_auc_per_drug_mean, roc_auc_per_drug_mean, fmax_mean, smin_mean, \
        pr_auc_all, roc_auc_all, pr_auc_per_drug, roc_auc_per_drug, fmax, smin, F1, \
            pr_auc_all_rd, roc_auc_all_rd, pr_auc_per_drug_rd, roc_auc_per_drug_rd, fmax_rd, smin_rd, F1_rd, \
                roc_auc_topn_mean, roc_auc_topn_rd, roc_auc_topn, pr_auc_topn_mean, pr_auc_topn_rd, pr_auc_topn


In [14]:
def innerfold(IDX1,IDX2,feature_matrix1,feature_matrix2,alpha,matrix):
    # IDX1 target index, need to be evaluated
    # IDX2 test index, masked

    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx = IDX1
    mask_idx = IDX2
    Ground_Truth = matrix.copy()
    side_effects_drug_relation_copy = matrix.copy()

    # target_idx = IDX2
    ### making all the links to predict as 0 ###############    
    for i in range(len(mask_idx)):
        side_effects_drug_relation_copy[:, mask_idx[i]] = 0
    
    m,n = side_effects_drug_relation_copy.shape

    drug_idx = list(range(n))
    existing_drug_idx = np.setdiff1d(drug_idx, mask_idx)
    
    # calculate the mean for each drug
    mean_side_effect_score = (Ground_Truth.copy()[:, existing_drug_idx]).mean(axis=1)
    score_mean = side_effects_drug_relation_copy.copy().astype(float)

    # Set the prediction into mean
    for i in range(m):
        score_mean[i, mask_idx] =  mean_side_effect_score[i]

    print(methodOption + ' starts:')
    if methodOption == "LNSM_CMI":
        side_effects_drug_relation_fact = LNSM_CMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_SMI":
        side_effects_drug_relation_fact = LNSM_SMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_WMK1":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix1, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)
    elif methodOption == "LNSM_WMK2":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)

    # Set the out put of GNMF as prediction score
    score = side_effects_drug_relation_fact.copy()

    # Random score
    random_score = np.random.rand(m,n)
    

    print("proportion of ground truth:", sum(Ground_Truth[:, target_idx].ravel())/(Ground_Truth[:, target_idx].shape[0]*Ground_Truth[:, target_idx].shape[1]))

    print('---evaluation---')



    pr_auc_all_mean = 0
    roc_auc_all_mean = 0
    F1_mean = 0
    pr_auc_per_drug_mean = 0
    roc_auc_per_drug_mean = 0
    fmax_mean = 0
    smin_mean = 0
    pr_auc_per_drug = 0
    roc_auc_per_drug = 0
    fmax = 0
    smin = 0
    F1 = 0
    pr_auc_all_rd = 0
    roc_auc_all_rd = 0
    pr_auc_per_drug_rd = 0
    roc_auc_per_drug_rd = 0
    fmax_rd = 0
    smin_rd = 0
    F1_rd = 0
    roc_auc_topn_rd = 0
    pr_auc_topn_rd = 0
    pr_auc_all = 0
    roc_auc_all = 0
    roc_auc_topn_mean = 0
    roc_auc_topn = 0
    pr_auc_topn_mean = 0
    pr_auc_topn = 0

    # time1 = time.time()
    

    # # ###### Evaluation all entries ######
    # ### GNMF
    # prec, recall, tpr, fpr, F1, TP, FP, TN, FN = pr_roc_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    # # Precision, Recall, TPR, FPR, F1, TP, FP, TN, FN
    # pr_auc_all = auc(recall, prec) 
    # roc_auc_all = auc(fpr, tpr)
    # # fmax, smin = FmaxSmin(Ground_Truth_mat=Ground_Truth, score_mat=score, target_idx=target_idx, TP = TP, FP = FP, TN = TN, FN = FN)

    # # ### Naive Model
    # prec, recall, tpr, fpr, F1_mean, TP, FP, TN, FN = pr_roc_curve(Ground_Truth[:, target_idx].ravel(), score_mean[:, target_idx].ravel())
    # pr_auc_all_mean = auc(recall, prec)
    # roc_auc_all_mean = auc(fpr, tpr)
    # # fmax_mean, smin_mean = FmaxSmin(Ground_Truth_mat=Ground_Truth, score_mat=score_mean, target_idx=target_idx, TP = TP, FP = FP, TN = TN, FN = FN)

    # print("our")
    # print("-----")

    # print("AUC-PR mean:", pr_auc_all_mean)
    # # print("AUC-PR rd:", pr_auc_all_rd)
    # print("AUC-PR all:", pr_auc_all)

    # print("-----")

    # print("AUC-ROC mean:", roc_auc_all_mean)
    # # print("AUC-ROC rd:", roc_auc_all_rd)
    # print("AUC-ROC all:", roc_auc_all)

    # print("-----")


    # time2 = time.time()
    # print("time for our AUC: ", time2 - time1)

    prec, recall, threshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    pr_auc_all = auc(recall, prec)
    # roc_auc_all = roc_auc_score(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())

    # prec, recall, threshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score_mean[:, target_idx].ravel())
    # pr_auc_all_mean = auc(recall, prec)
    # roc_auc_all_mean = roc_auc_score(Ground_Truth[:, target_idx].ravel(), score_mean[:, target_idx].ravel())

    # time3 = time.time()

    # print("sklearn")
    print("-----")

    # print("AUC-PR mean:", pr_auc_all_mean)
    # print("AUC-PR rd:", pr_auc_all_rd)
    print("AUC-PR all:", pr_auc_all)

    print("-----")

    # print("AUC-ROC mean:", roc_auc_all_mean)
    # # print("AUC-ROC rd:", roc_auc_all_rd)
    # print("AUC-ROC all:", roc_auc_all)

    # print("-----")
    # # print("time for sklearn AUC: ", time3 - time2)


    # ###### Evaluation per drug ######
    # ### GNMF
    # ALL_pr_auc = np.zeros(len(target_idx))
    # ALL_roc_auc = np.zeros(len(target_idx))
    # for i in range(len(target_idx)):
    #     prec, recall, tpr, fpr, _, _, _, _, _ = pr_roc_curve(Ground_Truth[:, target_idx[i]], score[:, target_idx[i]])
    #     ALL_pr_auc[i] = auc(recall, prec)
    #     ALL_roc_auc[i] = metrics.auc(fpr, tpr)  
    # pr_auc_per_drug = ALL_pr_auc.mean()
    # roc_auc_per_drug = ALL_roc_auc.mean()

    # ### Naive model
    # ALL_pr_auc = np.zeros(len(target_idx))
    # ALL_roc_auc = np.zeros(len(target_idx))
    # for i in range(len(target_idx)):
    #     prec, recall, tpr, fpr, _, _, _, _, _ = pr_roc_curve(Ground_Truth[:, target_idx[i]], score_mean[:, target_idx[i]])
    #     ALL_pr_auc[i] = auc(recall, prec)
    #     ALL_roc_auc[i] = metrics.auc(fpr, tpr)      
    # pr_auc_per_drug_mean = ALL_pr_auc.mean()
    # roc_auc_per_drug_mean = ALL_roc_auc.mean()

    

    # print("-----")

    # print("AUC-PR per drug mean:", pr_auc_per_drug_mean)
    # print("AUC-PR per drug:", pr_auc_per_drug)

    # print("-----")

    # print("AUC-ROC per drug mean:", roc_auc_per_drug_mean)
    # print("AUC-ROC per drug:", roc_auc_per_drug)

    # print("-----")





    return pr_auc_all_mean, roc_auc_all_mean, F1_mean, pr_auc_per_drug_mean, roc_auc_per_drug_mean, fmax_mean, smin_mean, \
        pr_auc_all, roc_auc_all, pr_auc_per_drug, roc_auc_per_drug, fmax, smin, F1, \
            pr_auc_all_rd, roc_auc_all_rd, pr_auc_per_drug_rd, roc_auc_per_drug_rd, fmax_rd, smin_rd, F1_rd, \
                roc_auc_topn_mean, roc_auc_topn_rd, roc_auc_topn, pr_auc_topn_mean, pr_auc_topn_rd, pr_auc_topn


In [15]:
def plotfold(IDX1,IDX2,feature_matrix1,feature_matrix2,alpha,matrix):
    # IDX1 target index, need to be evaluated
    # IDX2 test index, masked

    print('First few target index:', IDX1[0:10])
    print('First few mask index:', IDX2[0:10])

    target_idx = IDX1
    mask_idx = IDX2
    Ground_Truth = matrix.copy()
    side_effects_drug_relation_copy = matrix.copy()

    # target_idx = IDX2
    ### making all the links to predict as 0 ###############    
    for i in range(len(mask_idx)):
        side_effects_drug_relation_copy[:, mask_idx[i]] = 0
    
    m,n = side_effects_drug_relation_copy.shape

    drug_idx = list(range(n))
    existing_drug_idx = np.setdiff1d(drug_idx, mask_idx)
    
    # calculate the mean for each drug
    mean_side_effect_score = (Ground_Truth.copy()[:, existing_drug_idx]).mean(axis=1)
    score_mean = side_effects_drug_relation_copy.copy().astype(float)

    # Set the prediction into mean
    for i in range(m):
        score_mean[i, mask_idx] =  mean_side_effect_score[i]
    print('LNSM starts:')
    if methodOption == "LNSM_CMI":
        side_effects_drug_relation_fact = LNSM_CMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_SMI":
        side_effects_drug_relation_fact = LNSM_SMI(matrix=matrix, feature_matrix1=feature_matrix1, feature_matrix2=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, idx_test=target_idx)
    elif methodOption == "LNSM_WMK1":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix1, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)
    elif methodOption == "LNSM_WMK2":
        side_effects_drug_relation_fact = LNSM(matrix=matrix, feature_matrix=feature_matrix2, alpha=alpha, idx_train=existing_drug_idx, 
        idx_test=target_idx)
    # side_effects_drug_relation_fact = perform_matrix_reconstruction(side_effects_drug_relation_copy, Gamma, weight_matrix, lmd, update_normalization)

    # Set the out put of GNMF as prediction score
    score = side_effects_drug_relation_fact.copy()

    # Random score
    random_score = np.random.rand(m,n)
    
    pr_auc_all_mean = 0
    roc_auc_all_mean = 0
    F1_mean = 0
    pr_auc_per_drug_mean = 0
    roc_auc_per_drug_mean = 0
    fmax_mean = 0
    smin_mean = 0
    pr_auc_per_drug = 0
    roc_auc_per_drug = 0
    fmax = 0
    smin = 0
    F1 = 0
    pr_auc_all_rd = 0
    roc_auc_all_rd = 0
    pr_auc_per_drug_rd = 0
    roc_auc_per_drug_rd = 0
    fmax_rd = 0
    smin_rd = 0
    F1_rd = 0
    roc_auc_topn_rd = 0
    pr_auc_topn_rd = 0
    pr_auc_all = 0
    roc_auc_all = 0
    roc_auc_topn_mean = 0
    roc_auc_topn = 0
    pr_auc_topn_mean = 0
    pr_auc_topn = 0


    print("proportion of ground truth:", sum(Ground_Truth[:, target_idx].ravel())/(Ground_Truth[:, target_idx].shape[0]*Ground_Truth[:, target_idx].shape[1]))

    print('---evaluation---')

    prec, recall, prthreshold = precision_recall_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    pr_auc_all = auc(recall, prec)
    
    fpr, tpr, rocthreshold = metrics.roc_curve(Ground_Truth[:, target_idx].ravel(), score[:, target_idx].ravel())
    roc_auc_all = auc(fpr, tpr)

    print("-----")

    print("AUC-PR mean:", pr_auc_all_mean)
    # print("AUC-PR rd:", pr_auc_all_rd)
    print("AUC-PR all:", pr_auc_all)

    print("-----")

    print("AUC-ROC mean:", roc_auc_all_mean)
    # print("AUC-ROC rd:", roc_auc_all_rd)
    print("AUC-ROC all:", roc_auc_all)

    print("-----")

    print("AUC-PR per drug mean:", pr_auc_per_drug_mean)
    # print("AUC-PR per drug rd:", pr_auc_per_drug_rd)
    print("AUC-PR per drug:", pr_auc_per_drug)

    print("-----")

    print("AUC-ROC per drug mean:", roc_auc_per_drug_mean)
    # print("AUC-ROC per drug rd:", roc_auc_per_drug_rd)
    print("AUC-ROC per drug:", roc_auc_per_drug)

    print("-----")

    print("AUC-ROC top N mean:", roc_auc_topn_mean)
    # print("AUC-ROC top N rd:", roc_auc_topn_rd)
    print("AUC-ROC top N:", roc_auc_topn)

    print("-----")

    print("AUC-PR top N mean:", pr_auc_topn_mean)
    # print("AUC-PR top N rd:", pr_auc_topn_rd)
    print("AUC-PR top N:", pr_auc_topn)
    
    print("-----")

    print("F1 mean:", F1_mean)
    # print("F1 rd:", F1_rd)
    print("F1:", F1)

    print("-----")

    print("Fmax_mean", fmax_mean)
    # print("Fmax_rd", fmax_rd)
    print("Fmax", fmax)

    print("-----")

    print("Smin_mean", smin_mean)
    # print("Smin_rd", smin_rd)
    print("Smin", smin)

    Out1 = pd.DataFrame([prec, recall, prthreshold])
    Out2 = pd.DataFrame([fpr, tpr, rocthreshold])
    return Out1, Out2

In [16]:
def generate_T(matrix, C = 0):
    print("---generate T---")

    num_se,num_drug = matrix.shape
    p_plus_vector = matrix.sum(axis=1)/num_drug
    p_minus_vector = 1 - p_plus_vector
    num_positive = matrix.sum(axis=1)
    num_negative = num_drug - num_positive
    
    T = np.zeros(shape=(num_drug, num_drug))
    for i in range(num_se):
        side_effect = matrix[i, :].copy()
        side_effect = np.mat(side_effect)
        T_1 = np.dot(side_effect.T, side_effect)*p_minus_vector[i]**2
        T_2 = np.dot((1 - side_effect).T, (1 - side_effect))*p_plus_vector[i]**2
        T_3 = -np.dot(side_effect.T, (1 - side_effect))*p_plus_vector[i]*p_minus_vector[i]
        T_4 = -np.dot((1 - side_effect).T, side_effect)*p_plus_vector[i]*p_minus_vector[i]
        T = T.copy() + T_1.copy() + T_2.copy() + T_3.copy() + T_4.copy() 
    T = (T + C) * num_drug / sum(num_positive * num_negative)
    scaler = num_drug / sum(num_positive * num_negative)
    # print("scaler:", scaler)

    return T

In [17]:
def generate_T_2(matrix):
    print("---generate T_2---")
    num_se,num_drug = matrix.shape
    num_positive = matrix.sum(axis=1)
    p_plus_vector = np.zeros(len(num_positive))
    for i in range(len(num_positive)):
        if num_positive[i] == 0:
            p_plus_vector[i] = 0
        else:
            p_plus_vector[i] = 1 / (num_positive[i])
    p_minus_vector = 1 / (num_drug - num_positive)
    p = (num_positive * (num_drug - num_positive)) / num_drug**2

    T = np.zeros(shape=(num_drug, num_drug))
    for i in range(num_se):
        side_effect = matrix[i, :].copy()
        side_effect = np.mat(side_effect)
        T_1 = np.dot(side_effect.T, side_effect)*p_plus_vector[i]
        T_2 = np.dot((1 - side_effect).T, (1 - side_effect))*p_minus_vector[i]
        T = T.copy() + (T_1.copy() + T_2.copy()) * p[i]
    T = T / sum(p)

    return T - 0.002


In [18]:
def f_3(x, A, B):
    y = A*x + B
    return y

In [19]:
def f_2(x, B):
    y = x + B
    return y

In [20]:
def f_xx(x, A, B, C):
    x1 = x[0]
    x2 = x[1]
    return A*x1 + B*x2 + C

In [21]:
def f_ReLU(x, A, B):
    y = A*x + B
    y[y < 0] = 0
    return y

In [22]:
def f_ReLU2(x, A, B, C):
    x1 = x[0]
    x2 = x[1]
    y = A*x1 + B*x2 + C
    y[y < 0] = 0
    return y

In [23]:
def fitC(matrix, train_idx):
    y_GeneMania = generate_T(matrix[:, train_idx].copy())
    y_us = generate_T_2(matrix[:, train_idx].copy())
    # x_vector = x.ravel()
    y_GeneMania_vector = np.hstack(np.array(y_GeneMania.ravel()))
    y_us_vector = np.hstack(np.array(y_us.ravel()))

    B = optimize.curve_fit(f_2, y_GeneMania_vector, y_us_vector)[0]
    y_new = f_2(y_GeneMania, B)
    # print("alpha:", A)
    print("C:", B)
    return y_new

In [24]:
def fitW(W, matrix, train_idx):
    x = W[train_idx, :][:, train_idx].copy()
    # y = generate_T_2(matrix[:, train_idx].copy())
    y = matrix.copy()
    # y[y < 0] = 0
    x_vector = x.ravel()
    y_vector = np.hstack(np.array(y.ravel()))
    # E = optimize.curve_fit(relu, x_vector, y_vector)[0]
    # ynew = relu(W, E)
    # B1, B2, B0 = optimize.curve_fit(relu2, x_vector, y_vector)[0]
    # ynew = relu2(W, B1, B2, B0)
    # A, B, C = optimize.curve_fit(quad, x_vector, y_vector)[0]
    # ynew = quad(W, A, B, C)
    A, B = optimize.curve_fit(f_3, x_vector, y_vector)[0]
    ynew = f_3(W, A, B)
    # print("par:", A, B)
    # print(ynew)
    
    return ynew

In [25]:
def fitW2(W1, W2, matrix, train_idx):
    x1 = W1[train_idx, :][:, train_idx].copy()
    x2 = W2[train_idx, :][:, train_idx].copy()
    y = matrix.copy()

    x1_vector = x1.ravel()
    x2_vector = x2.ravel()
    y_vector = np.hstack(np.array(y.ravel()))
    A, B, C = optimize.curve_fit(f_xx, [x1_vector, x2_vector], y_vector)[0]
    ynew = f_xx([W1, W2], A, B, C)
    return ynew

In [26]:
def graph_normalization(graph):
    graph = np.array(graph.copy())
    graph[graph < 0] = 0
    graph_0 = np.array(graph.copy())
    graph = (graph.copy()/np.sqrt(np.sum(graph_0.copy(), axis = 0) + 0.0000000000001)).T
    graph = (graph.copy()/np.sqrt(np.sum(graph_0.copy(), axis = 1) + 0.0000000000001)).T
    return graph

In [27]:
def setvar_tune(size):
# set var for hyper pars tuning size is the hyper par size ALL_...
    global ALL_AUCPR_all_mean
    global ALL_AUROC_all_mean
    global ALL_AUCPR_per_drug_mean
    global ALL_AUROC_per_drug_mean
    global ALL_AUCPR_topn_mean
    global ALL_AUROC_topn_mean
    global ALL_F1_mean
    global ALL_Fmax_mean
    global ALL_Smin_mean

    global ALL_AUCPR_all_rd
    global ALL_AUROC_all_rd
    global ALL_AUCPR_per_drug_rd
    global ALL_AUROC_per_drug_rd
    global ALL_AUCPR_topn_rd
    global ALL_AUROC_topn_rd
    global ALL_F1_rd
    global ALL_Fmax_rd
    global ALL_Smin_rd

    global ALL_AUCPR_all
    global ALL_AUROC_all
    global ALL_AUCPR_per_drug
    global ALL_AUROC_per_drug
    global ALL_AUCPR_topn
    global ALL_AUROC_topn
    global ALL_F1
    global ALL_Fmax
    global ALL_Smin

    ALL_AUCPR_all_mean = np.zeros(size)
    ALL_AUROC_all_mean = np.zeros(size)
    ALL_AUCPR_per_drug_mean = np.zeros(size)
    ALL_AUROC_per_drug_mean = np.zeros(size)
    ALL_AUCPR_topn_mean = np.zeros(size)
    ALL_AUROC_topn_mean = np.zeros(size)
    ALL_F1_mean = np.zeros(size)
    ALL_Fmax_mean = np.zeros(size)
    ALL_Smin_mean = np.zeros(size)

    ALL_AUCPR_all_rd = np.zeros(size)
    ALL_AUROC_all_rd = np.zeros(size)
    ALL_AUCPR_per_drug_rd = np.zeros(size)
    ALL_AUROC_per_drug_rd = np.zeros(size)
    ALL_AUCPR_topn_rd = np.zeros(size)
    ALL_AUROC_topn_rd = np.zeros(size)
    ALL_F1_rd = np.zeros(size)
    ALL_Fmax_rd = np.zeros(size)
    ALL_Smin_rd = np.zeros(size)

    ALL_AUCPR_all = np.zeros(size)
    ALL_AUROC_all = np.zeros(size)
    ALL_AUCPR_per_drug = np.zeros(size)
    ALL_AUROC_per_drug = np.zeros(size)
    ALL_AUCPR_topn = np.zeros(size)
    ALL_AUROC_topn = np.zeros(size)
    ALL_F1 = np.zeros(size)
    ALL_Fmax = np.zeros(size)
    ALL_Smin = np.zeros(size)

In [28]:
def setvar_cv(FOLDS):
# set var for cv 
    global AUC_roc_all_mean
    global AUC_pr_all_mean
    global AUC_roc_per_drug_mean
    global AUC_pr_per_drug_mean
    global AUC_roc_topn_mean
    global AUC_pr_topn_mean
    global Fmax_mean
    global Smin_mean
    global F1_mean

    global AUC_roc_all
    global AUC_pr_all
    global AUC_roc_per_drug
    global AUC_pr_per_drug
    global AUC_roc_topn
    global AUC_pr_topn
    global Fmax
    global Smin
    global F1

    global AUC_roc_all_rd
    global AUC_pr_all_rd
    global AUC_roc_per_drug_rd
    global AUC_pr_per_drug_rd
    global AUC_roc_topn_rd
    global AUC_pr_topn_rd
    global Fmax_rd
    global Smin_rd
    global F1_rd
    
    AUC_roc_all_mean = np.zeros(FOLDS)
    AUC_pr_all_mean = np.zeros(FOLDS)
    AUC_roc_per_drug_mean = np.zeros(FOLDS)
    AUC_pr_per_drug_mean = np.zeros(FOLDS)
    AUC_roc_topn_mean = np.zeros(FOLDS)
    AUC_pr_topn_mean = np.zeros(FOLDS)
    Fmax_mean = np.zeros(FOLDS)
    Smin_mean = np.zeros(FOLDS)
    F1_mean = np.zeros(FOLDS)
    
    AUC_roc_all = np.zeros(FOLDS)
    AUC_pr_all = np.zeros(FOLDS)
    AUC_roc_per_drug = np.zeros(FOLDS)
    AUC_pr_per_drug = np.zeros(FOLDS)
    AUC_roc_topn = np.zeros(FOLDS)
    AUC_pr_topn = np.zeros(FOLDS)
    Fmax = np.zeros(FOLDS)
    Smin = np.zeros(FOLDS)
    F1 = np.zeros(FOLDS)

    AUC_roc_all_rd = np.zeros(FOLDS)
    AUC_pr_all_rd = np.zeros(FOLDS)
    AUC_roc_per_drug_rd = np.zeros(FOLDS)
    AUC_pr_per_drug_rd = np.zeros(FOLDS)
    AUC_roc_topn_rd = np.zeros(FOLDS)
    AUC_pr_topn_rd = np.zeros(FOLDS)
    Fmax_rd = np.zeros(FOLDS)
    Smin_rd = np.zeros(FOLDS)
    F1_rd = np.zeros(FOLDS)

In [29]:
def asgvar_tune(idx, results):
    # assign var for cv from results
    # f: size of hyper pars
    ALL_AUCPR_all_mean[idx] = results[0]
    ALL_AUROC_all_mean[idx] = results[1]
    ALL_F1_mean[idx] = results[2]
    ALL_AUCPR_per_drug_mean[idx] = results[3]
    ALL_AUROC_per_drug_mean[idx] = results[4]
    ALL_Fmax_mean[idx] = results[5]
    ALL_Smin_mean[idx] = results[6]
    ALL_AUCPR_all[idx] = results[7]
    ALL_AUROC_all[idx] = results[8]
    ALL_AUCPR_per_drug[idx] = results[9]
    ALL_AUROC_per_drug[idx] = results[10]
    ALL_Fmax[idx] = results[11]
    ALL_Smin[idx] = results[12]
    ALL_F1[idx] = results[13]
    ALL_AUCPR_all_rd[idx] = results[14]
    ALL_AUROC_all_rd[idx] = results[15]
    ALL_AUCPR_per_drug_rd[idx] = results[16]
    ALL_AUROC_per_drug_rd[idx] = results[17]
    ALL_Fmax_rd[idx] = results[18]
    ALL_Smin_rd[idx] = results[19]
    ALL_F1_rd[idx] = results[20]
    ALL_AUROC_topn_mean[idx] = results[21]
    ALL_AUROC_topn_rd[idx] = results[22]
    ALL_AUROC_topn[idx] = results[23]
    ALL_AUCPR_topn_mean[idx] = results[24]
    ALL_AUCPR_topn_rd[idx] = results[25]
    ALL_AUCPR_topn[idx] = results[26]

In [30]:
def asgvar_cv(f, results):
    # assign var for cv from results
    # f: size of hyper pars
    AUC_pr_all_mean[f] = results[0]
    AUC_roc_all_mean[f] = results[1]
    F1_mean[f] = results[2]
    AUC_pr_per_drug_mean[f] = results[3]
    AUC_roc_per_drug_mean[f] = results[4]
    Fmax_mean[f] = results[5]
    Smin_mean[f] = results[6]
    AUC_pr_all[f] = results[7]
    AUC_roc_all[f] = results[8]
    AUC_pr_per_drug[f] = results[9]
    AUC_roc_per_drug[f] = results[10]
    Fmax[f] = results[11]
    Smin[f] = results[12]
    F1[f] = results[13]
    AUC_pr_all_rd[f] = results[14]
    AUC_roc_all_rd[f] = results[15]
    AUC_pr_per_drug_rd[f] = results[16]
    AUC_roc_per_drug_rd[f] = results[17]
    Fmax_rd[f] = results[18]
    Smin_rd[f] = results[19]
    F1_rd[f] = results[20]
    AUC_roc_topn_mean[f] = results[21]
    AUC_roc_topn_rd[f] = results[22]
    AUC_roc_topn[f] = results[23]
    AUC_pr_topn_mean[f] = results[24]
    AUC_pr_topn_rd[f] = results[25]
    AUC_pr_topn[f] = results[26]

In [31]:
def tuning_plot(tuneVar, tune):
    if tune == None:
        return
    elif tune == "C":
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_all_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUROC")
        plt.title('AUROC_all-C')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_all_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_all-C')
        plt.show()     
        # per drug
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUROC")
        plt.title('AUROC_per_drug-C')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_per_drug-C')
        plt.show()
        # topn
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUROC")
        plt.title('AUROC_topn-C')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_topn-C')
        plt.show()
        # Fmax Smin
        plt.figure()
        plt.plot(tuneVar, ALL_Fmax, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Fmax_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Fmax_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("Fmax")
        plt.title('Fmax-C')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_Smin, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Smin_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Smin_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("Smin")
        plt.title('Smin-C')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_F1, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_F1_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_F1_rd, color='black', linewidth=0.5)
        plt.xlabel("C")
        plt.ylabel("F1")
        plt.title('F1-C')
        plt.show()
    elif tune == "lmd":
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_all_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUROC")
        plt.title('AUROC_all-lambda')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_all_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_all-lambda')
        plt.show()     
        # per drug
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUROC")
        plt.title('AUROC_per_drug-lambda')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_per_drug-lambda')
        plt.show()  
        # per drug
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUROC")
        plt.title('AUROC_topn-lambda')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_topn-lambda')
        plt.show() 
        # Fmax Smin
        plt.figure()
        plt.plot(tuneVar, ALL_Fmax, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Fmax_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Fmax_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("Fmax")
        plt.title('Fmax-lambda')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_Smin, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Smin_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Smin_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("Smin")
        plt.title('Smin-lambda')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_F1, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_F1_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_F1_rd, color='black', linewidth=0.5)
        plt.xlabel("lambda")
        plt.ylabel("F1")
        plt.title('F1-lambda')
        plt.show()
    elif tune == "component":
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_all_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUROC")
        plt.title('AUROC_all-k')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_all, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_all_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_all_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_all-k')
        plt.show()     
        # per drug
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUROC")
        plt.title('AUROC_per_drug-k')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_per_drug, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_per_drug_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_per_drug_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_per_drug-k')
        plt.show()  
        # per drug
        plt.figure()
        plt.plot(tuneVar, ALL_AUROC_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUROC_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUROC_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUROC")
        plt.title('AUROC_topn-k')
        plt.show()
        
        plt.figure()
        plt.plot(tuneVar, ALL_AUCPR_topn, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_AUCPR_topn_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_AUCPR_topn_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("AUPRC")
        plt.title('AUPRC_topn-k')
        plt.show() 
        # Fmax Smin
        plt.figure()
        plt.plot(tuneVar, ALL_Fmax, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Fmax_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Fmax_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("Fmax")
        plt.title('Fmax-k')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_Smin, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_Smin_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_Smin_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("Smin")
        plt.title('Smin-k')
        plt.show()
    
        plt.figure()
        plt.plot(tuneVar, ALL_F1, color='navy', linewidth=0.5)
        plt.plot(tuneVar, ALL_F1_mean, color='red', linewidth=0.5)
        # plt.plot(tuneVar, ALL_F1_rd, color='black', linewidth=0.5)
        plt.xlabel("k")
        plt.ylabel("F1")
        plt.title('F1-k')
        plt.show()

In [32]:
def tuning_results(tuneVar):
    idx = np.argmax(ALL_AUCPR_all)
    Var = tuneVar[idx]
    Value = ALL_AUCPR_all[idx]

    # idx = np.argmax(ALL_AUCPR_per_drug)
    # Var = tuneVar[idx]
    # Value = ALL_AUCPR_per_drug[idx]
    print("best hyperpar: ", Var)
    print("AUPRC: ", Value)

    
    # ALL_AUCPR_all[idx]
    # ALL_AUROC_all[idx]
    # ALL_AUCPR_per_drug[idx]
    # ALL_AUROC_per_drug[idx]
    # ALL_Fmax[idx]
    # ALL_Smin[idx]
    # ALL_F1[idx]
    return Var, Value

In [33]:
def setvar_besttune(innerfolds):
    global besttunevalue
    global besttunevar
    besttunevalue = np.zeros(innerfolds) # best metric value
    besttunevar = np.zeros(innerfolds) # the value of best var
    besttunevar = besttunevar.tolist()

In [34]:
def asg_besttune(f, value, var):
    besttunevalue[f] = value
    besttunevar[f] = var

In [35]:
def besttune():
    idx = np.argmax(besttunevalue)
    value = besttunevalue[idx]
    var = besttunevar[idx]
    return value, var

In [36]:
def cv_results():
    # print("Mean AUC-PR", AUC_test_pr.mean()," ", "Standard Deviation:", AUC_test_pr.std())
    # print("Mean AUC-ROC",AUC_test_roc.mean()," ", "Standard Deviation:", AUC_test_roc.std())
    # print("Mean NDCG:", ndcg_folds.mean(),"  " , " Standard Deviation:", ndcg_folds.std())
    
    print("Mean AUC_pr_all_mean:", AUC_pr_all_mean.mean()," ", "Standard Deviation:", AUC_pr_all_mean.std())
    print("Mean AUC_roc_all_mean", AUC_roc_all_mean.mean()," ", "Standard Deviation:", AUC_roc_all_mean.std())
    print("Mean AUC_pr_per_drug_mean", AUC_pr_per_drug_mean.mean()," ", "Standard Deviation:", AUC_pr_per_drug_mean.std())
    print("Mean AUC_roc_per_drug_mean", AUC_roc_per_drug_mean.mean()," ", "Standard Deviation:", AUC_roc_per_drug_mean.std())
    print("Mean AUC_pr_topn_mean", AUC_pr_topn_mean.mean()," ", "Standard Deviation:", AUC_pr_topn_mean.std())
    print("Mean AUC_roc_topn_mean", AUC_roc_topn_mean.mean()," ", "Standard Deviation:", AUC_roc_topn_mean.std())
    print("Mean Fmax_mean", Fmax_mean.mean()," ", "Standard Deviation:", Fmax_mean.std())
    print("Mean Smin_mean", Smin_mean.mean()," ", "Standard Deviation:", Smin_mean.std())
    print("Mean F1_mean", F1_mean.mean()," ", "Standard Deviation:", F1_mean.std())
    print("-----------")
    results_mean = np.array([AUC_pr_all_mean, AUC_roc_all_mean, AUC_pr_per_drug_mean, AUC_roc_per_drug_mean,\
        AUC_pr_topn_mean, AUC_roc_topn_mean, Fmax_mean, Smin_mean, F1_mean])

    print("Mean AUC_pr_all", AUC_pr_all.mean()," ", "Standard Deviation:", AUC_pr_all.std())
    print("Mean AUC_roc_all", AUC_roc_all.mean()," ", "Standard Deviation:", AUC_roc_all.std())
    print("Mean AUC_pr_per_drug", AUC_pr_per_drug.mean()," ", "Standard Deviation:", AUC_pr_per_drug.std())
    print("Mean AUC_roc_per_drug", AUC_roc_per_drug.mean()," ", "Standard Deviation:", AUC_roc_per_drug.std())
    print("Mean AUC_pr_topn", AUC_pr_topn.mean()," ", "Standard Deviation:", AUC_pr_topn.std())
    print("Mean AUC_roc_topn", AUC_roc_topn.mean()," ", "Standard Deviation:", AUC_roc_topn.std())
    print("Mean Fmax", Fmax.mean()," ", "Standard Deviation:", Fmax.std())
    print("Mean Smin", Smin.mean()," ", "Standard Deviation:", Smin.std())
    print("Mean F1", F1.mean()," ", "Standard Deviation:", F1.std())
    print("-----------")
    results = np.array([AUC_pr_all, AUC_roc_all, AUC_pr_per_drug, AUC_roc_per_drug,\
        AUC_pr_topn, AUC_roc_topn, Fmax, Smin, F1])
    return results_mean, results

In [37]:
def tuning_loop(innermatrix, idx_train_inner, idx_test_inner, feature_matrix_inner1, feature_matrix_inner2, hyperparList, i):
    a = hyperparList[i]
    idx_target_inner = idx_test_inner
    print('target size:', len(idx_target_inner))
    results = innerfold(idx_target_inner,idx_test_inner,feature_matrix1=feature_matrix_inner1,feature_matrix2=feature_matrix_inner2,alpha=a,matrix=innermatrix)
    asgvar_tune(i, results=results)
    print("------ alpha: ", a, "------")

In [38]:
def main(method_option,alpha=0.8,Validation="nested_cv",sets="intersect",a=0.8):
    random.seed(1949) # for dataset split
    np.random.seed(1949) # for matrix initialization
    option(method_option)

    random.seed(1949) # for dataset split
    np.random.seed(1949) # for matrix initialization
    df = pd.read_csv("data/side-effect-and-drug_name_upper.tsv",sep = "\t")
    drug_id = df["drugbank_id"] # put col of df in var
    drug_name = df["drugbank_name"]
    side_effect = df["side_effect_name"]
    
    
    edgelist1 = zip(side_effect, drug_name)
    ##making Biparite Graph##
    B = nx.DiGraph()
    B.add_nodes_from(side_effect,bipartite = 0)
    B.add_nodes_from(drug_name,bipartite = 1)
    B.add_edges_from(edgelist1)
    # B.add_weighted_edges_from(edgelist2)
    drug_nodes = {n for n, d in B.nodes(data=True) if d['bipartite']==1}
    side_effect_nodes = {n for n, d in B.nodes(data=True) if d['bipartite']==0}
    drug_nodes = list(drug_nodes)
    drug_nodes.sort()
    side_effect_nodes = list(side_effect_nodes)
    side_effect_nodes.sort()
    ###Getting the Bi-Adjacency matrix between side effects and drugs ###################
    matrix_all = biadjacency_matrix(B, row_order = side_effect_nodes, column_order = drug_nodes) # create biadjacency matrix for drug side effect graph
    matrix_all = matrix_all.A
    m_all,n_all = matrix_all.shape # number of side effect # number of drug
    
    
    ### Setting validation set / training set / testing set ###
    validate_sz = int(0.25 * n_all)
    IDX_all = list(range(n_all))
    random.shuffle(IDX_all)
    IDX_validate = sorted(IDX_all[0:validate_sz])
    print("first few validation set idx:")
    print(IDX_validate[0:10])
    IDX_validate_diff = np.setdiff1d(IDX_all, IDX_validate)
    matrix = matrix_all[:, IDX_validate_diff].copy()
    # featureMat1 = featureMat1_all[IDX_validate_diff, :][:, IDX_validate_diff].copy()
    # featureMat2 = featureMat2_all[IDX_validate_diff, :][:, IDX_validate_diff].copy()
    # print("WMK shape:")
    # print(featureMat1.shape)
    
    df1 = pd.read_csv("data/intersection_DGIdb_mat.tsv",sep = "\t")
    df2 = pd.read_csv("data/intersection_Fingerprint_mat.tsv",sep = "\t")
    featureMat1_all = FeaturePreprocess(df1, drug_nodes=drug_nodes)
    featureMat2_all = FeaturePreprocess(df2, drug_nodes=drug_nodes)
    # drug_nodes_feature1 = featureMat1_all.index
    # drug_nodes_feature2 = featureMat2_all.index
    featureMat1 = featureMat1_all[IDX_validate_diff, :].copy()
    featureMat2 = featureMat2_all[IDX_validate_diff, :].copy()
    
    
    non_zero_idx_union = np.hstack(np.where(~((featureMat1.sum(1) == 0) & (featureMat2.sum(1) == 0))))
    non_zero_idx_missing = np.hstack(np.where(~(~(featureMat1.sum(1) == 0) & ~(featureMat2.sum(1) == 0))))
    non_zero_idx_intersect = np.hstack(np.where(~(featureMat1.sum(1) == 0) & ~(featureMat2.sum(1) == 0)))
    if sets == "union":
        # union
        matrix = matrix[:, non_zero_idx_union].copy()
        featureMat1 = featureMat1[non_zero_idx_union, :].copy()
        featureMat2 = featureMat2[non_zero_idx_union, :].copy()
    elif sets == "intersect":
        # intersect
        non_zero_idx_intersect_all = np.hstack(np.where(~(featureMat1_all.sum(1) == 0) & ~(featureMat2_all.sum(1) == 0)))
    
        matrix_all = matrix_all[:, non_zero_idx_intersect_all].copy()
        featureMat1_all = featureMat1_all[non_zero_idx_intersect_all, :].copy()
        featureMat2_all = featureMat2_all[non_zero_idx_intersect_all, :].copy()
    
        matrix = matrix[:, non_zero_idx_intersect].copy()
        featureMat1 = featureMat1[non_zero_idx_intersect, :].copy()
        featureMat2 = featureMat2[non_zero_idx_intersect, :].copy()
    
        IDX_validate = np.setdiff1d(non_zero_idx_intersect_all, IDX_validate_diff)
        IDX_validate_diff = np.setdiff1d(non_zero_idx_intersect_all, IDX_validate)
    
        drug_nodes_intersect_all = np.array(drug_nodes)[non_zero_idx_intersect_all]
        drug_nodes_intersect_validate_diff = np.array(drug_nodes)[IDX_validate_diff]
        drug_nodes_intersect_validate = np.array(drug_nodes)[IDX_validate]
    
        IDX_validate = np.array([x for x in range(len(drug_nodes_intersect_all)) if drug_nodes_intersect_all[x] in drug_nodes_intersect_validate])
        IDX_validate_diff = np.array([x for x in range(len(drug_nodes_intersect_all)) if drug_nodes_intersect_all[x] in drug_nodes_intersect_validate_diff])
    
    m,n = matrix.shape # number of side effect # number of drug




    random.seed(1949) # for dataset split
    np.random.seed(1949) # for matrix initialization
    start_time = time.time()



    FOLDS = 5
    innerFOLDS = 4
    ####for test sets####
    setvar_cv(FOLDS)

    sz = n
    IDX = list(range(sz))
    fsz = int(sz/FOLDS)
    random.shuffle(IDX)
    IDX = np.array(IDX)
    offset = 0

    innersz = sz - fsz
    innerIDX = list(range(innersz))
    random.shuffle(innerIDX)
    innerIDX = np.array(innerIDX)
    innerfsz = int(innersz / innerFOLDS)
    inneroffset = 0
    # setvar_cv(FOLDS=FOLDS)
    if Validation == "nested_cv":
        for f in range(FOLDS):  # range(FOLDS):
            offset = 0 + f*fsz
            idx_test = IDX[offset:offset + fsz]
    
            idx_train = IDX[np.setdiff1d(np.arange(len(IDX)), np.arange(offset,offset + fsz))]
            print("Fold:",f)
            innermatrix = matrix[:, idx_train]
            innerfeatureMat1 = featureMat1[idx_train, :].copy()
            innerfeatureMat2 = featureMat2[idx_train, :].copy()
            # print(type(weight_matrix1_inner))
    
            setvar_besttune(innerFOLDS)
    
            for innerf in range(innerFOLDS):
                idx_test_inner = innerIDX[inneroffset:inneroffset + innerfsz]
                idx_train_inner = innerIDX[np.array(np.setdiff1d(np.arange(len    (idx_train)), np.arange(inneroffset,inneroffset + innerfsz)))]
    
                print("Inner Fold:", innerf)
    
                alpha = np.arange(0.1, 1, 0.05).tolist()
                hyperparList = alpha
                setvar_tune(len(hyperparList))
    
                with parallel_backend('threading'):
                    Parallel(n_jobs=20)(delayed(tuning_loop)(innermatrix = innermatrix, idx_train_inner = idx_train_inner, 
                        idx_test_inner = idx_test_inner, feature_matrix_inner1= innerfeatureMat1, \
                            feature_matrix_inner2=innerfeatureMat2, hyperparList = hyperparList, i = i) \
                                    for i in range(len(hyperparList)))
    
                # tuning_plot(tuneVar=C, tune="C")
                hyperpars, evalValue = tuning_results(tuneVar=hyperparList)
    
                asg_besttune(innerf, value=evalValue, var=hyperpars)
                    
            _, bestHyperPars = besttune()
                
    
            print("--- tuning end ---")
            a = bestHyperPars
    
                
            # idx_target = np.intersect1d(idx_test, WMK_non_zero_idx_intersect)
            idx_target = idx_test
            print('target size:', len(idx_target))
    
            print("------ lambda: ", a, "------")
    
            results = fold(idx_target,idx_test,featureMat1,featureMat2,alpha=a,matrix=matrix)
            asgvar_cv(f=f, results=results)
                
            
        out_mean, out = cv_results()
        return out_mean, out
    elif Validation == "cv":
        setvar_besttune(FOLDS)
        for f in range(FOLDS):  # range(FOLDS):
            offset = 0 + f*fsz
            idx_test = IDX[offset:offset + fsz]
            idx_train = IDX[np.setdiff1d(np.arange(len(IDX)), np.arange(offset,offset + fsz))]

            print("Fold:",f)


            alpha = np.arange(0.1, 1, 0.05).tolist()
            hyperparList = alpha
            setvar_tune(len(hyperparList))
    
            with parallel_backend('threading'):
                Parallel(n_jobs=20)(delayed(tuning_loop)(innermatrix = matrix, idx_train_inner = idx_train, idx_test_inner = idx_test, \
                    feature_matrix_inner1= featureMat1, feature_matrix_inner2= featureMat2, hyperparList = hyperparList, i = i) \
                    for i in range(len(hyperparList)))
    
            # tuning_plot(tuneVar=C, tune="C")
            hyperpars, evalValue = tuning_results(tuneVar=hyperparList)
    
            asg_besttune(f, value=evalValue, var=hyperpars)
            # a = hyperpars
            # idx_target = idx_test
            # print('target size:', len(idx_target))

            # print("------ a: ", a, "------")
            # results = fold(idx_target,idx_test,weight_matrix1=weight_matrix1, weight_matrix2=weight_matrix2,alpha=a,matrix=matrix)
            # asgvar_cv(f=f, results=results)
                

                    
        print("--- tuning end ---")
        # cv_results()
        _, bestHyperPars = besttune()
    
    elif Validation == "Validation":
 

        # idx_target = np.intersect1d(idx_test, WMK_non_zero_idx_intersect)
        idx_test = IDX_validate
        idx_train = IDX_validate_diff
        idx_target = idx_test
        print('target size:', len(idx_target))
        print("------ a: ", a, "------")
    
        results = fold(idx_target,idx_test,feature_matrix1=featureMat1_all, feature_matrix2=featureMat2_all,alpha=a,matrix=matrix_all)
        return
    elif Validation == "plot":
    

        # idx_target = np.intersect1d(idx_test, WMK_non_zero_idx_intersect)
        idx_test = IDX_validate
        idx_train = IDX_validate_diff
        idx_target = idx_test
        print('target size:', len(idx_target))
        print("------ a: ", a, "------")
    
        pr, roc = plotfold(idx_target,idx_test,feature_matrix1=featureMat1_all,feature_matrix2=featureMat2_all,alpha=a,matrix=matrix_all)
        
        return pr, roc

In [39]:
main(method_option = "LNSM_CMI", Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]

First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few 

(array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0.35742519, 0.34568922, 0.32599043, 0.35499118, 0.34670926],
        [0.86587462, 0.85457452, 0.85607395, 0.8763973 , 0.87312464],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ]]))

In [40]:
main(method_option = "LNSM_SMI", Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few 

(array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0.29451682, 0.27071051, 0.29366793, 0.28797252, 0.32756292],
        [0.72966288, 0.73236283, 0.72337487, 0.74254629, 0.76102198],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ]]))

In [41]:
main(method_option = "LNSM_WMK1", Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size:target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 11

(array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0.32569603, 0.3138713 , 0.32254312, 0.30776102, 0.32087932],
        [0.86564283, 0.85730648, 0.87743144, 0.88809633, 0.85920818],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ]]))

In [42]:
main(method_option = "LNSM_WMK2", Validation="nested_cv")

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
Inner Fold: 0
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few target index: [327  39 166 367  17 387 158  45 293  27]
First few mask index: [327  39 166 367  17 387 158  45 293  27]
target size: 115
First few 

(array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0.35742519, 0.34568922, 0.32599043, 0.35499118, 0.34670926],
        [0.86587462, 0.85457452, 0.85607395, 0.8763973 , 0.87312464],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ]]))

In [43]:
main(method_option = "LNSM_CMI",Validation="cv") # 

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [44]:
main(method_option = "LNSM_SMI",Validation="cv") # 0.1

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]

target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [45]:
main(method_option = "LNSM_WMK1",Validation="cv") # 0.1

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]

[384 293 396 263  31 466 141 431  32 337]target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [

In [46]:
main(method_option = "LNSM_WMK2",Validation="cv") # 0.2

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
Fold: 0
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: [384 293 396 263  31 466 141 431  32 337]
First few mask index: [384 293 396 263  31 466 141 431  32 337]
target size: 115
First few target index: 

In [45]:
main(method_option = "LNSM_CMI", Validation="Validation", a=0.15)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.15 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.34866442893067384
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8815423895894255
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [48]:
main(method_option = "LNSM_SMI", Validation="Validation", a=0.1)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.1 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.26364827632418075
-----
AUC-ROC mean: 0
AUC-ROC all: 0.7295772251770828
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [49]:
main(method_option = "LNSM_WMK1", Validation="Validation", a=0.1)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.1 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.2704789833227833
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8611061079176346
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [40]:
main(method_option = "LNSM_WMK2", Validation="Validation", a=0.15)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.15 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.34866442893067384
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8815423895894255
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


Plot

In [47]:
LNSM_CMI_RLN_pr, LNSM_CMI_RLN_roc = \
    main(method_option="LNSM_CMI", Validation="plot", a=0.15)            
LNSM_CMI_RLN_pr.T.to_csv("Figs/LNSM_CMI_RLN_pr.csv", index=False)
LNSM_CMI_RLN_roc.T.to_csv("Figs/LNSM_CMI_RLN_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.15 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.34866442893067384
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8815423895894255
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [82]:
LNSM_SMI_RLN_pr, LNSM_SMI_RLN_roc = \
    main(method_option="LNSM_SMI", Validation="plot", a=0.1)
LNSM_SMI_RLN_pr.T.to_csv("Figs/LNSM_SMI_RLN_pr.csv", index=False)
LNSM_SMI_RLN_roc.T.to_csv("Figs/LNSM_SMI_RLN_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.1 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.26364827632418075
-----
AUC-ROC mean: 0
AUC-ROC all: 0.7295772251770828
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [83]:
LNSM_WMK1_RLN_pr, LNSM_WMK1_RLN_roc = \
    main(method_option="LNSM_WMK1", Validation="plot", a=0.1)
LNSM_WMK1_RLN_pr.T.to_csv("Figs/LNSM_WMK1_RLN_pr.csv", index=False)
LNSM_WMK1_RLN_roc.T.to_csv("Figs/LNSM_WMK1_RLN_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.1 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.2704789833227833
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8611061079176346
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0


In [42]:
LNSM_WMK2_RLN_pr, LNSM_WMK2_RLN_roc = \
    main(method_option="LNSM_WMK2", Validation="plot", a=0.15)
LNSM_WMK2_RLN_pr.T.to_csv("Figs/LNSM_WMK2_RLN_pr.csv", index=False)
LNSM_WMK2_RLN_roc.T.to_csv("Figs/LNSM_WMK2_RLN_roc.csv", index=False)

first few validation set idx:
[2, 7, 12, 13, 17, 20, 26, 39, 45, 47]
target size: 201
------ a:  0.15 ------
First few target index: [ 4 11 14 27 32 34 35 37 41 47]
First few mask index: [ 4 11 14 27 32 34 35 37 41 47]
LNSM starts:
LNSM converged
proportion of ground truth: 0.02256888445786783
---evaluation---
-----
AUC-PR mean: 0
AUC-PR all: 0.34866442893067384
-----
AUC-ROC mean: 0
AUC-ROC all: 0.8815423895894255
-----
AUC-PR per drug mean: 0
AUC-PR per drug: 0
-----
AUC-ROC per drug mean: 0
AUC-ROC per drug: 0
-----
AUC-ROC top N mean: 0
AUC-ROC top N: 0
-----
AUC-PR top N mean: 0
AUC-PR top N: 0
-----
F1 mean: 0
F1: 0
-----
Fmax_mean 0
Fmax 0
-----
Smin_mean 0
Smin 0
