In [7]:
# %load combine_sims2.py
import numpy as np
from base.csbase import Combine_Sims_Base
from sklearn.neighbors import NearestNeighbors

""" combine multiple similarities using the weight of each drug/target"""

class Combine_Sims_LimbPerDT(Combine_Sims_Base):
    def __init__(self, k=5):
        self.k = k
        self.copyable_attrs=['k']
    #---------------------------------------------------------------------------------------- 
    
    def combine(self, Ss, Y):
        self._num_sims = Ss.shape[0]
        self._n = Ss.shape[1] # the number of drugs/targets
        self.W = self._compute_weights(Ss, Y) # W: num_sims,n
        S = self._combine_sim(self.W, Ss) 
        return S, self.W
    #---------------------------------------------------------------------------------------- 
    
    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        sum_W_rows = np.sum(W_te, axis=0)
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1
        S_te = self._combine_sim(W_te, Ss_te)        
        return S_te, W_te
    #----------------------------------------------------------------------------------------    
    
    def _combine_sim(self, W, Ss):
        # W.shape= num_sims,n, Ss.shape=num_sims,n,n
        W1 = W[:,:,None] # W1.shape = num_sims,n,1
        S = Ss*W1
        S = np.sum(S,axis=0)
        return S
    #---------------------------------------------------------------------------------------- 
    
    def _check_test_sim(self, Ss):
        if Ss.shape[0] != self._num_sims:
            raise RuntimeError("The number of similairties in Ss ({}) is not same with self._num_sims ({})!!".format(Ss.shape[0], self._num_sims))
        if self._n != Ss.shape[2]:
            raise RuntimeError("The self._n:{} is not comparable with Ss's column {}!!".format(self._n, Ss.shape[2]))
    #----------------------------------------------------------------------------------------    
    
    def _compute_weights(self, Ss, Y):
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            C = self._cal_limb(S1, Y, self.k)
            W[i,:] = np.sum(C, axis=1)+1.0/self.k 
            # W[i,h] is the difficulty of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W

        W = 1/W  # np.exp(-1*W) has similar performance 
        sum_W_rows = np.sum(W, axis=0)
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            for j in range(Y.shape[1]):
                if Y[i,j] == 1: # only consider "1" 
                    C[i,j] = k-np.sum(Y[ii,j])
        C = C/k
        #milb = np.sum(C)/np.sum(Y)
        
        return C
    #---------------------------------------------------------------------------------------- 
#---------------------------------------------------------------------------------------- 


class Combine_Sims_LimbPerDT_1(Combine_Sims_Base):
    def __init__(self, k=5):
        self.k = k
        self.copyable_attrs=['k']
    #---------------------------------------------------------------------------------------- 
    
    def combine(self, Ss, Y):
        self._num_sims = Ss.shape[0]
        self._n = Ss.shape[1] # the number of drugs/targets
        self.W = self._compute_weights(Ss, Y) # W: num_sims,n
        S = self._combine_sim(self.W, Ss) 
        
    
        S[S>=1] = 1.0
        
        return S, self.W
    #---------------------------------------------------------------------------------------- 
    
    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        sum_W_rows = np.sum(W_te, axis=0)
        if np.any(sum_W_rows==0):
            print(W_te)
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1
        S_te = self._combine_sim(W_te, Ss_te)
        
        # S_te_sum = np.sum(S_te, axis=1)
        # if np.any(S_te_sum==0):
        #     print(S_te_sum)
        S_te[S_te>1] = 1
        return S_te, W_te
    #----------------------------------------------------------------------------------------    
    
    def _combine_sim(self, W, Ss):
        # W.shape= num_sims,n, Ss.shape=num_sims,n,n
        W1 = W[:,:,None] # W1.shape = num_sims,n,1
        S = Ss*W1
        S = np.sum(S,axis=0)
        return S
    #---------------------------------------------------------------------------------------- 
    
    def _check_test_sim(self, Ss):
        if Ss.shape[0] != self._num_sims:
            raise RuntimeError("The number of similairties in Ss ({}) is not same with self._num_sims ({})!!".format(Ss.shape[0], self._num_sims))
        if self._n != Ss.shape[2]:
            raise RuntimeError("The self._n:{} is not comparable with Ss's column {}!!".format(self._n, Ss.shape[2]))
    #----------------------------------------------------------------------------------------    
    
    def _compute_weights(self, Ss, Y):
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            milb, C = self._cal_limb(S1, Y, self.k)
            wg[i] = 1- milb
            
            idx1 = np.where(Y==1)
            C[idx1] = 1-C[idx1]
            W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
            # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
         
        sum_wg = np.sum(wg)
            
        sum_W_rows = np.sum(W, axis=0)  
        idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        sum_W_rows[idx0] = sum_wg
        W[:,idx0] = wg[:,None]
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            for j in range(Y.shape[1]):
                if Y[i,j] == 1: # only consider "1" 
                    C[i,j] = k-np.sum(Y[ii,j])
        C = C/k
        milb = np.sum(C)/np.sum(Y)
        
        return milb, C
    #---------------------------------------------------------------------------------------- 
#---------------------------------------------------------------------------------------- 

class Combine_Sims_LimbPerDT_2(Combine_Sims_LimbPerDT_1):
    """ set some samllest weights in each row (of each drug) to zeros"""
    def __init__(self, k=5, rho = 0.6):
        self.k = k
        self.rho = rho # the percetage of weghts of similarities removed
        self.copyable_attrs=['k', 'rho']
    #---------------------------------------------------------------------------------------- 
    
    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        # sum_W_rows = np.sum(W_te, axis=0)
        
        """ !!! No test weights are all zeros"""
        # idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        # sum_W_rows[idx0] = self.sum_wg
        # W_te[:,idx0] = self.wg[:,None]       
        # if len(idx0) > 0:
        #     print("weight", idx0)
        
        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims>0:
            idx_par = np.argpartition(W_te, kth=self.rn_sims, axis=0)
            W_te[idx_par[:self.rn_sims,:],np.arange(W_te.shape[1])[None,:]] = 0   
            
        sum_W_rows = np.sum(W_te, axis=0) # recompute the sum of each columns
        sum_W_rows[sum_W_rows==0] = 1 # ensure no zero vlaues in sum_W_rows, as it will be used as denominator
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1

        
        S_te = self._combine_sim(W_te, Ss_te)  
        
        # check if any row of S_te are all 0s
        # sum_S_rows = np.sum(S_te, axis=1)
        # idx0 = np.where(sum_S_rows==0)[0]
        # if len(idx0) > 0:
        #     print("sim", idx0)
        #     S_te[idx0] = np.average(Ss_te[:,idx0,:],axis=0,weights=self.wg)
        S_te[S_te>1] = 1
        return S_te, W_te
    #----------------------------------------------------------------------------------------    
    
    
    # def _compute_weights(self, Ss, Y):
    #     self.rn_sims = int(self.rho*self._num_sims)
        
    #     W = np.zeros((self._num_sims,self._n), dtype=float) 
    #     self.wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
    #     for i in range(self._num_sims):
    #         S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
    #         milb, C = self._cal_limb(S1, Y, self.k)
    #         self.wg[i] = 1- milb
            
    #         idx1 = np.where(Y==1)
    #         C[idx1] = 1-C[idx1]
    #         W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
    #         # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
        
    #     # set smaller rn_sims weights in each column of W to zero
    #     if self.rn_sims>0:
    #         idx_par = np.argpartition(self.wg, kth=self.rn_sims)
    #         self.wg[idx_par[:self.rn_sims]] = 0
    #     self.sum_wg = np.sum(self.wg)
            
    #     sum_W_rows = np.sum(W, axis=0)  
    #     idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
    #     sum_W_rows[idx0] = self.sum_wg
    #     W[:,idx0] = self.wg[:,None]
        
    #     # set smaller rn_sims weights in each column of W to zero
    #     if self.rn_sims>0:
    #         idx_par = np.argpartition(W, kth=self.rn_sims, axis=0)
    #         W[idx_par[:self.rn_sims,:],np.arange(W.shape[1])[None,:]] = 0   
    #         sum_W_rows = np.sum(W, axis=0) # recompute the sum of each columns
            
    #     W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
    #     return W
    # #---------------------------------------------------------------------------------------- 
    
    
    def _compute_weights(self, Ss, Y):
        self.rn_sims = int(self.rho*self._num_sims)
        
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        self.wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            _, C = self._cal_limb(S1, Y, self.k)
            # self.wg[i] = 1- milb
            
            idx1 = np.where(Y==1)
            C[idx1] = 1-C[idx1]
            W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
            # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
        
        # set smaller rn_sims weights in each column of W to zero
        # if self.rn_sims>0:
        #     idx_par = np.argpartition(self.wg, kth=self.rn_sims)
        #     self.wg[idx_par[:self.rn_sims]] = 0
        # self.sum_wg = np.sum(self.wg)
            
        wg = np.sum(W, axis=1)
        sum_W_rows = np.sum(W, axis=0)  
        idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        # sum_W_rows[idx0] = self.sum_wg
        W[:,idx0] = wg[:,None]
        
        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims>0:
            idx_par = np.argpartition(W, kth=self.rn_sims, axis=0)
            W[idx_par[:self.rn_sims,:],np.arange(W.shape[1])[None,:]] = 0   
            
        sum_W_rows = np.sum(W, axis=0) # recompute the sum of each columns   
        sum_W_rows[sum_W_rows==0] = 1
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            s = S[i,ii]
            z = np.sum(s)
            if z == 0:
                z=1
            C[i] = 1-s@Y[ii,:]/z
        C *= Y #
        milb = np.sum(C)/np.sum(Y)
        
        return milb, C
    #---------------------------------------------------------------------------------------- 
    
#---------------------------------------------------------------------------------------- 



    
    
class Combine_Sims_LimbPerDT2(Combine_Sims_LimbPerDT):
    """ Difference with Combine_Sims_LimbPerDT: considering the influence of similarities in '_cal_limb' function
    Seems slightly good for MFAP but worse for MFAUC, compared with Combine_Sims_Limb3
    """
    
    def combine_test(self, Ss_te): # No change
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        sum_W_rows = np.sum(W_te, axis=0)
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1
        S_te = self._combine_sim(W_te, Ss_te)        
        return S_te, W_te
    #----------------------------------------------------------------------------------------     
    
    def _compute_weights(self, Ss, Y):  # No change
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            C = self._cal_limb(S1, Y, self.k)
            W[i,:] = np.sum(C, axis=1)+1.0/self.k 
            # W[i,h] is the difficulty of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
      
        W = 1/W
        sum_W_rows = np.sum(W, axis=0)
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            s = S[i,ii]
            z = np.sum(s)
            if z == 0:
                z=1
            C[i] = 1-s@Y[ii,:]/z
        C *= Y #
        #milb = np.sum(C)/np.sum(Y)
        
        return C
    #---------------------------------------------------------------------------------------- 
#---------------------------------------------------------------------------------------- 

In [63]:
p1=np.array([[1,0.3,0.6,0.4,0.3],
           [0.3,1,0.3,0.1,0.2],
           [0.6,0.3,1,0.4,0.3],
           [0.4,0.1,0.4,1,0],
           [0.3,0.2,0.3,0,1]])
p2=np.array([[1,0.1,0,0.8,0],
           [0.1,1,0.4,0.4,0.1],
           [0,0.4,1,0,0.6],
           [0.5,0.4,0,1,0.2],
           [0,0.1,0.6,0.2,1]])
p3=np.array([[1,0.1,0.8,0.4,0],
           [0.1,1,0.4,0,0],
           [0.8,0.4,1,0,0.8],
           [0.4,0,0,1,0.2],
           [0,0,0.8,0.2,1]])
P=np.array([[1,0,1,1],
          [0,1,0,0],
          [0,0,0,1],
          [1,0,1,0],
          [0,0,0,0]])

In [70]:
Ss=np.array([p1,p2,p3])
csl1 = Combine_Sims_LimbPerDT_2(k=2, rho=1/3)
csl1.combine(Ss,P)

(array([[1.        , 0.18811189, 0.26433566, 0.62377622, 0.13216783],
        [0.1972973 , 1.        , 0.35135135, 0.04864865, 0.0972973 ],
        [0.69090909, 0.34545455, 1.        , 0.21818182, 0.52727273],
        [0.44545455, 0.18181818, 0.        , 1.        , 0.2       ],
        [0.14594595, 0.0972973 , 0.55675676, 0.1027027 , 1.        ]]),
 array([[0.44055944, 0.48648649, 0.54545455, 0.        , 0.48648649],
        [0.55944056, 0.        , 0.        , 0.45454545, 0.        ],
        [0.        , 0.51351351, 0.45454545, 0.54545455, 0.51351351]]))

In [71]:
csl1.combine_test(Ss)

(array([[1.        , 0.1986014 , 0.2958042 , 0.6027972 , 0.1479021 ],
        [0.19783584, 1.        , 0.35108208, 0.04891792, 0.09783584],
        [0.6631068 , 0.3315534 , 1.        , 0.27378641, 0.45776699],
        [0.46502242, 0.26008969, 0.        , 1.        , 0.2       ],
        [0.15479115, 0.1031941 , 0.54201474, 0.0968059 , 1.        ]]),
 array([[0.49300699, 0.48917922, 0.68446602, 0.        , 0.51597052],
        [0.50699301, 0.        , 0.        , 0.65022422, 0.        ],
        [0.        , 0.51082078, 0.31553398, 0.34977578, 0.48402948]]))

In [72]:
csl1._check_test_sim(Ss)

In [73]:
csl1._compute_weights(Ss,P)

array([[0.44055944, 0.48648649, 0.54545455, 0.        , 0.48648649],
       [0.55944056, 0.        , 0.        , 0.45454545, 0.        ],
       [0.        , 0.51351351, 0.45454545, 0.54545455, 0.51351351]])

In [74]:
csl1._combine_sim(csl1._compute_weights(Ss,P),Ss)

array([[1.        , 0.18811189, 0.26433566, 0.62377622, 0.13216783],
       [0.1972973 , 1.        , 0.35135135, 0.04864865, 0.0972973 ],
       [0.69090909, 0.34545455, 1.        , 0.21818182, 0.52727273],
       [0.44545455, 0.18181818, 0.        , 1.        , 0.2       ],
       [0.14594595, 0.0972973 , 0.55675676, 0.1027027 , 1.        ]])

In [75]:
csl1._cal_limb(csl1._combine_sim(csl1._compute_weights(Ss,P),Ss),P,3)

(0.20649213610934108,
 array([[0.14      , 0.        , 0.14      , 0.33037037],
        [0.        , 0.35427574, 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.23770492],
        [0.12154696, 0.        , 0.12154696, 0.        ],
        [0.        , 0.        , 0.        , 0.        ]]))

In [76]:
S1 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_ddi.txt")
S2 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_disease.txt")
S3 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_se.txt")
S4 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_tanimoto.txt")
Y_ = np.loadtxt("./datasets_mv/luo/luo_admat_dgc.txt").T

In [77]:
csl = Combine_Sims_LimbPerDT_2(k=5, rho=0.2)

In [78]:
similarity_matrices = np.array([S1, S2, S3, S4])

In [79]:
similarity_matrices.shape

(4, 708, 708)

In [80]:
csl.combine(similarity_matrices,Y_)

(array([[1.        , 0.11590694, 0.11716122, ..., 0.02986061, 0.11112665,
         0.18541722],
        [0.11590694, 1.        , 0.10733489, ..., 0.03454343, 0.0773127 ,
         0.1308292 ],
        [0.11716122, 0.10733489, 1.        , ..., 0.11118222, 0.12837926,
         0.15725569],
        ...,
        [0.02986061, 0.03454343, 0.11118222, ..., 1.        , 0.07300343,
         0.04185747],
        [0.11112665, 0.0773127 , 0.12837926, ..., 0.07300343, 1.        ,
         0.14924485],
        [0.20945397, 0.13086205, 0.15658613, ..., 0.03021092, 0.19575674,
         1.        ]]),
 array([[0.2752286 , 0.2752286 , 0.2752286 , ..., 0.2752286 , 0.2752286 ,
         0.4049607 ],
        [0.16697413, 0.16697413, 0.16697413, ..., 0.16697413, 0.16697413,
         0.28507603],
        [0.24368714, 0.24368714, 0.24368714, ..., 0.24368714, 0.24368714,
         0.        ],
        [0.31411014, 0.31411014, 0.31411014, ..., 0.31411014, 0.31411014,
         0.30996327]]))

In [81]:
csl.combine_test(similarity_matrices)

(array([[1.        , 0.11521455, 0.11179417, ..., 0.03270862, 0.10081245,
         0.18120953],
        [0.12421427, 1.        , 0.11477922, ..., 0.03176329, 0.09382116,
         0.13902752],
        [0.11959833, 0.10645788, 1.        , ..., 0.11040969, 0.13106636,
         0.15993404],
        ...,
        [0.03118378, 0.03539334, 0.11475068, ..., 1.        , 0.07538963,
         0.04339301],
        [0.10338645, 0.06999449, 0.13598859, ..., 0.09836645, 1.        ,
         0.12454738],
        [0.1832447 , 0.12451972, 0.14693473, ..., 0.03717592, 0.15161681,
         1.        ]]),
 array([[0.25227705, 0.22837993, 0.27159769, ..., 0.17372668, 0.18907272,
         0.35452434],
        [0.11763984, 0.26048263, 0.1787501 , ..., 0.25413615, 0.06523654,
         0.18226376],
        [0.28692789, 0.23630604, 0.2339601 , ..., 0.24541932, 0.28627705,
         0.15383158],
        [0.34315522, 0.27483141, 0.31569211, ..., 0.32671785, 0.45941368,
         0.30938033]]))

In [83]:
csl._check_test_sim(similarity_matrices)

In [84]:
csl._compute_weights(similarity_matrices,Y_)

array([[0.2752286 , 0.2752286 , 0.2752286 , ..., 0.2752286 , 0.2752286 ,
        0.4049607 ],
       [0.16697413, 0.16697413, 0.16697413, ..., 0.16697413, 0.16697413,
        0.28507603],
       [0.24368714, 0.24368714, 0.24368714, ..., 0.24368714, 0.24368714,
        0.        ],
       [0.31411014, 0.31411014, 0.31411014, ..., 0.31411014, 0.31411014,
        0.30996327]])

In [92]:
S_FGS_d=csl._combine_sim(csl._compute_weights(similarity_matrices,Y_),similarity_matrices)

In [142]:

np.savetxt('S_FGS_d.txt', S_FGS_d, delimiter='\t')

In [127]:
[S1, S2, S3, S4]

[array([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 0., 1.]]),
 array([[1.       , 0.19036  , 0.28817  , ..., 0.0028653, 0.32353  ,
         0.3505   ],
        [0.19036  , 1.       , 0.13008  , ..., 0.       , 0.21622  ,
         0.19634  ],
        [0.28817  , 0.13008  , 1.       , ..., 0.0011723, 0.26042  ,
         0.33408  ],
        ...,
        [0.0028653, 0.       , 0.0011723, ..., 1.       , 0.0041667,
         0.0029674],
        [0.32353  , 0.21622  , 0.26042  , ..., 0.0041667, 1.       ,
         0.50394  ],
        [0.3505   , 0.19634  , 0.33408  , ..., 0.0029674, 0.50394  ,
         1.       ]]),
 array([[1.      , 0.044444, 0.11905 , ..., 0.013158, 0.017699, 0.065217],
        [0.044444, 1.      , 0.25926 , ..., 0.066667, 0.0625  , 0.090909],
        [0.11905 , 0.25926 , 1.      , ..., 0.

In [129]:
S2_p = np.loadtxt("./datasets_mv/luo/Tsim/luo_simmat_proteins_disease.txt")
S1_p = np.loadtxt("./datasets_mv/luo/Tsim/luo_simmat_proteins_ppi.txt")
S3_p = np.loadtxt("./datasets_mv/luo/Tsim/luo_simmat_proteins_sw-n.txt")

Y_1 = np.loadtxt("./datasets_mv/luo/luo_admat_dgc.txt")

In [130]:
similarity_matrices_p = np.array([S1_p, S2_p,S3_p])

In [131]:
similarity_matrices_p.shape

(3, 1512, 1512)

In [132]:
csl_p = Combine_Sims_LimbPerDT_2(k=5, rho=0.7)

In [133]:
csl_p.combine(similarity_matrices_p,Y_1)

(array([[1.  , 0.95, 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 1.  , 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 0.95, 1.  , ..., 0.95, 0.94, 0.94],
        ...,
        [0.95, 0.95, 0.95, ..., 1.  , 0.94, 0.94],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.]]))

In [134]:
csl_p.combine_test(similarity_matrices_p)

(array([[1.  , 0.95, 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 1.  , 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 0.95, 1.  , ..., 0.95, 0.94, 0.94],
        ...,
        [0.95, 0.95, 0.95, ..., 1.  , 0.94, 0.94],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.]]))

In [135]:
csl_p._check_test_sim(similarity_matrices_p)

In [136]:
csl_p._compute_weights(similarity_matrices_p,Y_1)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 1., 1., ..., 1., 1., 1.]])

In [137]:
S_FGS_p=csl_p._combine_sim(csl_p._compute_weights(similarity_matrices_p,Y_1),similarity_matrices_p)

In [143]:
np.savetxt('S_FGS_p.txt', S_FGS_p, delimiter='\t')

In [139]:
[S1_p, S2_p, S3_p]

[array([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 0., 1.]]),
 array([[1.     , 0.53944, 0.36111, ..., 0.57476, 0.48302, 0.47758],
        [0.53944, 1.     , 0.39005, ..., 0.58358, 0.60386, 0.48448],
        [0.36111, 0.39005, 1.     , ..., 0.50254, 0.35823, 0.4229 ],
        ...,
        [0.57476, 0.58358, 0.50254, ..., 1.     , 0.57068, 0.52829],
        [0.48302, 0.60386, 0.35823, ..., 0.57068, 1.     , 0.46291],
        [0.47758, 0.48448, 0.4229 , ..., 0.52829, 0.46291, 1.     ]]),
 array([[1.  , 0.95, 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 1.  , 0.95, ..., 0.95, 0.94, 0.94],
        [0.95, 0.95, 1.  , ..., 0.95, 0.94, 0.94],
        ...,
        [0.95, 0.95, 0.95, ..., 1.  , 0.94, 0.94],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ],
        [0.94, 0.94, 0.94, ..., 0.94, 1.  , 1.  ]])]

In [21]:
combine = Combine_Sims_LimbPerDT_2(k=5, rho=0.2)
S1 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_ddi.txt")

In [23]:
S1.shape

(708, 708)

In [20]:
S_te1, W_te1 = combine.combine_test(S1)

ValueError: Expected 2D array, got 1D array instead:
array=[0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [17]:
combine = Combine_Sims_LimbPerDT_2(k=5, rho=0.2)
S1 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_ddi.txt")
S2 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_disease.txt")
S3 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_se.txt")
S4 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_tanimoto.txt")

# 将相似性矩阵存储在列表中
similarity_matrices = [S1, S2, S3, S4]
S_te, W_te = combine.combine_test(similarity_matrices)

# 创建类的实例
combine = Combine_Sims_LimbPerDT_2(k=5, rho=0.6)

# 假设S1、S2、S3、S4是你的四个相似性矩阵
S1 = ...
S2 = ...
S3 = ...
S4 = ...

# 调用combine_test方法，分别传入相似性矩阵
S_te1, W_te1 = combine.combine_test(S1)
S_te2, W_te2 = combine.combine_test(S2)
S_te3, W_te3 = combine.combine_test(S3)
S_te4, W_te4 = combine.combine_test(S4)

# S_te1, S_te2, S_te3, S_te4分别为组合后的相似性矩阵，可以根据需要进行后续处理


AttributeError: 'list' object has no attribute 'shape'

In [8]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

class Combine_Sims_LimbPerDT_2:
    """ set some smallest weights in each row (of each drug) to zeros"""
    def __init__(self, k=5, rho=0.6):
        self.k = k
        self.rho = rho # the percentage of weights of similarities removed
        self.copyable_attrs = ['k', 'rho']

    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similarities
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n, self._n)))
        W_te = np.zeros((self._num_sims, self._m), dtype=float)

        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)

        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims > 0:
            idx_par = np.argpartition(W_te, kth=self.rn_sims, axis=0)
            W_te[idx_par[:self.rn_sims,:], np.arange(W_te.shape[1])[None,:]] = 0

        sum_W_rows = np.sum(W_te, axis=0) # recompute the sum of each columns
        sum_W_rows[sum_W_rows == 0] = 1 # ensure no zero values in sum_W_rows, as it will be used as denominator
        W_te = W_te / sum_W_rows[None,:] # the sum of each columns in W is 1

        S_te = self._combine_sim(W_te, Ss_te)

        S_te[S_te > 1] = 1
        return S_te, W_te

    def _combine_sim(self, W, S):
        weighted_S = np.average(S, axis=0, weights=W)
        return weighted_S

# 创建四个示例相似性矩阵S1, S2, S3和S4
S1 = np.array([[0.0, 0.1, 0.2],
               [0.1, 0.0, 0.3],
               [0.2, 0.3, 0.0]])

S2 = np.array([[0.0, 0.4, 0.5],
               [0.4, 0.0, 0.6],
               [0.5, 0.6, 0.0]])

S3 = np.array([[0.0, 0.7, 0.8],
               [0.7, 0.0, 0.9],
               [0.8, 0.9, 0.0]])

S4 = np.array([[0.0, 0.10, 0.11],
               [0.10, 0.0, 0.12],
               [0.11, 0.12, 0.0]])

Ss_te = [S1, S2, S3, S4]

# 创建Combine_Sims_LimbPerDT_2类的实例，并调用combine_test方法来组合相似性矩阵
combiner = Combine_Sims_LimbPerDT_2()
combined_S_te, combined_W_te = combiner.combine_test(Ss_te)

print("Combined Similarity Matrix:")
print(combined_S_te)

print("\nWeights Matrix:")
print(combined_W_te)


AttributeError: 'Combine_Sims_LimbPerDT_2' object has no attribute '_check_test_sim'

In [18]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

class Combine_Sims_LimbPerDT_2:
    """ set some smallest weights in each row (of each drug) to zeros"""
    def __init__(self, k=2, rho=0.3):
        self.k = k
        self.rho = rho # the percentage of weights of similarities removed
        self.copyable_attrs = ['k', 'rho']

    def combine_test(self, Ss_te):
        self._check_test_sim(Ss_te)  # Add this line to check test similarity matrices
        self._m = Ss_te[0].shape[0] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._m, self._m)))
        W_te = np.zeros((len(Ss_te), self._m), dtype=float)

        for i, S_te in enumerate(Ss_te):
            knn = neigh.kneighbors(1 - S_te, return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)

        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims > 0:
            idx_par = np.argpartition(W_te, kth=self.rn_sims, axis=0)
            W_te[idx_par[:self.rn_sims,:], np.arange(W_te.shape[1])[None,:]] = 0

        sum_W_rows = np.sum(W_te, axis=0) # recompute the sum of each columns
        sum_W_rows[sum_W_rows == 0] = 1 # ensure no zero values in sum_W_rows, as it will be used as denominator
        W_te = W_te / sum_W_rows[None,:] # the sum of each columns in W is 1

        S_te = self._combine_sim(W_te, Ss_te)

        S_te[S_te > 1] = 1
        return S_te, W_te

    def _combine_sim(self, W, S):
        weighted_S = np.average(S, axis=0, weights=W)
        return weighted_S

    def _check_test_sim(self, Ss_te):
        num_sims = len(Ss_te)
        m = Ss_te[0].shape[0]
        for i in range(1, num_sims):
            if Ss_te[i].shape[0] != m:
                raise ValueError("All test similarity matrices must have the same number of drugs/targets.")


In [19]:
# 创建四个示例相似性矩阵 S1, S2, S3 和 S4
S1 = np.array([[0.0, 0.1, 0.2],
               [0.1, 0.0, 0.3],
               [0.2, 0.3, 0.0]])

S2 = np.array([[0.0, 0.4, 0.5],
               [0.4, 0.0, 0.6],
               [0.5, 0.6, 0.0]])

S3 = np.array([[0.0, 0.7, 0.8],
               [0.7, 0.0, 0.9],
               [0.8, 0.9, 0.0]])

S4 = np.array([[0.0, 0.10, 0.11],
               [0.10, 0.0, 0.12],
               [0.11, 0.12, 0.0]])

Ss_te = [S1, S2, S3, S4]

# 创建 Combine_Sims_LimbPerDT_2 类的实例，并调用 combine_test 方法来组合相似性矩阵
combiner = Combine_Sims_LimbPerDT_2()
combined_S_te, combined_W_te = combiner.combine_test(Ss_te)

print("Combined Similarity Matrix:")
print(combined_S_te)

print("\nWeights Matrix:")
print(combined_W_te)


AttributeError: 'Combine_Sims_LimbPerDT_2' object has no attribute 'W'

In [8]:

class Combine_Sims_LimbPerDT_1(Combine_Sims_Base):
    def __init__(self, k=5):
        self.k = k
        self.copyable_attrs=['k']
    #---------------------------------------------------------------------------------------- 
    
    def combine(self, Ss, Y):
        self._num_sims = Ss.shape[0]
        self._n = Ss.shape[1] # the number of drugs/targets
        self.W = self._compute_weights(Ss, Y) # W: num_sims,n
        S = self._combine_sim(self.W, Ss) 
        
    
        S[S>=1] = 1.0
        
        return S, self.W
    #---------------------------------------------------------------------------------------- 
    
    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        sum_W_rows = np.sum(W_te, axis=0)
        if np.any(sum_W_rows==0):
            print(W_te)
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1
        S_te = self._combine_sim(W_te, Ss_te)
        
        # S_te_sum = np.sum(S_te, axis=1)
        # if np.any(S_te_sum==0):
        #     print(S_te_sum)
        S_te[S_te>1] = 1
        return S_te, W_te
    #----------------------------------------------------------------------------------------    
    
    def _combine_sim(self, W, Ss):
        # W.shape= num_sims,n, Ss.shape=num_sims,n,n
        W1 = W[:,:,None] # W1.shape = num_sims,n,1
        S = Ss*W1
        S = np.sum(S,axis=0)
        return S
    #---------------------------------------------------------------------------------------- 
    
    def _check_test_sim(self, Ss):
        if Ss.shape[0] != self._num_sims:
            raise RuntimeError("The number of similairties in Ss ({}) is not same with self._num_sims ({})!!".format(Ss.shape[0], self._num_sims))
        if self._n != Ss.shape[2]:
            raise RuntimeError("The self._n:{} is not comparable with Ss's column {}!!".format(self._n, Ss.shape[2]))
    #----------------------------------------------------------------------------------------    
    
    def _compute_weights(self, Ss, Y):
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            milb, C = self._cal_limb(S1, Y, self.k)
            wg[i] = 1- milb
            
            idx1 = np.where(Y==1)
            C[idx1] = 1-C[idx1]
            W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
            # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
         
        sum_wg = np.sum(wg)
            
        sum_W_rows = np.sum(W, axis=0)  
        idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        sum_W_rows[idx0] = sum_wg
        W[:,idx0] = wg[:,None]
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            for j in range(Y.shape[1]):
                if Y[i,j] == 1: # only consider "1" 
                    C[i,j] = k-np.sum(Y[ii,j])
        C = C/k
        milb = np.sum(C)/np.sum(Y)
        
        return milb, C
    #---------------------------------------------------------------------------------------- 
#---------------------------------------------------------------------------------------- 

class Combine_Sims_LimbPerDT_2(Combine_Sims_LimbPerDT_1):
    """ set some samllest weights in each row (of each drug) to zeros"""
    def __init__(self, k=5, rho = 0.6):
        self.k = k
        self.rho = rho # the percetage of weghts of similarities removed
        self.copyable_attrs=['k', 'rho']
    #---------------------------------------------------------------------------------------- 
    
    def combine_test(self, Ss_te):
        # combine the test similarities, Ss is the test similairties
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1] # the number of test drugs/targets
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 
        
        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False) # knn.shape = (m,k)
            U = self.W[i,:][knn] # U.shape = (m,k)
            W_te[i,:] = np.mean(U, axis=1)
        
        # sum_W_rows = np.sum(W_te, axis=0)
        
        """ !!! No test weights are all zeros"""
        # idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        # sum_W_rows[idx0] = self.sum_wg
        # W_te[:,idx0] = self.wg[:,None]       
        # if len(idx0) > 0:
        #     print("weight", idx0)
        
        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims>0:
            idx_par = np.argpartition(W_te, kth=self.rn_sims, axis=0)
            W_te[idx_par[:self.rn_sims,:],np.arange(W_te.shape[1])[None,:]] = 0   
            
        sum_W_rows = np.sum(W_te, axis=0) # recompute the sum of each columns
        sum_W_rows[sum_W_rows==0] = 1 # ensure no zero vlaues in sum_W_rows, as it will be used as denominator
        W_te = W_te/sum_W_rows[None,:] # the sum of each columns in W is 1

        
        S_te = self._combine_sim(W_te, Ss_te)  
        
        # check if any row of S_te are all 0s
        # sum_S_rows = np.sum(S_te, axis=1)
        # idx0 = np.where(sum_S_rows==0)[0]
        # if len(idx0) > 0:
        #     print("sim", idx0)
        #     S_te[idx0] = np.average(Ss_te[:,idx0,:],axis=0,weights=self.wg)
        S_te[S_te>1] = 1
        return S_te, W_te
    #----------------------------------------------------------------------------------------    
    
    
    # def _compute_weights(self, Ss, Y):
    #     self.rn_sims = int(self.rho*self._num_sims)
        
    #     W = np.zeros((self._num_sims,self._n), dtype=float) 
    #     self.wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
    #     for i in range(self._num_sims):
    #         S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
    #         milb, C = self._cal_limb(S1, Y, self.k)
    #         self.wg[i] = 1- milb
            
    #         idx1 = np.where(Y==1)
    #         C[idx1] = 1-C[idx1]
    #         W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
    #         # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
        
    #     # set smaller rn_sims weights in each column of W to zero
    #     if self.rn_sims>0:
    #         idx_par = np.argpartition(self.wg, kth=self.rn_sims)
    #         self.wg[idx_par[:self.rn_sims]] = 0
    #     self.sum_wg = np.sum(self.wg)
            
    #     sum_W_rows = np.sum(W, axis=0)  
    #     idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
    #     sum_W_rows[idx0] = self.sum_wg
    #     W[:,idx0] = self.wg[:,None]
        
    #     # set smaller rn_sims weights in each column of W to zero
    #     if self.rn_sims>0:
    #         idx_par = np.argpartition(W, kth=self.rn_sims, axis=0)
    #         W[idx_par[:self.rn_sims,:],np.arange(W.shape[1])[None,:]] = 0   
    #         sum_W_rows = np.sum(W, axis=0) # recompute the sum of each columns
            
    #     W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
    #     return W
    # #---------------------------------------------------------------------------------------- 
    
    
    def _compute_weights(self, Ss, Y):
        self.rn_sims = int(self.rho*self._num_sims)
        
        W = np.zeros((self._num_sims,self._n), dtype=float) 
        self.wg = np.zeros(self._num_sims, dtype=float)  # the global local imbalance based weight
        for i in range(self._num_sims):
            S1 = Ss[i] - np.diag(np.diag(Ss[i])) # set diagnol elements to zeros
            _, C = self._cal_limb(S1, Y, self.k)
            # self.wg[i] = 1- milb
            
            idx1 = np.where(Y==1)
            C[idx1] = 1-C[idx1]
            W[i,:] = np.sum(C, axis=1) #+1.0/(self.k*self._num_sims)
            # W[i,h] is the easiness of d_h in i-th Sim, 1.0/self.k is an smoothing parameter ensuring none zero in W
        
        # set smaller rn_sims weights in each column of W to zero
        # if self.rn_sims>0:
        #     idx_par = np.argpartition(self.wg, kth=self.rn_sims)
        #     self.wg[idx_par[:self.rn_sims]] = 0
        # self.sum_wg = np.sum(self.wg)
            
        wg = np.sum(W, axis=1)
        sum_W_rows = np.sum(W, axis=0)  
        idx0 = np.where(sum_W_rows==0)[0] # indices of durgs whose sum of weight is zero
        # sum_W_rows[idx0] = self.sum_wg
        W[:,idx0] = wg[:,None]
        
        # set smaller rn_sims weights in each column of W to zero
        if self.rn_sims>0:
            idx_par = np.argpartition(W, kth=self.rn_sims, axis=0)
            W[idx_par[:self.rn_sims,:],np.arange(W.shape[1])[None,:]] = 0   
            
        sum_W_rows = np.sum(W, axis=0) # recompute the sum of each columns   
        sum_W_rows[sum_W_rows==0] = 1
        W = W/sum_W_rows[None,:] # the sum of each columns in W is 1
        return W
    #---------------------------------------------------------------------------------------- 
    
    def _cal_limb(self, S, Y, k):
        """ S is similarity matrix whose dignoal elememets are zeros"""
        
        neigh = NearestNeighbors(n_neighbors=k, metric='precomputed')
        neigh.fit(np.zeros(S.shape))
        knns = neigh.kneighbors(1 - S, return_distance=False)
        
        C = np.zeros(Y.shape, dtype=float)
        for i in range(Y.shape[0]):
            ii = knns[i]
            s = S[i,ii]
            z = np.sum(s)
            if z == 0:
                z=1sq
            C[i] = 1-s@Y[ii,:]/z
        C *= Y #
        milb = np.sum(C)/np.sum(Y)
        
        return milb, C

In [9]:
S1 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_ddi.txt")
S2 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_disease.txt")
S3 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_se.txt")
S4 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_tanimoto.txt")
Y_ = np.loadtxt("./datasets_mv/luo/luo_admat_dgc.txt")


In [12]:
Ss_te = np.array([S1, S2, S3, S4])

NameError: name '_compute_weights' is not defined

In [15]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

class Combine_Sims_LimbPerDT_2(Combine_Sims_LimbPerDT_1):
    def __init__(self, k=5, rho=0.6):
        self.k = k
        self.rho = rho
        self.copyable_attrs=['k', 'rho']

        self._num_sims = 0  # Add this line to set the initial value of _num_sims

    # ... 省略其他代码 ...

    def combine_test(self, Ss_te):
        self._check_test_sim(Ss_te)
        self._m = Ss_te.shape[1]
        self._num_sims = Ss_te.shape[0]  # Add this line to set the _num_sims attribute
        neigh = NearestNeighbors(n_neighbors=self.k, metric='precomputed')
        neigh.fit(np.zeros((self._n,self._n)))
        W_te = np.zeros((self._num_sims,self._m), dtype=float) 

        for i in range(self._num_sims):
            knn = neigh.kneighbors(1 - Ss_te[i], return_distance=False)
            U = self.W[i,:][knn]
            W_te[i,:] = np.mean(U, axis=1)

        if self.rn_sims>0:
            idx_par = np.argpartition(W_te, kth=self.rn_sims, axis=0)
            W_te[idx_par[:self.rn_sims,:],np.arange(W_te.shape[1])[None,:]] = 0   

        sum_W_rows = np.sum(W_te, axis=0)
        sum_W_rows[sum_W_rows==0] = 1
        W_te = W_te/sum_W_rows[None,:]

        S_te = self._combine_sim(W_te, Ss_te)  

        S_te[S_te>1] = 1
        return S_te, W_te




# 数据导入
Ss_te = np.array([S1, S2, S3, S4])
Y = Y_

# 调用Combine_Sims_LimbPerDT_2函数
combine_sims = Combine_Sims_LimbPerDT_2(k=5, rho=0.6)
result, weights = combine_sims.combine_test(Ss_te)


RuntimeError: The number of similairties in Ss (4) is not same with self._num_sims (0)!!

In [5]:
# %load  hsic.py
import numpy as np
from scipy.optimize import minimize,Bounds
from combine_sims import Combine_Sims_Ave


class HSIC(Combine_Sims_Ave):
    """ 
    Impelementation of Hilbert–Schmidt independence criterion-based multiple similarities fusion based on
    matlab codes of [1] which is availble at https://figshare.com/s/f664b36119c60e7f6f30
    [1] Ding, Yijie, Jijun Tang, and Fei Guo. "Identification of drug–target interactions via 
    dual laplacian regularized least squares with multiple kernel fusion." Knowledge-Based Systems 204 (2020): 106254.
    """
    def __init__(self, v1=2**-1, v2=2**-4, seed=0):
        super().__init__()
        self.v1 = v1
        self.v2 = v2
        self.seed = seed
        self.copyable_attrs=self.copyable_attrs+['v1','v2','seed']
        """
        v1 = [2**0, 2**-1, ..., 2**-5]
        v2 = [2**0, 2**-1, ..., 2**-5]
        """
    #----------------------------------------------------------------------------------------     
        
    def _compute_weights(self, Ss, Y):
        n = Ss.shape[1] # the number of rows in Ss[0]
        Ss1 = np.zeros(Ss.shape)
        for i in range(self._num_sims):
            Ss1[i] = self._process_sim(Ss[i])
        S_ideal = Y@Y.T # U in paper
        S_ideal = self._normalize_sim(S_ideal)
        
        H = np.eye(n)-np.ones(Ss1[0].shape, dtype=float)/n
        M = np.zeros((self._num_sims,self._num_sims), dtype=float) # the similarity between input similarity matrices
        for i in range(self._num_sims):
            for j in range(i,self._num_sims):
                mm = self._alignment(Ss1[i],Ss1[j])
                m1 = self._alignment(Ss1[i],Ss1[i])
                m2 = self._alignment(Ss1[j],Ss1[j])
                ss = mm/(np.sqrt(m1)*np.sqrt(m2))
                M[i,j] = M[j,i] = ss
        d1 = np.sum(M, axis=1)
        D1 = np.diag(d1)
        LapM = D1-M
        
        a = np.zeros(self._num_sims)
        for i in range(self._num_sims):
            kk = H@Ss1[i]@H
            aa = np.trace(kk.T@S_ideal)
            a[i] = (n**-2)*aa # n-1 in matlab code
        
        prng = np.random.RandomState(self.seed)
        w = prng.rand(self._num_sims)
        w = w/np.sum(w)
        bnds = Bounds(np.zeros(self._num_sims),np.ones(self._num_sims)) # eq.14c
        cons = ({'type': 'eq', "fun": self._constraint_eq }) # eq.14d
        res = minimize(self._f_obj, w, args=(a,LapM), method='SLSQP', bounds=bnds, constraints=cons)
        w = res.x
        return w
    #----------------------------------------------------------------------------------------  
    def _process_sim(self, S):
        # make similarity matrix symmetric
        S1 = (S+S.T)/2 
        # make similarity matrix PSD
        eig_values = np.linalg.eigvals(S)
        eig_values = np.real_if_close(eig_values) # keep the real part of eig_values
        ev_min = np.min(eig_values)
        e = max(0.0, -1.0*ev_min+1e-4)
        e1 = e.real
        S1 = S1 + e1*np.eye(S.shape[0])
        
        S1 = self._normalize_sim(S1)
        return S1
    
    def _normalize_sim(self, S):
        min_nz = np.min(S[np.nonzero(S)]) # the mininal none zero value
        S[S==0] = min_nz
        D = np.diag(S)
        D = np.sqrt(D)
        S1 = S/(np.outer(D,D)) 
        return S1
    #----------------------------------------------------------------------------------------  
    
    def _alignment(self, S1, S2):
        # same with np.trace(S1.T@S2)
        A = S1*S2
        a = A.sum()
        return a     
    #----------------------------------------------------------------------------------------  
    
    def _f_obj(self, w, a, LapM):
        #  eq.14a.
        J = -1*w@a + self.v1*w.T@LapM@w +self.v2*np.linalg.norm(w,2)**2 # last term is equalient to w@w
        return J
    #----------------------------------------------------------------------------------------
    
    def _constraint_eq(self, w):
        """
        return value must come back as 0 to be accepted 
        if return value is anything other than 0 it's rejectedas not a valid answer.
        """
        s = np.sum(w)-1
        return s
    #----------------------------------------------------------------------------------------

In [6]:
S1 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_ddi.txt")
S2 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_disease.txt")
S3 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_se.txt")
S4 = np.loadtxt("./datasets_mv/luo/Dsim/luo_simmat_drugs_tanimoto.txt")
Y_ = np.loadtxt("./datasets_mv/luo/luo_admat_dgc.txt")
Ss_te = np.array([S1, S2, S3, S4])
Y = Y_
_compute_weights(self, Ss, Y)

NameError: name '_compute_weights' is not defined