In [1]:
# python version：3.9.13 (main, Aug 25 2022, 23:51:50) [MSC v.1916 64 bit (AMD64)]
import numpy as np
import pandas as pd
import scipy as sp
from scipy import linalg
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import warnings
import time
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, confusion_matrix
warnings.filterwarnings("ignore")

In [None]:
def _solve_class_subspace(C1, C2, X1, X2, mu1, mu2, alpha, sita, num):
    """Solve the two-view generalized eigenproblem for a single class.

    C1, C2   : rows of that class in view 1 / view 2 (paired, same row order).
    X1, X2   : all rows of view 1 / view 2.
    mu1, mu2 : (1, d) class means used for centring in each view.

    Returns the ``num`` eigenvectors with the smallest eigenvalues, stored as
    rows and split into their view-1 and view-2 parts.
    """
    A1, A2 = C1 - mu1, C2 - mu2      # class rows centred on the class mean
    D1, D2 = X1 - mu1, X2 - mu2      # every row centred on the same class mean
    d1, d2 = X1.shape[1], X2.shape[1]

    # Left-hand matrix: within-class scatter on the diagonal blocks, cross-view
    # terms (weighted by -sita) off the diagonal, plus alpha on the diagonal.
    K = np.block([
        [(1 + sita) * (A1.T @ A1), -sita * (A1.T @ A2)],
        [-sita * (A2.T @ A1), (1 + sita) * (A2.T @ A2)],
    ])
    K = K + alpha * np.eye(K.shape[0])

    # Right-hand matrix: block-diagonal scatter of all rows around the class mean.
    T = np.block([
        [D1.T @ D1, np.zeros((d1, d2))],
        [np.zeros((d2, d1)), D2.T @ D2],
    ])

    # scipy returns complex-typed results for the general solver; they are kept
    # as-is (callers only take norms of the projections).
    eigvals, eigvecs = sp.linalg.eig(K, T)
    keep = np.argsort(eigvals, kind="stable")[:num]   # smallest eigenvalues first
    W = eigvecs[:, keep].T                              # one eigenvector per row
    return W[:, :d1], W[:, d1:]


def fit(view1,view2,alpha,sita,num):
    """Fit the positive-class and negative-class projections for two views.

    Parameters
    ----------
    view1, view2 : pandas.DataFrame
        One row per sample, features in all columns but the last, label
        (1 or -1) in the last column. Rows of the two views must be paired.
    alpha : float
        Value added to the diagonal of the left-hand matrix.
    sita : float
        Weight of the cross-view terms (diagonal blocks are scaled by
        ``1 + sita``, off-diagonal blocks by ``-sita``).
    num : int
        Number of eigenvectors (smallest eigenvalues) to keep per class.

    Returns
    -------
    tuple
        ``(evt_positive1, evt_positive2, evt_negative1, evt_negative2,
        pu1, pu2, nu1, nu2)`` as ``np.matrix`` objects: the projection rows for
        each class/view and the (1, d) class means of each view.

    Raises
    ------
    ValueError
        If either class has no sample in the data passed in.

    Notes
    -----
    The class means are computed here, from the rows passed in and in the same
    min-max scaled space as the scatter matrices. Previously they were looked
    up as module-level globals, so the function failed unless a caller had
    defined them, and means computed elsewhere on other rows leaked in.
    """
    # Scale each view to [0, 1] using only the rows given to this call. Plain
    # arrays are passed because newer scikit-learn releases reject np.matrix.
    X1 = MinMaxScaler().fit_transform(np.asarray(view1.iloc[:, :-1], dtype=float))
    X2 = MinMaxScaler().fit_transform(np.asarray(view2.iloc[:, :-1], dtype=float))
    y1 = np.asarray(view1.iloc[:, -1])
    y2 = np.asarray(view2.iloc[:, -1])

    # Distinguish between positive and negative labeled samples
    PM1, NM1 = X1[y1 == 1], X1[y1 == -1]
    PM2, NM2 = X2[y2 == 1], X2[y2 == -1]
    if min(PM1.shape[0], NM1.shape[0], PM2.shape[0], NM2.shape[0]) == 0:
        raise ValueError(
            "fit() needs at least one sample labelled 1 and one labelled -1 in each view"
        )

    # Per-view class means, kept 2-D (1, d) so they broadcast against the rows.
    pu1, pu2 = PM1.mean(axis=0, keepdims=True), PM2.mean(axis=0, keepdims=True)
    nu1, nu2 = NM1.mean(axis=0, keepdims=True), NM2.mean(axis=0, keepdims=True)

    # Obtain the eigenvectors [w1, w2] of the two views, one problem per class
    evt_positive1, evt_positive2 = _solve_class_subspace(
        PM1, PM2, X1, X2, pu1, pu2, alpha, sita, num)
    evt_negative1, evt_negative2 = _solve_class_subspace(
        NM1, NM2, X1, X2, nu1, nu2, alpha, sita, num)

    # np.matrix is kept as the return type so existing callers see what they did before.
    return (np.asmatrix(evt_positive1), np.asmatrix(evt_positive2),
            np.asmatrix(evt_negative1), np.asmatrix(evt_negative2),
            np.asmatrix(pu1), np.asmatrix(pu2), np.asmatrix(nu1), np.asmatrix(nu2))
    
    # Calculate acc, f1, mcc, sensitivity, specificity
def score(view1,view2,evt_positive1,evt_positive2,evt_negative1,evt_negative2,pu1,pu2,nu1,nu2):
    """Classify the samples of two paired views and report five metrics.

    Each sample is projected with the positive-class and the negative-class
    eigenvectors of both views; its distance to the projected class mean is
    measured in each of the four projections. A sample is labelled 1 when the
    summed negative-class distance is at least the summed positive-class
    distance, otherwise -1. True labels are read from the last column of
    ``view1``.

    Note: a min-max scaler is fitted on the data passed to this function (it is
    not taken from training).

    Returns ``(acc, f1, mcc, sensitivity, specificity)``; F1 is the weighted
    average, and sensitivity/specificity are 0 when their denominator is 0.
    """
    scaler = MinMaxScaler()
    feats1 = scaler.fit_transform(np.matrix(view1.iloc[:, :-1]))
    feats2 = scaler.fit_transform(np.matrix(view2.iloc[:, :-1]))
    y_true = list(view1.iloc[:, -1])

    def _distance(feats, center, vectors):
        # Row-wise Euclidean distance between projected samples and projected mean.
        return np.linalg.norm(feats @ vectors.T - center @ vectors.T, axis=1, keepdims=False)

    dist_pos1 = _distance(feats1, pu1, evt_positive1)
    dist_pos2 = _distance(feats2, pu2, evt_positive2)
    dist_neg1 = _distance(feats1, nu1, evt_negative1)
    dist_neg2 = _distance(feats2, nu2, evt_negative2)

    # Positive margin: the sample lies closer to the positive class overall.
    margins = dist_neg1 + dist_neg2 - dist_pos1 - dist_pos2
    y_hat = []
    for margin in margins:
        y_hat.append(1 if margin >= 0 else -1)

    acc = accuracy_score(y_true, y_hat)
    f1 = f1_score(y_true, y_hat, average='weighted')
    mcc = matthews_corrcoef(y_true, y_hat)

    # Rows/columns ordered as [-1, 1], so ravel() yields tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_hat, labels=[-1, 1]).ravel()
    actual_pos = tp + fn
    actual_neg = tn + fp
    sensitivity = tp / actual_pos if actual_pos > 0 else 0
    specificity = tn / actual_neg if actual_neg > 0 else 0

    return acc, f1, mcc, sensitivity, specificity

In [3]:
if __name__ == '__main__':
    # Nested cross-validation driver: for every subspace size `num`, an outer
    # 5-fold split estimates test performance while an inner 5-fold split on the
    # outer training part selects (alpha, sita) by mean validation accuracy.

    # read data
    filename_list = ['cleveland']
    n_components  = [8]    # PCA dimensionality per dataset, aligned with filename_list

    for k in range(len(filename_list)):
        # View 1: the CSV as read (features, label in the last column).
        view1 = pd.read_csv(filename_list[k]+ '.csv', header=None)
        # View 2: PCA projection of the view-1 features plus the same labels.
        # NOTE(review): PCA is fitted on all rows before the CV split - confirm
        # this is intended, since test rows influence the projection.
        modelPCA = PCA(n_components=n_components[k])
        PCAdata = modelPCA.fit_transform(view1.copy().iloc[:, :-1])
        view2 = pd.concat([pd.DataFrame(PCAdata), view1.iloc[:, -1]], axis=1)

        # normalization (plain arrays are passed to the scaler because newer
        # scikit-learn releases reject np.matrix input)
        MMS = MinMaxScaler()
        X1 = np.matrix(MMS.fit_transform(np.asarray(view1.iloc[:,:-1])))
        X2 = np.matrix(MMS.fit_transform(np.asarray(view2.iloc[:,:-1])))
        y1,y2  = view1.iloc[:,-1],view2.iloc[:,-1]

        data1=pd.DataFrame(np.column_stack((X1,y1)))
        data2=pd.DataFrame(np.column_stack((X2,y2)))

        # classification: split the scaled rows by label
        PM1=np.matrix(data1[data1.iloc[:,-1]==1])[:,:-1]
        NM1=np.matrix(data1[data1.iloc[:,-1]==-1])[:,:-1]

        PM2=np.matrix(data2[data2.iloc[:,-1]==1])[:,:-1]
        NM2=np.matrix(data2[data2.iloc[:,-1]==-1])[:,:-1]

        # Class means over all rows, bound at module level (fit() may resolve
        # pu1/pu2/nu1/nu2 as globals); they are rebound by every fit() call below.
        pu1,pu2 = np.sum(PM1,axis=0)/PM1.shape[0],np.sum(PM2,axis=0)/PM2.shape[0]
        nu1,nu2 = np.sum(NM1,axis=0)/NM1.shape[0],np.sum(NM2,axis=0)/NM2.shape[0]

        # cross validation: kf1 is the outer (test) split, kf2 the inner (validation) split
        kf1 = KFold(n_splits=5, shuffle=True, random_state=10)
        kf2 = KFold(n_splits=5, shuffle=True, random_state=110)

        # Create empty lists, you can use these lists to record the results.
        result = []
        acc_list = []
        f1_list = []
        mcc_list = []
        sensitivity_list = []
        specificity_list = []

        # Determine the parameter range
        alpha_list = [2**i for i in range(-8, 8)]
        sita_list  = [2**i for i in range(-8, 8)]
        # 1 .. (number of view-1 features + number of view-2 features)
        num_list = list(range(1, view1.shape[1] + view2.shape[1] - 1))

        # One progress tick per inner fit/score call.
        n_inner_calls = kf1.get_n_splits() * kf2.get_n_splits()
        with tqdm(total=len(num_list)*len(sita_list)*len(alpha_list)*n_inner_calls) as pbar:
            res = {}
            for num in num_list:
                # Outer-fold test metrics for this value of num.
                acc_list = []
                f1_list = []
                mcc_list = []
                sensitivity_list = []
                specificity_list = []
                for train_valid_index, test_index in kf1.split(view1):

                    view1_train_valid = view1.iloc[train_valid_index]
                    view1_test = view1.iloc[test_index]

                    view2_train_valid = view2.iloc[train_valid_index]
                    view2_test = view2.iloc[test_index]

                    # (alpha, sita) -> mean inner-fold validation accuracy
                    result_dict = {}

                    for sita in sita_list:
                        for alpha in alpha_list:
                            result_list = []
                            for train_index, valid_index in kf2.split(view1_train_valid):
                                # kf2 yields positions within the outer training
                                # part, so they must index view*_train_valid.
                                # Indexing the full views would pick the wrong
                                # rows and pull in held-out test rows.
                                view1_train  =view1_train_valid.iloc[train_index]
                                view1_valid  =view1_train_valid.iloc[valid_index]

                                view2_train  =view2_train_valid.iloc[train_index]
                                view2_valid  =view2_train_valid.iloc[valid_index]

                                # start_time/end_time are overwritten each pass;
                                # after the loops they time the last call only.
                                start_time = time.time()
                                evt_positive1, evt_positive2, evt_negative1, evt_negative2, pu1, pu2, nu1, nu2 = fit(view1_train, view2_train, alpha, sita, num)
                                acc, f1, mcc, sensitivity, specificity = score(view1_valid, view2_valid, evt_positive1, evt_positive2, evt_negative1, evt_negative2, pu1, pu2, nu1, nu2)
                                end_time = time.time()

                                pbar.update(1)
                                result_list.append((acc, f1, mcc, sensitivity, specificity))

                            acc_mean = np.mean([x[0] for x in result_list])

                            result_dict[(alpha, sita)] = (acc_mean)

                    # Select the optimal parameters (highest mean validation
                    # accuracy; the stable sort keeps the first-tried pair on ties)
                    para = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
                    best_alpha = para[0][0][0]
                    best_sita = para[0][0][1]

                    # Refit on the whole outer training part and evaluate on the test fold.
                    evt_positive1, evt_positive2, evt_negative1, evt_negative2, pu1, pu2, nu1, nu2 = fit(view1_train_valid, view2_train_valid, best_alpha, best_sita, num)
                    # score() returns five metrics; unpack them so that acc_list
                    # holds accuracies only instead of whole metric tuples.
                    acc, f1, mcc, sensitivity, specificity = score(view1_test, view2_test, evt_positive1, evt_positive2, evt_negative1, evt_negative2, pu1, pu2, nu1, nu2)
                    acc_list.append(acc)
                    f1_list.append(f1)
                    mcc_list.append(mcc)
                    sensitivity_list.append(sensitivity)
                    specificity_list.append(specificity)

                acc_mean, acc_std = np.mean(acc_list), np.std(acc_list)

                # best_alpha / best_sita are those selected in the last outer fold.
                res[num] = (acc_mean, acc_std, best_alpha, best_sita)
                                                                        


100%|█████████████████████████████████████████████████████████████████████████| 134400/134400 [10:06<00:00, 221.49it/s]


In [45]:
# start_time and end_time are reassigned around every fit() + score() call in
# the innermost cross-validation loop, so this difference is the wall-clock
# duration of the last such call only, not of the whole grid search.
run_time = end_time - start_time    # elapsed seconds (difference of time.time() values)
print(run_time)

0.009759664535522461
