In [1]:
import numpy as np 
import scipy as sp
import cupy as cp
import csv
import networkx as nx
from sklearn import preprocessing
from collections import OrderedDict
import time 

In [2]:
# --- User-configurable inputs ---

# Root folder holding one sub-folder per dataset.
path_to_data_folder = "Datasets/"

# Dataset to process; one of:
# 'PROTEINS', 'ENZYMES', 'BZR', 'COX2', 'DHFR', 'SYNTHETICnew'
dataset = 'BZR'

# Number of WL refinement iterations.
iter_num = 3

# Base kernel selector: 0 = Gaussian kernel, 1 = linear kernel.
ker = 1

In [3]:
# Datasets that ship a "<name>_node_labels.txt" file. The remaining supported
# dataset has no node-label file, so NL stays None for it.
_datasets_with_node_labels = ('PROTEINS', 'ENZYMES', 'BZR', 'COX2', 'DHFR')
_datasets_without_node_labels = ('SYNTHETICnew',)

if dataset not in _datasets_with_node_labels + _datasets_without_node_labels:
    # Fail here with a clear message instead of leaving A/NA/... undefined
    # (or stale from a previous run) for the following cells.
    raise ValueError(
        "Unknown dataset {!r}; expected one of {}".format(
            dataset, _datasets_with_node_labels + _datasets_without_node_labels))


def _load_dataset_file(suffix, dtype):
    """Read <path_to_data_folder><dataset>/<dataset>_<suffix>.txt as a comma-separated array."""
    file_path = path_to_data_folder + dataset + "/" + dataset + "_" + suffix + ".txt"
    return np.loadtxt(file_path, dtype=dtype, delimiter=",", unpack=False)


A = _load_dataset_file("A", int)                        # edge list, one "u, v" row per edge
indicator = _load_dataset_file("graph_indicator", int)  # graph id of every node
if dataset in _datasets_with_node_labels:
    NL = _load_dataset_file("node_labels", int)
else:
    NL = None
NA = _load_dataset_file("node_attributes", float)
# None of the supported datasets is loaded with edge labels or edge attributes.
EL = None
EA = None

In [4]:
# Promote every 1-D label/attribute vector to an (n, 1) column so the
# following cells can always index these arrays with two subscripts.
if NL is not None and NL.ndim == 1:
    NL = NL.reshape((NL.shape[0], 1))
if EL is not None and EL.ndim == 1:
    EL = EL.reshape((EL.shape[0], 1))
if NA.ndim == 1:
    NA = NA.reshape((NA.shape[0], 1))

In [5]:
def create_data(A, indicator, NA, EA=None, NL=None, EL=None):
    """Split a concatenated multi-graph dataset into per-graph structures.

    The input arrays are only read; in particular ``A`` is not modified, so
    the function can be called repeatedly on the same data.

    Parameters
    ----------
    A : (m, 2) integer array
        Edge list with 1-based global node ids. Rows are consumed in order,
        so the edges of graph 1 must come first, then graph 2, and so on.
    indicator : (n,) integer array
        Graph id of every node. Nodes of one graph are taken to occupy a
        contiguous range of rows, graphs ordered by ascending id.
    NA : (n, d) array
        Node attributes, one row per node.
    EA : (m, e) array, optional
        Edge attributes, one row per row of ``A``.
    NL : (n, k) array, optional
        Node labels; only column 0 is used. When omitted every node gets
        the label 1.
    EL : array with one entry per row of ``A``, optional
        Edge labels. When omitted every edge gets the label 1.

    Returns
    -------
    adj_list : list
        Per graph, a list with one ``np.nonzero`` result (a 1-tuple holding
        the neighbour index array) per node.
    node_label : list
        Per graph, ``NL[..., 0]`` for its nodes, or an (n_i, 1) array of ones
        when ``NL`` is None.
    node_attr : list
        Per graph, the rows of ``NA`` belonging to its nodes.
    edge_list : list
        Per graph, a (k, 2) array of local node indices, one row per non-zero
        entry of the upper triangle (diagonal included) of the adjacency matrix.
    edge_label : list
        Per graph, the edge labels aligned with ``edge_list``.
    edge_attr : list
        Per graph, an OrderedDict from ``str(u) + str(v)`` to the attribute
        row of that directed edge. Empty list when ``EA`` is None.
    """
    # Number of nodes of every graph, ordered by ascending graph id.
    _, node_counts = np.unique(indicator, return_counts=True)
    n_edge_rows = A.shape[0]

    adj_list = []
    node_label = []
    node_attr = []
    edge_list = []
    edge_label = []
    edge_attr = []

    first_node = 0  # 0-based global index of the current graph's first node
    p = 0           # next unread row of A
    for n_nodes in node_counts:
        n_nodes = int(n_nodes)
        last_node = first_node + n_nodes  # 1-based global id of the graph's last node
        adjacency = np.zeros((n_nodes, n_nodes))
        edge_label_matrix = np.zeros((n_nodes, n_nodes))
        edge_attr_by_key = OrderedDict()

        # Consume all rows whose two endpoints lie inside the current graph.
        # The `p < n_edge_rows` guard lets trailing graphs without any edge
        # be emitted instead of indexing past the end of A.
        while p < n_edge_rows and A[p, 0] <= last_node and A[p, 1] <= last_node:
            # Local 0-based indices, computed without writing back into A.
            src = int(A[p, 0]) - 1 - first_node
            dst = int(A[p, 1]) - 1 - first_node
            adjacency[src, dst] = 1
            edge_label_matrix[src, dst] = EL[p] if EL is not None else 1
            if EA is not None:
                # NOTE(review): the key has no separator, so e.g. (1, 23) and
                # (12, 3) map to the same key "123". The format is kept because
                # NP_kernel_imp2 looks entries up with exactly this key.
                edge_attr_by_key[str(src) + str(dst)] = EA[p, :]
            p += 1

        neighbours = [np.nonzero(adjacency[node, :]) for node in range(n_nodes)]
        upper = np.nonzero(np.triu(adjacency))
        edge_list.append(np.array((upper[0], upper[1])).T)

        if NL is not None:
            node_label.append(NL[first_node:last_node, 0])
        else:
            node_label.append(np.ones(([n_nodes, 1])))
        node_attr.append(NA[first_node:last_node, :])
        adj_list.append(neighbours)
        edge_label.append(edge_label_matrix[upper[0], upper[1]])
        if EA is not None:
            edge_attr.append(edge_attr_by_key)

        first_node = last_node
    return adj_list, node_label, node_attr, edge_list, edge_label, edge_attr

In [6]:
# Split the flat dataset arrays into per-graph lists.
(adj_list, NL_list, NA_list,
 E_list, EL_list, EA_list) = create_data(A, indicator, NA, EA=EA, NL=NL, EL=EL)

In [7]:
def WL_refinement(adj_list, NL_list):
    """Run one WL relabelling step over all graphs with a shared label dictionary.

    Every node receives a signature made of its own label and the sorted
    labels of its neighbours. Each distinct signature is mapped to a
    consecutive integer, numbered in order of first appearance across all
    graphs.

    Parameters
    ----------
    adj_list : list
        Per graph, a list with one neighbour index (as produced by
        ``np.nonzero``) per node.
    NL_list : list of np.ndarray
        Per graph, the current node labels.

    Returns
    -------
    WL_list : list of np.ndarray
        Per graph, the new integer label of every node.
    p : int
        Number of distinct labels that were assigned.
    """
    compressed = {}   # signature string -> new integer label
    p = 0
    WL_list = []
    for g in range(len(adj_list)):
        labels = NL_list[g]
        refined = []
        for node in range(len(adj_list[g])):
            # ravel() so that (k, 1) label columns are sorted as well;
            # np.sort alone would sort along the size-1 last axis.
            neighbour_labels = np.sort(np.ravel(labels[adj_list[g][node]]))
            # Explicit separators keep signatures unambiguous: without them
            # own label 1 + neighbour 23 and own label 12 + neighbour 3 would
            # both collapse to "123" and wrongly share a label.
            signature = str(labels[node]) + '|' + ','.join(str(x) for x in neighbour_labels)
            if signature not in compressed:
                compressed[signature] = p
                p += 1
            refined.append(compressed[signature])
        WL_list.append(np.asarray(refined))
    return WL_list, p

In [8]:
def _group_edges_by_wl_label(E_list, EL_list, WL_list):
    """Bucket the edges of every graph by (sorted endpoint WL labels, edge label).

    Returns one OrderedDict per graph. Each key is the string
    "<smaller WL label> <larger WL label> <edge label>" and each value is the
    list of [u, v] local node-index pairs carrying that key, oriented so that
    u is the endpoint holding the smaller WL label.
    """
    grouped = []
    for j in range(len(E_list)):
        od = OrderedDict()
        for k in range(len(E_list[j])):
            tmp = E_list[j][k,:]
            tmp = tmp.tolist()
            tmp2 = [WL_list[j][tmp[0]], WL_list[j][tmp[1]]]
            args = np.argsort(tmp2)
            tmp2 = [tmp2[args[0]], tmp2[args[1]]]
            if args[0]!=0:
                # Orient the edge so its first node carries the smaller label.
                tmp = tmp[::-1]

            label1 = tmp2 + [int(EL_list[j][k])]
            tmp2 = tmp2[::-1]
            label2 = tmp2 + [int(EL_list[j][k])]
            label1, label2 = ' '.join(str(x) for x in label1), ' '.join(str(x) for x in label2)
            if label1 not in od and label2 not in od:
                od[label1]=[]
            if label1 in od:
                od[label1].append(tmp)
            # NOTE(review): only sorted keys (label1) are ever inserted, so this
            # branch appears unreachable; kept unchanged from the original.
            if label2 in od and label2!=label1:
                tmp = tmp[::-1]
                od[label2].append(tmp)
        grouped.append(od)
    return grouped


def NP_kernel_imp2(adj_list, NL_list, NA_list, E_list, EL_list, EA_list, h, ker):
    """Compute two graph-kernel matrices from WL-labelled edges.

    Node labels are refined ``h`` times with ``WL_refinement``. After every
    refinement the edges of each graph are bucketed by the key
    (sorted endpoint labels, edge label).

    * ``K_npe[i, j]`` sums, over pairs of edges (one from graph i, one from
      graph j) that share a key after the first refinement, the product of the
      base kernel on the attributes of the two first endpoints and of the two
      second endpoints (and on the edge attributes when present). Each pair is
      weighted by 1 / (number of pairs for that key). For every later
      refinement the pairs whose endpoint labels still match are added again,
      weighted by 1 / (number of surviving pairs with the same label pair).
    * ``K_o[i, j]`` sums, over all ``h`` refinements and all shared keys,
      ``min(#edges with that key in i, #edges with that key in j)``.

    Parameters
    ----------
    adj_list, NL_list, NA_list, E_list, EL_list, EA_list : list
        Per-graph lists as returned by ``create_data``. ``EA_list`` is empty
        when the dataset has no edge attributes.
    h : int
        Number of WL refinements; must be at least 1 (``WL[0]`` is always read).
    ker : int
        0 selects the Gaussian base kernel ``exp(-||x - y||^2 / beta)`` with
        ``beta`` equal to the attribute dimension; any other value selects the
        linear (dot-product) base kernel.

    Returns
    -------
    K_npe, K_o : np.ndarray
        Two symmetric (n_graphs, n_graphs) matrices.

    Notes
    -----
    NOTE(review): in the edge-attribute path, the lines that swap columns
    (1, 0) of ``tmpE1new`` / ``tmpE2new`` only make sense for 2-column edge
    attributes and also write through to ``tmpE1`` / ``tmpE2`` on the first
    pass. None of the datasets loaded in this notebook has edge attributes, so
    this path is not exercised here; confirm the intent before relying on it.
    """
    K_npe = np.zeros((len(adj_list),len(adj_list)))
    K_oea = np.zeros((len(adj_list),len(adj_list)))
    beta1 = NA_list[0].shape[1]
    beta2 = 1
    if len(EA_list)!=0:
        # Edge-attribute dimension, taken from the first stored attribute row
        # of the EA_list argument rather than from a notebook-level global.
        for ea_map in EA_list:
            if len(ea_map)!=0:
                beta2 = np.asarray(next(iter(ea_map.values()))).shape[0]
                break

    # WL[f][g] holds the node labels of graph g after f + 1 refinements.
    WL = []
    NL_list_tmp = NL_list
    for i in range(h):
        WL_list, _ = WL_refinement(adj_list, NL_list_tmp)
        NL_list_tmp = WL_list
        WL.append(WL_list)

    WL_ref_add = _group_edges_by_wl_label(E_list, EL_list, WL[0])

    for i in range(len(E_list)):
        NA1 = NA_list[i]
        if len(EA_list)!=0:
            odEA1 = EA_list[i]
        else:
            odEA1 = None
        for j in range(len(E_list)):
            if i<=j:
                od1 = WL_ref_add[i]
                od2 = WL_ref_add[j]
                NA2 = NA_list[j]
                if len(EA_list)!=0:
                    odEA2 = EA_list[j]
                else:
                    odEA2 = None
                const = []
                st1, st2 = set(od1), set(od2)
                common_keys = st1.intersection(st2)
                # tmpXY: node indices of endpoint X (1 = first, 2 = second) of
                # every matched edge pair in graph Y (1 = graph i, 2 = graph j).
                tmp11, tmp12, tmp21, tmp22 = np.zeros((0)), np.zeros((0)), np.zeros((0)), np.zeros((0))
                tmpE1, tmpE2 = np.zeros((0,beta2)), np.zeros((0,beta2))
                oea = 0
                # Reset per pair: graphs without a shared key must contribute 0
                # instead of re-using the value computed for the previous pair.
                t = 0
                if len(common_keys)!=0:
                    # sorted() fixes the accumulation order, so the floating
                    # point sums do not depend on string-hash randomisation.
                    for k in sorted(common_keys):

                        lst1,lst2 = od1[k], od2[k]
                        oea += min(len(lst1), len(lst2))
                        if odEA1 is not None:
                            EAlst1, EAlst2= [],[]
                            for m in range(len(lst1)):
                                c = str(lst1[m][0])+str(lst1[m][1])
                                EAlst1.append(odEA1[c])
                            for m in range(len(lst2)):
                                c = str(lst2[m][0])+str(lst2[m][1])
                                EAlst2.append(odEA2[c])
                            EAlst1=np.asarray(EAlst1)
                            EAlst2=np.asarray(EAlst2)
                            EAlst1, EAlst2 = np.repeat(EAlst1, len(EAlst2), axis=0), np.tile(EAlst2,(len(EAlst1),1))
                            tmpE1 = np.concatenate((tmpE1,EAlst1), axis=0)
                            tmpE2 = np.concatenate((tmpE2,EAlst2), axis=0)

                        # Cartesian product of the two edge lists: repeat rows
                        # of lst1, tile lst2, so row r pairs one edge of each.
                        lst1,lst2 = np.asarray(lst1, dtype = np.int32), np.asarray(lst2, dtype = np.int32)
                        lst1, lst2 = np.repeat(lst1, len(lst2), axis=0), np.tile(lst2,(len(lst1),1))
                        const.append([1/lst1.shape[0]]*lst1.shape[0])
                        tmp11 = np.concatenate((tmp11,lst1[:,0]), axis=0)
                        tmp12 = np.concatenate((tmp12,lst2[:,0]), axis=0)
                        tmp21 = np.concatenate((tmp21,lst1[:,1]), axis=0)
                        tmp22 = np.concatenate((tmp22,lst2[:,1]), axis=0)
                    tmp11new, tmp12new, tmp21new, tmp22new = tmp11, tmp12, tmp21, tmp22
                    tmpE1new, tmpE2new = tmpE1, tmpE2
                    for f in range(h-1):
                        # Labels of the paired endpoints at the next refinement.
                        lrm11 = WL[f+1][i][tmp11new.astype(int)]
                        lrm12 = WL[f+1][j][tmp12new.astype(int)]
                        lrm21 = WL[f+1][i][tmp21new.astype(int)]
                        lrm22 = WL[f+1][j][tmp22new.astype(int)]

                        # Graph i: re-orient each edge so that its first
                        # endpoint carries the smaller refined label.
                        T1 = np.vstack((lrm11,lrm21, tmp11new, tmp21new))
                        T1 = T1.T
                        T1tmp1 = T1[:,0:2]
                        ind = np.argsort(T1tmp1, axis=1)
                        T1tmp2 = T1[:,2:]
                        or1bool = ind[:,0].astype(bool)
                        T1tmp1[or1bool, :] = T1tmp1[or1bool,:][:,(1,0)]
                        T1tmp2[or1bool, :] = T1tmp2[or1bool,:][:,(1,0)]
                        if odEA1 is not None:
                            tmpE1new[or1bool, :] = tmpE1new[or1bool,:][:,(1,0)]
                        lrm11, lrm21 = T1tmp1[:,0], T1tmp1[:,1]
                        tmp11new, tmp21new = T1tmp2[:,0], T1tmp2[:,1]

                        # Graph j: same re-orientation.
                        T1 = np.vstack((lrm12,lrm22, tmp12new, tmp22new))
                        T1 = T1.T
                        T1tmp1 = T1[:,0:2]
                        ind = np.argsort(T1tmp1, axis=1)
                        T1tmp2 = T1[:,2:]
                        or1bool = ind[:,0].astype(bool)
                        T1tmp1[or1bool, :] = T1tmp1[or1bool,:][:,(1,0)]
                        T1tmp2[or1bool, :] = T1tmp2[or1bool,:][:,(1,0)]
                        if odEA1 is not None:
                            tmpE2new[or1bool, :] = tmpE2new[or1bool,:][:,(1,0)]
                        lrm12, lrm22 = T1tmp1[:,0], T1tmp1[:,1]
                        tmp12new, tmp22new = T1tmp2[:,0], T1tmp2[:,1]

                        # Keep only the pairs whose two endpoint labels agree.
                        b = (lrm11 == lrm12) & (lrm21 == lrm22)
                        lrmtmp1 = np.reshape(lrm11[b],(lrm11[b].shape[0],1))
                        lrmtmp2 = np.reshape(lrm21[b],(lrm21[b].shape[0],1))
                        if lrmtmp1.shape[0]!=0 and lrmtmp2.shape[0]!=0:
                            count_tmp = np.concatenate((lrmtmp1,lrmtmp2), axis=1)
                            _, inverse, counts = np.unique(count_tmp, return_inverse=True, return_counts=True, axis=0)
                            # ravel(): some NumPy releases return a 2-D inverse
                            # when `axis` is given; the weights must stay 1-D.
                            counttmp = 1/counts[np.ravel(inverse)]
                            const.append(counttmp)

                            tmp11 = np.concatenate((tmp11,tmp11new[b]), axis=0)
                            tmp12 = np.concatenate((tmp12,tmp12new[b]), axis=0)
                            tmp21 = np.concatenate((tmp21,tmp21new[b]), axis=0)
                            tmp22 = np.concatenate((tmp22,tmp22new[b]), axis=0)
                            tmp11new, tmp12new, tmp21new, tmp22new = tmp11new[b], tmp12new[b], tmp21new[b], tmp22new[b]
                            if odEA1 is not None:
                                tmpE1 = np.concatenate((tmpE1,tmpE1new[b]), axis=0)
                                tmpE2 = np.concatenate((tmpE2,tmpE2new[b]), axis=0)
                                tmpE1new, tmpE2new = tmpE1new[b], tmpE2new[b]
                        else:
                            # No pair survives this refinement, so none can
                            # survive a later one either.
                            break

                if len(tmp11)!=0:
                    const = np.hstack(const)
                    N11, N12 = NA1[tmp11.astype(int)], NA2[tmp12.astype(int)]
                    N21, N22 = NA1[tmp21.astype(int)], NA2[tmp22.astype(int)]
                    N11, N12, N21, N22, const = cp.asarray(N11),cp.asarray(N12),cp.asarray(N21),cp.asarray(N22),cp.asarray(const)
                    if ker==0:
                        t1 = cp.exp(-1/beta1*(cp.linalg.norm((N11-N12),axis=1)**2))
                        t2 = cp.exp(-1/beta1*(cp.linalg.norm((N21-N22),axis=1)**2))
                        t = cp.multiply(const,t1)
                        t = cp.multiply(t, t2)
                        if odEA1 is not None:
                            # Move the edge attributes to the device first;
                            # CuPy functions do not take NumPy arrays.
                            dE = cp.asarray(tmpE1) - cp.asarray(tmpE2)
                            t = cp.multiply(t, cp.exp(-1/beta2*(cp.linalg.norm(dE,axis=1)**2)))
                        t = cp.sum(t)

                    else:
                        t1 = cp.multiply(N11,N12)
                        t1 = cp.sum(t1,axis = 1)
                        t2 = cp.multiply(N21,N22)
                        t2 = cp.sum(t2,axis = 1)
                        t = cp.multiply(const, t1)
                        t = cp.multiply(t, t2)
                        if odEA1 is not None:
                            t1 = cp.multiply(cp.asarray(tmpE1), cp.asarray(tmpE2))
                            t1 = cp.sum(t1, axis = 1)
                            t = cp.multiply(t, t1)
                        t = cp.sum(t)
                # float() turns the 0-d device result into a host scalar.
                t = float(t)
                K_npe[i,j],K_npe[j,i] = t, t
                K_oea[i,j],K_oea[j,i] = oea, oea
    K_o = K_oea

    # Remaining refinements only contribute to K_o.
    for f in range(1,h):
        K_oea = np.zeros((len(adj_list),len(adj_list)))
        WL_ref_add = _group_edges_by_wl_label(E_list, EL_list, WL[f])
        for i in range(len(E_list)):
            od1 = WL_ref_add[i]
            for j in range(len(E_list)):
                if i<=j:
                    od2 = WL_ref_add[j]
                    st1, st2 = set(od1), set(od2)
                    common_keys = st1.intersection(st2)
                    oea = 0
                    for k in common_keys:
                        lst1,lst2 = od1[k], od2[k]
                        oea += min(len(lst1), len(lst2))
                    K_oea[i,j]=oea
                    K_oea[j,i]=oea
        K_o+=K_oea
    return K_npe, K_o

In [9]:
# Time the full kernel-matrix computation.
start = time.time()
KM_npe, KM_oea = NP_kernel_imp2(
    adj_list, NL_list, NA_list, E_list, EL_list, EA_list,
    h=iter_num, ker=ker,
)
end = time.time()

In [10]:
# Report the duration measured in the previous cell.
elapsed_seconds = end - start
print("Time taken (in seconds): {}".format(elapsed_seconds))

Time taken (in seconds): 169.14699840545654
