#Package Section

In [1]:
import sys
import numpy as np
import copy
from numpy import linalg as LA
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.cluster import KMeans
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import metrics
import time
# for sparse matrix
from scipy import sparse
#early stop
from keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint


#Classes and functions

In [2]:
# invalide devide resutls will be nan
np.seterr(divide='ignore', invalid='ignore')

############------------graph_encoder_embed_start----------------###############
class GraphEncoderEmbed:
  def run(self, X, Y, n, **kwargs):
    defaultKwargs = {'EdgeList': False, 'DiagA': True, 'Laplacian': False, 'Correlation': True}
    kwargs = { **defaultKwargs, **kwargs}

    if kwargs['EdgeList']:
      size_flag = self.edge_list_size
      X = self.Edge_to_Sparse(X, n, size_flag)
    
    emb_strat = time.time()

    if kwargs['DiagA']:
      X = self.Diagonal(X, n)

    if kwargs['Laplacian']:
      X = self.Laplacian(X, n)
    
    Z, W = self.Basic(X, Y, n)

    if kwargs['Correlation']:
      Z = self.Correlation(Z)
    
    emb_end = time.time()
    emb_time = emb_end - emb_strat
    
    return Z, W, emb_time

  def Basic(self, X, Y, n):
    """
      graph embedding basic function
      input X is sparse csr matrix of adjacency matrix
      -- if there is a connection between node i and node j:
      ---- X(i,j) = 1, no edge weight
      ---- X(i,j) = edge weight.
      -- if there is no connection between node i and node j:
      ---- X(i,j) = 0, 
      ---- note there is no storage for this in sparse matrix. 
      ---- No storage means 0 in sparse matrix.
      input Y is numpy array with size (n,1):
      -- value -1 indicate no lable
      -- value >=0 indicate real label
      input train_idx: a list of indices of input X for training set 
    """
    # assign k to the max along the first column
    # Note for python, label Y starts from 0. Python index starts from 0. thus size k should be max + 1
    k = Y[:,0].max() + 1
    
    #nk: 1*n array, contains the number of observations in each class
    nk = np.zeros((1,k))
    for i in range(k):
      nk[0,i] = np.count_nonzero(Y[:,0]==i)
    
    #W: sparse matrix for encoder matrix. W[i,k] = {1/nk if Yi==k, otherwise 0}
    W = sparse.dok_matrix((n, k), dtype=np.float32)

    for i in range(n):
      k_i = Y[i,0]
      if k_i >=0:
        W[i,k_i] = 1/nk[0,k_i]
    
    W = sparse.csr_matrix(W)
    Z = X.dot(W)

    return Z, W

  def Diagonal(self, X, n):
    """
      input X is sparse csr matrix of adjacency matrix
      return a sparse csr matrix of X matrix with 1s on the diagonal
    """
    I = sparse.identity(n)
    X = X + I
    return X


  def Laplacian(self, X, n):
    """
      input X is sparse csr matrix of adjacency matrix
      return a sparse csr matrix of Laplacian normalization of X matrix
    """
    X_sparse = sparse.csr_matrix(X)
    # get an array of degrees
    dig = X_sparse.sum(axis=0).A1
    # diagonal sparse matrix of D
    D = sparse.diags(dig,0)
    _D = D.power(-0.5)
    # D^-0.5 x A x D^-0.5
    L = _D.dot(X_sparse.dot(_D)) 

    # _L = _D.dot(X_sparse.dot(_D))    
    # # L = I - D^-0.5 x A x D^-0.5
    # I = sparse.identity(n)
    # L = I - _L   

    return L
  
  def Correlation(self, Z):
    """
      input Z is sparse csr matrix of embedding matrix from the basic function
      return normalized Z sparse matrix
      Calculation:
      Calculate each row's 2-norm (Euclidean distance). 
      e.g.row_x: [ele_i,ele_j,ele_k]. norm2 = sqr(sum(ele_i^2+ele_i^2+ele_i^2))
      then divide each element by their row norm
      e.g. [ele_i/norm2,ele_j/norm2,ele_k/norm2] 
    """
    # 2-norm
    row_norm = sparse.linalg.norm(Z, axis = 1)

    # row division to get the normalized Z
    diag = np.nan_to_num(1/row_norm)
    N = sparse.diags(diag,0)
    Z = N.dot(Z)

    return Z

  def edge_list_size(self, X):
    """
      set default edge list size as S3.
      If find X only has 2 columns, 
      return a flag "S2" indicating this is S2 edge list
    """
    if X.shape[1] == 2:
      return "S2"
    else:
      return "S3"
    
  def Edge_to_Sparse(self, X, n, size_flag):
    """
      input X is an edge list.
      Note for X, the edge list: 
      it is assumed there is no duplication of one connection
      e.g. connection between node i and node j, 
      there is only one row for this connection. 
      either (node_i, node_j, edge_w), or(node_j, node_i, edge_w)
      Only one of them. 
      If there are duplication in your edge list, please remove them before run.

      For S2 edge list (e.g. node_i, node_j per row), add one to all connections
      return a sparse csr matrix of S3 edge list
    """   
    #Build an empty sparse matrix. 
    X_new = sparse.dok_matrix((n, n), dtype=np.float32)

    for row in X:
      if size_flag == "S2":
        [node_i, node_j] = row
        X_new[node_i, node_j] = 1
        X_new[node_j, node_i] = 1
      else:
        [node_i, node_j, weight] = row
        X_new[node_i, node_j] = weight
        X_new[node_j, node_i] = weight
    
    X_new = sparse.csr_matrix(X_new)

    return X_new


############------------graph_encoder_embed_end------------------###############
############------------Sparse_supervised_learning_start---------###############

# https://www.kaggle.com/c/talkingdata-mobile-user-demographics/discussion/22567
# https://github.com/tkipf/pygcn/blob/1600b5b748b3976413d1e307540ccc62605b4d6d/pygcn/utils.py#L73

def batch_generator(X, y, k, batch_size, shuffle):
    number_of_batches = int(X.shape[0]/batch_size)
    counter = 0
    sample_index = np.arange(X.shape[0])
    if shuffle:
        np.random.shuffle(sample_index)
    while True:
        batch_index = sample_index[batch_size*counter:batch_size*(counter+1)]
        X_batch = X[batch_index,:].toarray()
        y_batch = to_categorical(y[batch_index], num_classes=k)
        counter += 1
        yield X_batch, y_batch
        if (counter == number_of_batches):
            if shuffle:
                np.random.shuffle(sample_index)
            counter = 0

class Hyperperameters:
  """
    define perameters for GNN.
    default values are for GNN learning -- "Leaner" ==2:
      embedding via partial label, then learn unknown label via two-layer NN

  """
  def __init__(self):
    # there is no scaled conjugate gradiant in keras optimiser, use defualt instead
    # use whatever default
    self.learning_rate = 0.01  # Initial learning rate.
    self.epochs = 100 #Number of epochs to train.
    self.hidden = 20 #Number of units in hidden layer 
    self.val_split = 0.1 #Split 10% of training data for validation
    self.loss = 'categorical_crossentropy' # loss function

class GNN:
  def __init__(self, DataSets):
    GNN.DataSets = DataSets
    GNN.hyperM = Hyperperameters()
    GNN.model = self.GNN_model()  #model summary: GNN.model.summary()
      
 
  def GNN_model(self):
    """
      build GNN model
    """
    hyperM = self.hyperM
    DataSets = self.DataSets

    z_train = DataSets.z_train
    k = DataSets.d

    feature_num = z_train.shape[1]
    
    model = keras.Sequential([
    keras.layers.Flatten(input_shape = (feature_num,)),  # input layer 
    keras.layers.Dense(hyperM.hidden, activation='relu'),  # hidden layer -- no tansig activation function in Keras, use relu instead
    keras.layers.Dense(k, activation='softmax') # output layer, matlab used softmax for patternnet default ??? max(opts.neuron,K)? opts 
    ])

    optimizer = keras.optimizers.Adam(learning_rate = hyperM.learning_rate)

    model.compile(optimizer='adam',
                  loss=hyperM.loss,
                  metrics=['accuracy'])

    return model
    
  def GNN_run(self, flag):
    """
      Train and test directly.
      Do not learn from the unknown labels.
    """
    gnn = copy.deepcopy(self)
    hyperM = gnn.hyperM
    DataSets = self.DataSets
    k = DataSets.d
    z_train = DataSets.z_train
    y_train = DataSets.y_train
    y_test = DataSets.y_test
    z_test = DataSets.z_test
    model = gnn.model    


    if flag == "direct":
      y_train_one_hot = to_categorical(y_train, num_classes=k)
      train_strat = time.time() 
      history = model.fit(z_train.toarray(), y_train_one_hot, 
        validation_split=hyperM.val_split,
        epochs=hyperM.epochs, 
        shuffle=True,
        verbose=0)
    else:
      early_stopping_callback = EarlyStopping(monitor='loss', patience=5, verbose=0)
      checkpoint_callback = ModelCheckpoint('GNN.h5', monitor='loss', save_best_only=True, mode='min', verbose=0)
      
      train_strat = time.time()
      history = model.fit(batch_generator(z_train, y_train, k, 32, True),
                      epochs=hyperM.epochs,
                      steps_per_epoch=z_train.shape[0],
                      callbacks=[early_stopping_callback, checkpoint_callback],
                      verbose=0)
    train_end = time.time()
    train_time = train_end - train_strat 

    y_test_one_hot = to_categorical(y_test, num_classes=k) 
    # set verbose to 0 to silent the output
    test_loss, test_acc = gnn.model.evaluate(z_test.toarray(),  y_test_one_hot, verbose=0) 
    return test_acc, train_time
############------------Sparse_supervised_learning_end---------###############


#Packages for Drive Files

In [3]:
# import packages
## for mount drive purpose
import os
from google.colab import drive

#Mount Drive

In [4]:
# mount drive
drive.mount('/content/drive/', force_remount=True)
os.chdir('/content/drive/My Drive/Colab_Notebooks/Graph_ML/semi_dr.shen')

Mounted at /content/drive/


# import ipynb packages

In [5]:
!pip install import-ipynb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.1-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 7.4 MB/s 
Installing collected packages: jedi, import-ipynb
Successfully installed import-ipynb-0.1.4 jedi-0.18.1


In [6]:
import import_ipynb
from test_cases import Model, Case

importing Jupyter notebook from test_cases.ipynb
Mounted at /content/drive/


# Test Cases 

## Test sparse matrix

In [None]:
A = sparse.csr_matrix(np.array([[4,5,6], [7,8,9],[10,11,12]]))
B = sparse.csr_matrix(np.array([[1,0,0], [0,2,0],[0,0,3]]))

In [None]:
C = np.array([[4,5,6], [7,8,9],[10,11,12]])
D = np.array([[1,0,0], [0,2,0],[0,0,3]])

In [None]:
t1_s = time.time()
T1 = A.dot(B)
t1_e = time.time()
print((t1_e-t1_s))

0.0009875297546386719


In [None]:
t2_s = time.time()
T2 = C.dot(D)
t2_e = time.time()
print((t2_e-t2_s))

0.00013256072998046875


In [None]:
from numpy import linalg as LA
row_norm = LA.norm(C, axis = 1)
print(row_norm)
reshape_row_norm = np.reshape(row_norm, (3,1))
print(reshape_row_norm)
Z = np.nan_to_num(C/reshape_row_norm)

[ 8.77496439 13.92838828 19.10497317]
[[ 8.77496439]
 [13.92838828]
 [19.10497317]]


In [None]:
print(B)

  (0, 0)	1
  (1, 1)	2
  (2, 2)	3


In [None]:
print(A)

  (0, 0)	4
  (0, 1)	5
  (0, 2)	6
  (1, 0)	7
  (1, 1)	8
  (1, 2)	9
  (2, 0)	10
  (2, 1)	11
  (2, 2)	12


In [None]:
A_csr = sparse.csr_matrix(np.array([[4,0,6], [7,8,0],[0,11,12]]))

In [None]:
print(A_csr)

  (0, 0)	4
  (0, 2)	6
  (1, 0)	7
  (1, 1)	8
  (2, 1)	11
  (2, 2)	12


In [None]:
print(A_csr.indptr)
print(A_csr.indices)
print(A_csr.data)

[0 2 4 6]
[0 2 0 1 1 2]
[ 4  6  7  8 11 12]


In [None]:
A_csc = sparse.csc_matrix(np.array([[4,0,6], [7,8,0],[0,11,12]]))

In [None]:
print(A_csc)

  (0, 0)	4
  (1, 0)	7
  (1, 1)	8
  (2, 1)	11
  (0, 2)	6
  (2, 2)	12


In [None]:
print(A_csc.indptr)
print(A_csc.indices)
print(A_csc.data)

[0 2 4 6]
[0 1 1 2 0 2]
[ 4  7  8 11  6 12]


In [None]:
print(A.nonzero())

(array([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=int32), array([0, 1, 2, 0, 1, 2, 0, 1, 2], dtype=int32))


In [None]:
print(B.nonzero())

(array([0, 1, 2], dtype=int32), array([0, 1, 2], dtype=int32))


In [None]:
C = A.dot(B)

In [None]:
print(C)

  (0, 2)	18
  (0, 1)	10
  (0, 0)	4
  (1, 2)	27
  (1, 1)	16
  (1, 0)	7
  (2, 2)	36
  (2, 1)	22
  (2, 0)	10


In [None]:
# this is csr, using csc will be faster to slice columns
C_new = sparse.lil_matrix(C[:,[0,2]])

In [None]:
print(C_new)

  (0, 0)	4
  (0, 1)	18
  (1, 0)	7
  (1, 1)	27
  (2, 0)	10
  (2, 1)	36


### Test diagonal, laplacian

In [None]:
A = np.array([
 [0, 0, 1, 0],
 [0, 0, 0, 1],
 [1, 0, 0, 1],
 [0, 1, 1, 0]])
print(A)

[[0 0 1 0]
 [0 0 0 1]
 [1 0 0 1]
 [0 1 1 0]]


In [None]:
I = sparse.identity(4)

In [None]:
A = A + I
print(A)

[[1. 0. 1. 0.]
 [0. 1. 0. 1.]
 [1. 0. 1. 1.]
 [0. 1. 1. 1.]]


In [None]:
import networkx as nx

In [None]:
G=nx.from_numpy_matrix(A)
G.degree()

DegreeView({0: 3, 1: 3, 2: 4, 3: 4})

In [None]:
A_sparse = sparse.csr_matrix(A)

In [None]:
# array of degrees
dig = A_sparse.sum(axis=0).A1

In [None]:
print(dig)

[1 1 2 2]


In [None]:
from scipy.sparse import diags

In [None]:
D = diags(dig,0)
_D = D.power(-0.5)
_L = _D.dot(A_sparse.dot(_D))
I = sparse.identity(4)
L = I - _L

In [None]:
print(D)

  (0, 0)	1.0
  (1, 1)	1.0
  (2, 2)	2.0
  (3, 3)	2.0


In [None]:
print(_L)

  (0, 2)	0.7071067811865476
  (1, 3)	0.7071067811865476
  (2, 0)	0.7071067811865476
  (2, 3)	0.5000000000000001
  (3, 1)	0.7071067811865476
  (3, 2)	0.5000000000000001


In [None]:
I = sparse.identity(4)
L = I - _L
print(L.toarray())

[[ 1.          0.         -0.70710678  0.        ]
 [ 0.          1.          0.         -0.70710678]
 [-0.70710678  0.          1.         -0.5       ]
 [ 0.         -0.70710678 -0.5         1.        ]]


In [None]:
G=nx.from_numpy_matrix(A)
L_matrix = nx.normalized_laplacian_matrix(G).toarray() 
print(L_matrix)

[[ 1.          0.         -0.70710678  0.        ]
 [ 0.          1.          0.         -0.70710678]
 [-0.70710678  0.          1.         -0.5       ]
 [ 0.         -0.70710678 -0.5         1.        ]]


## Graph Encoder test case


In [None]:
class Encoder_case:
  def __init__(self, A,Y,n):
    Encoder_case.X = A
    Encoder_case.Y = Y
    Encoder_case.n = n

###Case 1

A = 

\begin{bmatrix}
0 & 1 & 1 & 1 & 0\\
1 & 0 & 1 & 1 & 1\\
1 & 1 & 0 & 1 & 1\\
1 & 1 & 1 & 0 & 1\\
0 & 1 & 1 & 1 & 0
\end{bmatrix}

Labels = [0,0,0,1,1] 


In [None]:
A = np.ones((5,5))
A[0,4] = 0
A[4,0] = 0
np.fill_diagonal(A, 0)

Y = np.array([[0,0,0,1,1]]).reshape((5,1))

print(A)
print(Y)

Encoder_case = Encoder_case(A,Y,5)

[[0. 1. 1. 1. 0.]
 [1. 0. 1. 1. 1.]
 [1. 1. 0. 1. 1.]
 [1. 1. 1. 0. 1.]
 [0. 1. 1. 1. 0.]]
[[0]
 [0]
 [0]
 [1]
 [1]]


In [None]:
print(np.nonzero(A))

(array([0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]), array([1, 2, 3, 0, 2, 3, 4, 0, 1, 3, 4, 0, 1, 2, 4, 1, 2, 3]))


In [None]:
np.transpose(np.nonzero(A))

array([[0, 1],
       [0, 2],
       [0, 3],
       [1, 0],
       [1, 2],
       [1, 3],
       [1, 4],
       [2, 0],
       [2, 1],
       [2, 3],
       [2, 4],
       [3, 0],
       [3, 1],
       [3, 2],
       [3, 4],
       [4, 1],
       [4, 2],
       [4, 3]])

In [None]:
len(np.transpose(np.nonzero(A)))

18

#### Test sparse correlation

In [None]:
GEE = GraphEncoderEmbed()
X_sparse = sparse.csr_matrix(Encoder_case.X)
Z, W = GEE.run(X_sparse, Encoder_case.Y, Encoder_case.n, Laplacian = False, DiagA = False, Correlation = False)

In [None]:
# 2-norm
row_norm = sparse.linalg.norm(Z, axis = 1)
print(row_norm)

[0.83333335 1.20185044 1.20185044 1.11803402 0.83333335]


In [None]:
# get 1/row_norm first
diag = np.nan_to_num(1/row_norm)
print(diag)

[1.19999998 0.83205029 0.83205029 0.89442717 1.19999998]


In [None]:
# make to sparse diagonal
N = sparse.diags(diag,0)
print(N)

  (0, 0)	1.1999999771118166
  (1, 1)	0.8320502867079879
  (2, 2)	0.8320502867079879
  (3, 3)	0.8944271696751103
  (4, 4)	1.1999999771118166


In [None]:
# Z * diagonal (1/row_norm)
Z_r = N.dot(Z)
print(Z_r)

  (0, 1)	0.5999999885559083
  (0, 0)	0.8000000085830685
  (1, 1)	0.8320502867079879
  (1, 0)	0.5547002076700125
  (2, 1)	0.8320502867079879
  (2, 0)	0.5547002076700125
  (3, 1)	0.44721358483755513
  (3, 0)	0.8944271963311171
  (4, 1)	0.5999999885559083
  (4, 0)	0.8000000085830685


#### [Original]Laplacian = False, correclation = False, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0., 1., 1.],
       [0., 2., 1.],
       [0., 3., 1.],
       [1., 0., 1.],
       [1., 2., 1.],
       [1., 3., 1.],
       [1., 4., 1.],
       [2., 0., 1.],
       [2., 1., 1.],
       [2., 3., 1.],
       [2., 4., 1.],
       [3., 0., 1.],
       [3., 1., 1.],
       [3., 2., 1.],
       [3., 4., 1.],
       [4., 1., 1.],
       [4., 2., 1.],
       [4., 3., 1.]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[1.33333333 1.        ]
 [1.33333333 2.        ]
 [1.33333333 2.        ]
 [2.         1.        ]
 [1.33333333 1.        ]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### SPARSE MATRIX, Laplacian = False, correclation = False, DiagA = False
note: z matrix valus is 1/2 of the edge list version. doesn't affect the training and testing does not adding double weights here.

In [None]:
GEE = GraphEncoderEmbed()
X_sparse = sparse.csr_matrix(Encoder_case.X)
Z, W = GEE.run(X_sparse, Encoder_case.Y, Encoder_case.n, Laplacian = False, DiagA = False, Correlation = False)

In [None]:
print(Z)
print(W)

  (0, 1)	0.5
  (0, 0)	0.6666666865348816
  (1, 1)	1.0
  (1, 0)	0.6666666865348816
  (2, 1)	1.0
  (2, 0)	0.6666666865348816
  (3, 1)	0.5
  (3, 0)	1.0000000298023224
  (4, 1)	0.5
  (4, 0)	0.6666666865348816
  (0, 0)	0.33333334
  (1, 0)	0.33333334
  (2, 0)	0.33333334
  (3, 1)	0.5
  (4, 1)	0.5


In [None]:
print(Z.toarray())
print(W.toarray())

[[0.66666669 0.5       ]
 [0.66666669 1.        ]
 [0.66666669 1.        ]
 [1.00000003 0.5       ]
 [0.66666669 0.5       ]]
[[0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### Laplacian = False, correclation = True, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0., 1., 1.],
       [0., 2., 1.],
       [0., 3., 1.],
       [1., 0., 1.],
       [1., 2., 1.],
       [1., 3., 1.],
       [1., 4., 1.],
       [2., 0., 1.],
       [2., 1., 1.],
       [2., 3., 1.],
       [2., 4., 1.],
       [3., 0., 1.],
       [3., 1., 1.],
       [3., 2., 1.],
       [3., 4., 1.],
       [4., 1., 1.],
       [4., 2., 1.],
       [4., 3., 1.]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = True)
print(Z)
print(W)

[[0.8        0.6       ]
 [0.5547002  0.83205029]
 [0.5547002  0.83205029]
 [0.89442719 0.4472136 ]
 [0.8        0.6       ]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### SPARSE MATRIX, Laplacian = False, correclation = True, DiagA = False
note: z matrix valus is 1/2 of the edge list version. doesn't affect the training and testing does not adding double weights here.

In [None]:
GEE = GraphEncoderEmbed()
X_sparse = sparse.csr_matrix(Encoder_case.X)
Z, W = GEE.run(X_sparse, Encoder_case.Y, Encoder_case.n, Laplacian = False, DiagA = False, Correlation = True)

In [None]:
print(Z)
print(W)

  (0, 1)	0.5999999885559083
  (0, 0)	0.8000000085830685
  (1, 1)	0.8320502867079879
  (1, 0)	0.5547002076700125
  (2, 1)	0.8320502867079879
  (2, 0)	0.5547002076700125
  (3, 1)	0.44721358483755513
  (3, 0)	0.8944271963311171
  (4, 1)	0.5999999885559083
  (4, 0)	0.8000000085830685
  (0, 0)	0.33333334
  (1, 0)	0.33333334
  (2, 0)	0.33333334
  (3, 1)	0.5
  (4, 1)	0.5


In [None]:
print(Z.toarray())
print(W.toarray())

[[0.80000001 0.59999999]
 [0.55470021 0.83205029]
 [0.55470021 0.83205029]
 [0.8944272  0.44721358]
 [0.80000001 0.59999999]]
[[0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### Laplacian = True, correclation = False, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = True, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0.        , 1.        , 0.14433757],
       [0.        , 2.        , 0.14433757],
       [0.        , 3.        , 0.14433757],
       [1.        , 0.        , 0.14433757],
       [1.        , 2.        , 0.125     ],
       [1.        , 3.        , 0.125     ],
       [1.        , 4.        , 0.14433757],
       [2.        , 0.        , 0.14433757],
       [2.        , 1.        , 0.125     ],
       [2.        , 3.        , 0.125     ],
       [2.        , 4.        , 0.14433757],
       [3.        , 0.        , 0.14433757],
       [3.        , 1.        , 0.125     ],
       [3.        , 2.        , 0.125     ],
       [3.        , 4.        , 0.14433757],
       [4.        , 1.        , 0.14433757],
       [4.        , 2.        , 0.14433757],
       [4.        , 3.        , 0.14433757]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[0.19245009 0.14433757]
 [0.17955838 0.26933757]
 [0.17955838 0.26933757]
 [0.26289171 0.14433757]
 [0.19245009 0.14433757]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### SPARSE MATRIX, Laplacian = True, correclation = False, DiagA = False
note: z matrix valus is 1/2 of the edge list version. doesn't affect the training and testing does not adding double weights here.

In [None]:
GEE = GraphEncoderEmbed()
X_sparse = sparse.csr_matrix(Encoder_case.X)
Z, W = GEE.run(X_sparse, Encoder_case.Y, Encoder_case.n, Laplacian = True, DiagA = False, Correlation = False)

In [None]:
print(Z)
print(W)

  (0, 1)	0.14433756729740643
  (0, 0)	0.19245009546533487
  (1, 1)	0.26933756729740643
  (1, 0)	0.17955838354952763
  (2, 1)	0.26933756729740643
  (2, 0)	0.17955838354952763
  (3, 1)	0.14433756729740643
  (3, 0)	0.26289171936638783
  (4, 1)	0.14433756729740643
  (4, 0)	0.19245009546533487
  (0, 0)	0.33333334
  (1, 0)	0.33333334
  (2, 0)	0.33333334
  (3, 1)	0.5
  (4, 1)	0.5


In [None]:
print(Z.toarray())
print(W.toarray())

[[0.1924501  0.14433757]
 [0.17955838 0.26933757]
 [0.17955838 0.26933757]
 [0.26289172 0.14433757]
 [0.1924501  0.14433757]]
[[0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.33333334 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


## [Sparse]Supervised Learning

### Case 10 with 3000 nodes (SBM)

In [None]:
n = 3000
case = Case(n)

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_10.Y, case_10.n, Laplacian = False, DiagA = False, Correlation = False)

train_idx = case_10.train_idx
test_idx = case_10.test_idx
case_10.z_train= Z[train_idx]
case_10.z_test = Z[test_idx]
case_10.y_train = case_10.Y[train_idx].ravel()
case_10.y_test = case_10.Y_test.ravel() 

gnn = GNN(case_10)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0782020092010498 seconds ---
--- train 223.3523678779602 seconds ---
--- accuracy: 0.9549999833106995 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_10.Y, case_10.n, Laplacian = False, DiagA = True, Correlation = False)

train_idx = case_10.train_idx
test_idx = case_10.test_idx
case_10.z_train= Z[train_idx]
case_10.z_test = Z[test_idx]
case_10.y_train = case_10.Y[train_idx].ravel()
case_10.y_test = case_10.Y_test.ravel() 

gnn = GNN(case_10)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.09136271476745605 seconds ---
--- train 177.6869888305664 seconds ---
--- accuracy: 0.9566666483879089 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_10.Y, case_10.n, Laplacian = True, DiagA = False, Correlation = False)

train_idx = case_10.train_idx
test_idx = case_10.test_idx
case_10.z_train= Z[train_idx]
case_10.z_test = Z[test_idx]
case_10.y_train = case_10.Y[train_idx].ravel()
case_10.y_test = case_10.Y_test.ravel() 

gnn = GNN(case_10)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.13447093963623047 seconds ---
--- train 442.49657702445984 seconds ---
--- accuracy: 0.9233333468437195 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_10.Y, case_10.n, Laplacian = False, DiagA = False, Correlation = True)

train_idx = case_10.train_idx
test_idx = case_10.test_idx
case_10.z_train= Z[train_idx]
case_10.z_test = Z[test_idx]
case_10.y_train = case_10.Y[train_idx].ravel()
case_10.y_test = case_10.Y_test.ravel() 

gnn = GNN(case_10)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.2443253993988037 seconds ---
--- train 405.3389480113983 seconds ---
--- accuracy: 0.9583333134651184 ---


### Case 11 with 3000 nodes (SBM)

In [None]:
n = 3000
case = Case(n)

In [None]:
case_11 = case.case_11_fully_known()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[2]
 [0]
 [2]
 ...
 [3]
 [3]
 [2]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_11.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_11.Y, case_11.n, Laplacian = False, DiagA = False, Correlation = False)

train_idx = case_11.train_idx
test_idx = case_11.test_idx
case_11.z_train= Z[train_idx]
case_11.z_test = Z[test_idx]
case_11.y_train = case_11.Y[train_idx].ravel()
case_11.y_test = case_11.Y_test.ravel() 

gnn = GNN(case_11)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.08876967430114746 seconds ---
--- train 502.5157072544098 seconds ---
--- accuracy: 1.0 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_11.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_11.Y, case_11.n, Laplacian = False, DiagA = True, Correlation = False)

train_idx = case_11.train_idx
test_idx = case_11.test_idx
case_11.z_train= Z[train_idx]
case_11.z_test = Z[test_idx]
case_11.y_train = case_11.Y[train_idx].ravel()
case_11.y_test = case_11.Y_test.ravel() 

gnn = GNN(case_11)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05606818199157715 seconds ---
--- train 434.72901606559753 seconds ---
--- accuracy: 1.0 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_11.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_11.Y, case_11.n, Laplacian = True, DiagA = False, Correlation = False)

train_idx = case_11.train_idx
test_idx = case_11.test_idx
case_11.z_train= Z[train_idx]
case_11.z_test = Z[test_idx]
case_11.y_train = case_11.Y[train_idx].ravel()
case_11.y_test = case_11.Y_test.ravel() 

gnn = GNN(case_11)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.12320375442504883 seconds ---
--- train 429.60369753837585 seconds ---
--- accuracy: 1.0 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_11.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_11.Y, case_11.n, Laplacian = False, DiagA = False, Correlation = True)

train_idx = case_11.train_idx
test_idx = case_11.test_idx
case_11.z_train= Z[train_idx]
case_11.z_test = Z[test_idx]
case_11.y_train = case_11.Y[train_idx].ravel()
case_11.y_test = case_11.Y_test.ravel() 

gnn = GNN(case_11)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05002546310424805 seconds ---
--- train 502.299058675766 seconds ---
--- accuracy: 1.0 ---


### Case 20 with 3000 nodes (DC-SBM)

In [None]:
n = 3000
case = Case(n)

In [None]:
case_test = case.case_20_fully_known()
case_test.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03838777542114258 seconds ---
--- train 175.9174587726593 seconds ---
--- accuracy: 0.8933333158493042 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03677940368652344 seconds ---
--- train 406.9756042957306 seconds ---
--- accuracy: 0.9100000262260437 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03650665283203125 seconds ---
--- train 462.6620156764984 seconds ---
--- accuracy: 0.8833333253860474 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03638601303100586 seconds ---
--- train 442.27731013298035 seconds ---
--- accuracy: 0.9049999713897705 ---


### Case 21 with 3000 nodes (DC-SBM Edge List)

In [None]:
n = 3000
case = Case(n)

In [None]:
case_test = case.case_21_fully_known()
case_test.summary()

#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03486514091491699 seconds ---
--- train 414.2749547958374 seconds ---
--- accuracy: 0.8299999833106995 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.06194496154785156 seconds ---
--- train 442.53462529182434 seconds ---
--- accuracy: 0.8183333277702332 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07028460502624512 seconds ---
--- train 413.15993666648865 seconds ---
--- accuracy: 0.8149999976158142 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0374906063079834 seconds ---
--- train 442.28494596481323 seconds ---
--- accuracy: 0.82833331823349 ---


### Case 10 with 5000 nodes (SBM)

In [None]:
n = 5000
case = Case(n)

In [None]:
case_test = case.case_10_fully_known()
case_test.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.15922141075134277 seconds ---
--- train 364.1757776737213 seconds ---
--- accuracy: 0.9869999885559082 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.13637113571166992 seconds ---
--- train 243.53846096992493 seconds ---
--- accuracy: 0.9860000014305115 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.1709291934967041 seconds ---
--- train 702.3590714931488 seconds ---
--- accuracy: 0.9779999852180481 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True)

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.09369468688964844 seconds ---
--- train 137.98763251304626 seconds ---
--- accuracy: 0.9819999933242798 ---


### Case 11 with 5000 nodes (SBM)

In [None]:
n = 5000
case = Case(n)

In [None]:
case_test = case.case_11_fully_known()
case_test.summary()

name:

    SBM with 5 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
5
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[2]
 [0]
 [2]
 ...
 [3]
 [0]
 [1]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.09811902046203613 seconds ---
--- train 879.1894819736481 seconds ---
--- accuracy: 1.0 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.11572647094726562 seconds ---
--- train 901.3123707771301 seconds ---
--- accuracy: 1.0 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.14545154571533203 seconds ---
--- train 682.2206883430481 seconds ---
--- accuracy: 0.8289999961853027 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.09896516799926758 seconds ---
--- train 862.3278121948242 seconds ---
--- accuracy: 1.0 ---


### Case 20 with 5000 nodes (DC-SBM)

In [None]:
n = 5000
case = Case(n)

In [None]:
case_test = case.case_20_fully_known()
case_test.summary()

[[0.20530728]
 [0.25104189]
 [0.22868416]
 ...
 [0.06156643]
 [0.27507487]
 [0.12602176]]
name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05440974235534668 seconds ---
--- train 277.24299359321594 seconds ---
--- accuracy: 0.9350000023841858 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0541532039642334 seconds ---
--- train 514.6924860477448 seconds ---
--- accuracy: 0.9359999895095825 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0666968822479248 seconds ---
--- train 670.5076379776001 seconds ---
--- accuracy: 0.9279999732971191 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
X_sparse = sparse.csr_matrix(case_test.X)
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(X_sparse, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05890154838562012 seconds ---
--- train 826.0063326358795 seconds ---
--- accuracy: 0.9309999942779541 ---


### Case 21 with 5000 nodes (DC-SBM Edge List)

In [None]:
n = 5000
case = Case(n)

In [None]:
case_test = case.case_21_fully_known()
case_test.summary()

name:

    DC-SBM with 10 classes and defined probabilities with fully known labels.
    Edge list version. 
    
n:
<class 'int'>
5000
d:
<class 'int'>
10
X:
(169442, 3)
[[   0  161    1]
 [   0  242    1]
 [   0  385    1]
 ...
 [4999 4586    1]
 [4999 4645    1]
 [4999 4772    1]]
Y:
(5000, 1)
[[4]
 [0]
 [5]
 ...
 [6]
 [1]
 [3]]


#### Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 4.580548286437988 seconds ---
--- train 292.23935866355896 seconds ---
--- accuracy: 0.9079999923706055 ---


#### Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 5.443112373352051 seconds ---
--- train 562.6391978263855 seconds ---
--- accuracy: 0.902999997138977 ---


#### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 4.596813440322876 seconds ---
--- train 622.2317621707916 seconds ---
--- accuracy: 0.8849999904632568 ---


#### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
emb_strat = time.time()
Z, W = GEE.run(case_test.X, case_test.Y, case_test.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)
emb_end = time.time()
emb_time = emb_end - emb_strat

train_idx = case_test.train_idx
test_idx = case_test.test_idx
case_test.z_train= Z[train_idx]
case_test.z_test = Z[test_idx]
case_test.y_train = case_test.Y[train_idx].ravel()
case_test.y_test = case_test.Y_test.ravel() 

gnn = GNN(case_test)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 4.554842710494995 seconds ---
--- train 562.2122373580933 seconds ---
--- accuracy: 0.8999999761581421 ---


#Real datasets and cases from input files

In [None]:
import math
import copy
import networkx as nx

class RealDataSet:
  def __init__(self, edg_file, node_file):
    self.X = None  # edg_list
    self.n = None
    self.Y = None
    self.edg_file = edg_file
    self.node_file = node_file
  
  def get_initial_values(self):
    realSet = copy.deepcopy(self)

    label_dict, map_new_old_keys = self.read_node_file(self.node_file)
    n = self.get_n(label_dict)
    
    if map_new_old_keys:
      X = self.read_edge_file_with_remap(self.edg_file, n, map_new_old_keys)
    else:
      X = self.read_edge_file(self.edg_file, n)
    
    realSet.X = X
    label_dict = self.check_class_values(label_dict)
    Y = self.get_labels(label_dict, n)
    realSet.Y = Y
    realSet.n = n
    realSet.k = self.get_k(label_dict)

    return realSet
  
  def check_class_values(self, label_dict):
    """
      check if class values start with 0, if not, correct it
    """
    
    if min(set(label_dict.values())) != 0:
      for k, v in label_dict.items():
         label_dict[k] = str(int(v) - 1)
    
    return label_dict

  def find_split_point(self, firstline):
    # find split point
    split_point_pos = [",", "\t"]
    split_point = ""
    for sp in split_point_pos:
      if sp in firstline:
        split_point = sp
        break
    return split_point

  def read_node_file(self, filename):
    """
      the node in the node file start with node 1 not node 0
    """
    re_map_node = False
    label_dict = {}
    labels = open(filename, "r") 
    line_count = 0
    map_new_old_keys = {}
    
    for l in labels:
      line_count += 1
      if (line_count) == 1:
        split_point = self.find_split_point(l)
      (node_i, label_i) = l.strip().split(split_point)
      if (line_count) == 1 and (int(node_i) != 1):
        re_map_node = True 
      label_dict[int(node_i)-1] = label_i 
    # if there is an id for the node, for example PMID for pubmed data
    # need to map the pubmed id back to a serie of node IDs starting from 0 
    if re_map_node:
      keys = sorted(list(label_dict.keys()))
      new_node_idx = [i for i in range(len(keys))]
      new_label_dict = {}
      for i in range(len(keys)):
        map_new_old_keys[keys[i]] = new_node_idx[i]
        new_label_dict[new_node_idx[i]] = label_dict[keys[i]]
      label_dict = new_label_dict
        
    return label_dict, map_new_old_keys
  
  def get_n(self, label_dict):
    """
      get the number of nodes: n
      the keys start with 0, so n is max + 1.
    """
    n = max(sorted(list(label_dict.keys())))+1
    return n
  
  def get_k(self, label_dict):
    """
      get the number of classes: k
    """
    k = len(set(label_dict.values()))
    return k

  def read_edge_file(self, filename, n):
    """
      NOTE: the node in the node file start with node 1 not node 0
    """
    edg_list = []
    edges = open(filename, "r") 

    line_count = 0
    for l in edges:
      
      line_count += 1
      if (line_count) == 1:
        split_point = self.find_split_point(l)

      elements = l.strip().split(split_point)
      if len(elements) > 2:
        (node_i, node_j, w) = elements
        edg_list.append([int(node_i)-1, int(node_j)-1, float(w)])
      else: 
        (node_i, node_j) = elements
        edg_list.append([int(node_i)-1, int(node_j)-1, 1]) 
    edg = np.array(edg_list)
    return edg  

  def read_edge_file_with_remap(self, filename, n, map_new_old_keys):
    """
      for the ids that are remaped from the node file, 
      need to remap id for edge list as well
    """
    edg_list = []
    edges = open(filename, "r") 

    line_count = 0
    for l in edges:
      line_count += 1
      if (line_count) == 1:
        split_point = self.find_split_point(l)
      elements = l.strip().split(split_point)
      if len(elements) > 2:
        (node_i, node_j, w) = elements
        new_idx_i = map_new_old_keys[int(node_i)-1]
        new_idx_j = map_new_old_keys[int(node_j)-1]
        edg_list.append([new_idx_i, new_idx_j, float(w)])
      else: 
        (node_i, node_j) = elements
        new_idx_i = map_new_old_keys[int(node_i)-1]
        new_idx_j = map_new_old_keys[int(node_j)-1]        
        edg_list.append([new_idx_i, new_idx_j, 1]) 
    edg = np.array(edg_list)
    return edg  

  def check_label(self, label_dict, n):
    """
      the input label_dict start with key 0
    """
    check = True
    keys = sorted(list(label_dict.keys()))
    unlabeld_node_idx = []
    for node_idx in range(n):
      if node_idx not in keys:
        unlabeld_node_idx.append(node_idx)
    if len(unlabeld_node_idx) > 0:
      print("There are node(s) not labeled")
      check = False
    return check, unlabeld_node_idx

  def get_labels(self, label_dict, n):
    check, unlabeld_node_idx = self.check_label(label_dict, n)
    keys = sorted(list(label_dict.keys()))
    Y = np.zeros((n,1), dtype=int)
    for node_idx in keys:    
      Y[node_idx][0] = int(label_dict[node_idx])
    if not check:
      for idx in unlabeld_node_idx:
        Y[idx][0] = -1

    return Y

  def split_sets(self, test_ratio):

    DataSet = copy.deepcopy(self)
    Y_ori = DataSet.Y
    Y = np.copy(Y_ori)

    t = test_ratio
    Y_1st_dim = Y.shape[0]

    np.random.seed(0)
    indices = np.random.permutation(Y_1st_dim)  #randomly permute the 1st indices

    # Generate indices for splits
    test_ind_split_point = math.floor(Y_1st_dim*t)
    test_idx, train_idx = indices[:test_ind_split_point], indices[test_ind_split_point:]

    
    # get the Y_test label
    Y_test = Y[test_idx]
    Y_train = Y[train_idx]
    # mark the test position as unknown: -1
    Y[test_idx, 0] = -1    


    DataSet.Y_ori = Y_ori
    DataSet.Y = Y
    DataSet.Y_train = Y_train.ravel()
    DataSet.Y_test = Y_test.ravel() 
    DataSet.test_idx = test_idx
    DataSet.train_idx = train_idx    
    return DataSet 

def edge_list_to_adjacency_matrix(edg_list, n):
  A = np.zeros((n,n))
  for [i, j, w] in edg_list:
    i = int(i)
    j = int(j)
    if A[i,j] != w:
      A[i,j] = w
    if A[j,i] != w:
      A[j,i] = w
  return A



##case10

In [None]:
edg_file = "case10.edges"
node_file = "case10.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
case10 = RlDataSet.get_initial_values()
test_case = case10.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 4.000e+00 1.000e+00]
 [0.000e+00 2.500e+01 1.000e+00]
 [0.000e+00 4.000e+01 1.000e+00]
 ...
 [2.992e+03 2.994e+03 1.000e+00]
 [2.993e+03 2.998e+03 1.000e+00]
 [2.998e+03 2.999e+03 1.000e+00]]


In [None]:
% matplotlib inline

UsageError: Line magic function `%` not found.


In [None]:
print(test_case.X.shape)

In [None]:
print(test_case.Y)

[[ 0]
 [-1]
 [ 1]
 ...
 [ 1]
 [ 1]
 [ 1]]


In [None]:
print(test_case.n)

3000


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03038644790649414 seconds ---
--- train 88.90509605407715 seconds ---
--- accuracy: 0.9183333516120911 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0961446762084961 seconds ---
--- train 42.322298526763916 seconds ---
--- accuracy: 0.8566666841506958 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03454303741455078 seconds ---
--- train 132.98816013336182 seconds ---
--- accuracy: 0.9150000214576721 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.08326268196105957 seconds ---
--- train 21.180127382278442 seconds ---
--- accuracy: 0.9066666960716248 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.06829667091369629 seconds ---
--- train 530.1541969776154 seconds ---
--- accuracy: 0.7733333110809326 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07297301292419434 seconds ---
--- train 20.925249338150024 seconds ---
--- accuracy: 0.47999998927116394 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03211236000061035 seconds ---
--- train 64.22701692581177 seconds ---
--- accuracy: 0.82833331823349 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.055518150329589844 seconds ---
--- train 13.124871730804443 seconds ---
--- accuracy: 0.8316666483879089 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0351099967956543 seconds ---
--- train 54.50569772720337 seconds ---
--- accuracy: 0.824999988079071 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05383610725402832 seconds ---
--- train 20.92409920692444 seconds ---
--- accuracy: 0.8199999928474426 ---


##case11

In [None]:
edg_file = "case11.edges"
node_file = "case11.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
case11 = RlDataSet.get_initial_values()
test_case = case11.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 4.000e+00 1.000e+00]
 [0.000e+00 1.300e+01 1.000e+00]
 [0.000e+00 2.500e+01 1.000e+00]
 ...
 [2.992e+03 2.994e+03 1.000e+00]
 [2.993e+03 2.998e+03 1.000e+00]
 [2.998e+03 2.999e+03 1.000e+00]]


In [None]:
print(test_case.X.shape)

(539863, 3)


In [None]:
print(test_case.Y)

[[ 1]
 [-1]
 [ 1]
 ...
 [ 2]
 [ 2]
 [ 1]]


In [None]:
print(test_case.n)

3000


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07485294342041016 seconds ---
--- train 573.1077656745911 seconds ---
--- accuracy: 0.9983333349227905 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07628655433654785 seconds ---
--- train 21.249775886535645 seconds ---
--- accuracy: 0.9983333349227905 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.09609580039978027 seconds ---
--- train 683.2330887317657 seconds ---
--- accuracy: 0.9983333349227905 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05245351791381836 seconds ---
--- train 13.996417760848999 seconds ---
--- accuracy: 0.9983333349227905 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07326173782348633 seconds ---
--- train 622.3633680343628 seconds ---
--- accuracy: 0.9983333349227905 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.0765531063079834 seconds ---
--- train 15.731871366500854 seconds ---
--- accuracy: 0.19499999284744263 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.04842519760131836 seconds ---
--- train 471.52581882476807 seconds ---
--- accuracy: 0.9900000095367432 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.045197248458862305 seconds ---
--- train 20.89595651626587 seconds ---
--- accuracy: 0.9900000095367432 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05521368980407715 seconds ---
--- train 562.3607084751129 seconds ---
--- accuracy: 0.9883333444595337 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.058574676513671875 seconds ---
--- train 13.61279582977295 seconds ---
--- accuracy: 0.9900000095367432 ---


##case20

In [None]:
edg_file = "case20.edges"
node_file = "case20.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
case10 = RlDataSet.get_initial_values()
test_case = case10.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 3.000e+00 1.000e+00]
 [0.000e+00 1.680e+02 1.000e+00]
 [0.000e+00 4.420e+02 1.000e+00]
 ...
 [2.952e+03 2.975e+03 1.000e+00]
 [2.952e+03 2.993e+03 1.000e+00]
 [2.975e+03 2.980e+03 1.000e+00]]


In [None]:
print(test_case.X.shape)

(32994, 3)


In [None]:
print(test_case.Y)

[[ 0]
 [-1]
 [ 1]
 ...
 [ 1]
 [ 1]
 [ 1]]


In [None]:
print(test_case.n)

3000


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.021523714065551758 seconds ---
--- train 203.19335341453552 seconds ---
--- accuracy: 0.7166666388511658 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07206487655639648 seconds ---
--- train 42.37710237503052 seconds ---
--- accuracy: 0.7149999737739563 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.02159428596496582 seconds ---
--- train 118.12804365158081 seconds ---
--- accuracy: 0.7266666889190674 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.054227590560913086 seconds ---
--- train 19.87187123298645 seconds ---
--- accuracy: 0.7116666436195374 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.02234935760498047 seconds ---
--- train 322.15816712379456 seconds ---
--- accuracy: 0.7333333492279053 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.056607723236083984 seconds ---
--- train 21.61723017692566 seconds ---
--- accuracy: 0.47999998927116394 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.021627187728881836 seconds ---
--- train 224.08568739891052 seconds ---
--- accuracy: 0.721666693687439 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.030698060989379883 seconds ---
--- train 20.950966119766235 seconds ---
--- accuracy: 0.7233333587646484 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.02348160743713379 seconds ---
--- train 226.90867948532104 seconds ---
--- accuracy: 0.7183333039283752 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03227496147155762 seconds ---
--- train 20.978010416030884 seconds ---
--- accuracy: 0.7083333134651184 ---


##case21

In [None]:
edg_file = "case21.edges"
node_file = "case21.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
case21 = RlDataSet.get_initial_values()
test_case = case21.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 3.000e+00 1.000e+00]
 [0.000e+00 1.680e+02 1.000e+00]
 [0.000e+00 5.510e+02 1.000e+00]
 ...
 [2.952e+03 2.993e+03 1.000e+00]
 [2.975e+03 2.980e+03 1.000e+00]
 [2.983e+03 2.987e+03 1.000e+00]]


In [None]:
print(test_case.X.shape)

(30487, 3)


In [None]:
print(test_case.Y)

[[ 3]
 [-1]
 [ 4]
 ...
 [ 5]
 [ 6]
 [ 4]]


In [None]:
print(test_case.n)

3000


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.023638010025024414 seconds ---
--- train 322.158921957016 seconds ---
--- accuracy: 0.7933333516120911 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct") # run without batch input

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.031816959381103516 seconds ---
--- train 20.83100175857544 seconds ---
--- accuracy: 0.7316666841506958 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03726673126220703 seconds ---
--- train 502.83205246925354 seconds ---
--- accuracy: 0.79666668176651 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct") # run without batch input

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.037064552307128906 seconds ---
--- train 20.755454063415527 seconds ---
--- accuracy: 0.7316666841506958 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.025398731231689453 seconds ---
--- train 262.1607003211975 seconds ---
--- accuracy: 0.778333306312561 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct") # run without batch input

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05007219314575195 seconds ---
--- train 9.007418632507324 seconds ---
--- accuracy: 0.3766666650772095 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.048158884048461914 seconds ---
--- train 346.19911336898804 seconds ---
--- accuracy: 0.7900000214576721 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct") # run without batch input

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03285074234008789 seconds ---
--- train 9.162509202957153 seconds ---
--- accuracy: 0.7766666412353516 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.04832267761230469 seconds ---
--- train 342.5735297203064 seconds ---
--- accuracy: 0.7799999713897705 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct") # run without batch input

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03435373306274414 seconds ---
--- train 10.521466732025146 seconds ---
--- accuracy: 0.7866666913032532 ---


##citeseer

In [None]:
edg_file = "citeseer.edges"
node_file = "citeseer.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
citeseer = RlDataSet.get_initial_values()
test_case = citeseer.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 8.690e+02 1.000e+00]
 [1.000e+00 5.970e+02 1.000e+00]
 [1.000e+00 2.206e+03 1.000e+00]
 ...
 [3.196e+03 3.197e+03 1.000e+00]
 [3.227e+03 3.228e+03 1.000e+00]
 [3.242e+03 3.243e+03 1.000e+00]]


In [None]:
print(test_case.X.shape)

(4536, 3)


In [None]:
print(test_case.Y)

[[ 1]
 [-1]
 [ 4]
 ...
 [ 2]
 [ 3]
 [ 3]]


In [None]:
print(test_case.n)

3264


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03588271141052246 seconds ---
--- train 562.3592529296875 seconds ---
--- accuracy: 0.696319043636322 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05118560791015625 seconds ---
--- train 14.848748922348022 seconds ---
--- accuracy: 0.6886503100395203 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.06334495544433594 seconds ---
--- train 555.6530475616455 seconds ---
--- accuracy: 0.6855828166007996 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)


train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.052089691162109375 seconds ---
--- train 12.432457208633423 seconds ---
--- accuracy: 0.696319043636322 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03852081298828125 seconds ---
--- train 345.2316391468048 seconds ---
--- accuracy: 0.6825153231620789 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05598926544189453 seconds ---
--- train 20.92568802833557 seconds ---
--- accuracy: 0.3190183937549591 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.07450008392333984 seconds ---
--- train 353.41384196281433 seconds ---
--- accuracy: 0.6947852969169617 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05429697036743164 seconds ---
--- train 13.165440082550049 seconds ---
--- accuracy: 0.6947852969169617 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.042494773864746094 seconds ---
--- train 525.241046667099 seconds ---
--- accuracy: 0.6871165633201599 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.06247448921203613 seconds ---
--- train 20.883169174194336 seconds ---
--- accuracy: 0.6855828166007996 ---


##cora

In [None]:
edg_file = "cora.edges"
node_file = "cora.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
cora = RlDataSet.get_initial_values()
test_case = cora.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 8.000e+00 1.000e+00]
 [0.000e+00 4.350e+02 1.000e+00]
 [0.000e+00 5.440e+02 1.000e+00]
 ...
 [2.707e+03 7.740e+02 1.000e+00]
 [2.707e+03 1.389e+03 1.000e+00]
 [2.707e+03 2.344e+03 1.000e+00]]


In [None]:
print(test_case.X.shape)

(5429, 3)


In [None]:
print(test_case.Y)

[[2]
 [5]
 [4]
 ...
 [1]
 [0]
 [2]]


In [None]:
print(test_case.n)

2708


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05180621147155762 seconds ---
--- train 502.58276557922363 seconds ---
--- accuracy: 0.831792950630188 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05096435546875 seconds ---
--- train 14.380504846572876 seconds ---
--- accuracy: 0.709796667098999 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.029953479766845703 seconds ---
--- train 444.114625453949 seconds ---
--- accuracy: 0.8262476921081543 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.030592918395996094 seconds ---
--- train 10.528685331344604 seconds ---
--- accuracy: 0.7024029493331909 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.04567265510559082 seconds ---
--- train 203.43756866455078 seconds ---
--- accuracy: 0.8336414098739624 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03199601173400879 seconds ---
--- train 8.735424995422363 seconds ---
--- accuracy: 0.28280961513519287 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.03147721290588379 seconds ---
--- train 287.77333903312683 seconds ---
--- accuracy: 0.8299445509910583 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05087876319885254 seconds ---
--- train 20.874951362609863 seconds ---
--- accuracy: 0.8299445509910583 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run()

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.06289076805114746 seconds ---
--- train 502.3208260536194 seconds ---
--- accuracy: 0.8059149980545044 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.05168461799621582 seconds ---
--- train 20.899131298065186 seconds ---
--- accuracy: 0.8280960917472839 ---


##PubMed

In [None]:
edg_file = "PubMed.edges"
node_file = "PubMed.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
PubMed = RlDataSet.get_initial_values()
test_case = PubMed.split_sets(0.2)

In [None]:
print(test_case.X)

[[8964 2235    1]
 [8964 5975    1]
 [8964 1603    1]
 ...
 [8953  749    1]
 [8953 2175    1]
 [8953 5033    1]]


In [None]:
print(test_case.X.shape)

(44338, 3)


In [None]:
print(test_case.Y)

[[ 0]
 [ 0]
 [ 0]
 ...
 [ 1]
 [-1]
 [ 2]]


In [None]:
print(test_case.n)

19717


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.3289682865142822 seconds ---
--- train 1754.6710941791534 seconds ---
--- accuracy: 0.7882323265075684 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.21994996070861816 seconds ---
--- train 82.23268246650696 seconds ---
--- accuracy: 0.735734224319458 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.40156078338623047 seconds ---
--- train 1673.811951637268 seconds ---
--- accuracy: 0.7859497666358948 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.23819732666015625 seconds ---
--- train 66.16280746459961 seconds ---
--- accuracy: 0.7834136486053467 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.22405290603637695 seconds ---
--- train 2002.2049589157104 seconds ---
--- accuracy: 0.781891942024231 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.42499423027038574 seconds ---
--- train 42.107654094696045 seconds ---
--- accuracy: 0.5292924046516418 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.2214040756225586 seconds ---
--- train 565.0404715538025 seconds ---
--- accuracy: 0.7874714732170105 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.3243293762207031 seconds ---
--- train 82.31721091270447 seconds ---
--- accuracy: 0.7874714732170105 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.22463083267211914 seconds ---
--- train 1040.8125801086426 seconds ---
--- accuracy: 0.7699720859527588 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.23688006401062012 seconds ---
--- train 43.280879974365234 seconds ---
--- accuracy: 0.7697184681892395 ---


##proteins-all

In [None]:
edg_file = "proteins-all.edges"
node_file = "proteins-all.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
proteins_all = RlDataSet.get_initial_values()
test_case = proteins_all.split_sets(0.2)

In [None]:
print(test_case.X)

[[   11     0     1]
 [   22     0     1]
 [   32     0     1]
 ...
 [43438 43470     1]
 [43468 43470     1]
 [43469 43470     1]]


In [None]:
print(test_case.X.shape)

(162088, 3)


In [None]:
print(test_case.Y)

[[0]
 [0]
 [0]
 ...
 [2]
 [2]
 [2]]


In [None]:
print(max(test_case.Y))

[2]


In [None]:
print(test_case.n)

43471


###Laplacian = False, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.8885107040405273 seconds ---
--- train 4172.695830821991 seconds ---
--- accuracy: 0.7078444957733154 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 1.9805512428283691 seconds ---
--- train 202.8896131515503 seconds ---
--- accuracy: 0.692776620388031 ---


###Laplacian = False, DiagA = True, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.908355712890625 seconds ---
--- train 2099.4193699359894 seconds ---
--- accuracy: 0.705083966255188 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.5069622993469238 seconds ---
--- train 127.28984260559082 seconds ---
--- accuracy: 0.704048752784729 ---


### Laplacian = True, DiagA = False, Correlation = False

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.9047324657440186 seconds ---
--- train 5256.997646331787 seconds ---
--- accuracy: 0.6954221129417419 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = True, DiagA = False, Correlation = False, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.4960191249847412 seconds ---
--- train 142.29407477378845 seconds ---
--- accuracy: 0.6790890097618103 ---


### Laplacian = False, DiagA = False, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 1.8330798149108887 seconds ---
--- train 4963.6488881111145 seconds ---
--- accuracy: 0.7064642310142517 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = False, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 1.242522954940796 seconds ---
--- train 202.8031713962555 seconds ---
--- accuracy: 0.7062341570854187 ---


### Laplacian = False, DiagA = True, Correlation = True

In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("batch")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.5176630020141602 seconds ---
--- train 3758.7463619709015 seconds ---
--- accuracy: 0.7070393562316895 ---


In [None]:
GEE = GraphEncoderEmbed()
Z, W, emb_time = GEE.run(test_case.X, test_case.Y, test_case.n, Laplacian = False, DiagA = True, Correlation = True, EdgeList = True)

train_idx = test_case.train_idx
test_idx = test_case.test_idx
test_case.z_train= Z[train_idx]
test_case.z_test = Z[test_idx]
test_case.y_train = test_case.Y_train 
test_case.y_test = test_case.Y_test
test_case.d = test_case.k

gnn = GNN(test_case)
acc, train_time  = gnn.GNN_run("direct")

print("--- embed %s seconds ---" % emb_time)
print("--- train %s seconds ---" % train_time)
print("--- accuracy: %s ---" % acc)

--- embed 0.4997091293334961 seconds ---
--- train 142.2773756980896 seconds ---
--- accuracy: 0.694847047328949 ---


In [None]:
!cat /proc/gpuinfo

cat: /proc/gpuinfo: No such file or directory


## [Original]Supervised Learning

In [None]:
n = 3000
case = Case(n)

In [None]:
# get all combinations of different emb settings 

sets_no = 8
L_set = [True, False]
Diag_set = [True, False]
Corre_set = [True, False]
comb = [L_set, Diag_set, Corre_set]
comb_set = []

ele_list = [None, None, None]
for ele1 in comb[0]:
  ele_list[0] = ele1
  for ele2 in comb[1]:
    ele_list[1] = ele2
    for ele3 in comb[2]:
      ele_list[2] = ele3
      comb_set.append(ele_list.copy())

print(comb_set)
print(len(comb_set))

[[True, True, True], [True, True, False], [True, False, True], [True, False, False], [False, True, True], [False, True, False], [False, False, True], [False, False, False]]
8


In [None]:
def average_restuls(case_num, comb_set, learner_no):
  results = []
  for comb in comb_set:
    acc_final, train_time_final, emb_time_final, total_time_final = 0,0,0,0
    for i in range(10):
      test_case = copy.deepcopy(case_num)
      acc, train_time, emb_time, total_time = Run(test_case, "su", Learner = learner_no, Laplacian = comb[0], DiagA = comb[1], Correlation = comb[2])
      acc_final += acc
      train_time_final += train_time
      emb_time_final += emb_time 
      total_time_final += total_time
    
    acc_final /= 10
    train_time_final /= 10
    emb_time_final /= 10
    total_time_final /= 10

    result = comb + [acc_final, train_time_final, emb_time_final, total_time_final]
    results.append(result)
  
  return results

def plot(results):
  df = pd.DataFrame(results,
  index=['set_01','set_02','set_03','set_04','set_05','set_06','set_07','set_08'],
  columns=['Laplacian','DiagA', 'Correlation', 'Accuracy', 'Train_Time(s)', 'Emb_Time(s)', 'Total_Time(s)'])

  df = df.style.format({
    'Emb_Time(s)': '{:0.2f}',
    'Train_Time(s)': '{:0.5f}',
    'Total_Time(s)': '{:0.2f}'
  })

  display(df)

### Supervised

#### GNN

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
print(case_10.bd)

0.13


In [None]:
results = average_restuls(case_10, comb_set, 0)

acc:  0.95333331823349
--- embed 3.419264078140259 seconds ---
--- train 21.11810326576233 seconds ---
--- total 40.59562110900879 seconds ---
acc:  0.95333331823349
--- embed 3.4034903049468994 seconds ---
--- train 12.623661756515503 seconds ---
--- total 31.16509985923767 seconds ---
acc:  0.9549999833106995
--- embed 3.418227434158325 seconds ---
--- train 12.319038391113281 seconds ---
--- total 30.31583523750305 seconds ---
acc:  0.95333331823349
--- embed 3.3595666885375977 seconds ---
--- train 12.580832242965698 seconds ---
--- total 31.665188550949097 seconds ---
acc:  0.949999988079071
--- embed 3.4427661895751953 seconds ---
--- train 12.615651845932007 seconds ---
--- total 30.693434953689575 seconds ---
acc:  0.9516666531562805
--- embed 3.4687247276306152 seconds ---
--- train 12.653229475021362 seconds ---
--- total 31.406693696975708 seconds ---
acc:  0.9516666531562805
--- embed 3.4163429737091064 seconds ---
--- train 21.00416111946106 seconds ---
--- total 39.331142

In [None]:
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.952167,16.79191,3.41,35.29
set_02,True,True,False,0.48,14.96333,3.42,33.36
set_03,True,False,True,0.951833,16.69279,3.46,36.04
set_04,True,False,False,0.48,15.7976,3.44,35.08
set_05,False,True,True,0.953,15.7844,3.32,24.09
set_06,False,True,False,0.954833,15.64148,3.31,23.85
set_07,False,False,True,0.953,12.97473,3.45,21.31
set_08,False,False,False,0.954667,16.57532,3.43,24.88


## profiling


1.   https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/01.07-Timing-and-Profiling.ipynb
2.   https://perso.crans.org/besson/publis/notebooks/Profiling_in_a_Jupyter_notebook.html
3. have to disconnect the runtime then run for one task at a time

In [None]:
! pip install memory_profiler

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting memory_profiler
  Downloading memory_profiler-0.60.0.tar.gz (38 kB)
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py) ... [?25l[?25hdone
  Created wheel for memory-profiler: filename=memory_profiler-0.60.0-py3-none-any.whl size=31284 sha256=ad4dcf3435d162d5b00539976212730190f04121ed3f1d186df634201aa206f6
  Stored in directory: /root/.cache/pip/wheels/67/2b/fb/326e30d638c538e69a5eb0aa47f4223d979f502bbdb403950f
Successfully built memory-profiler
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.60.0


In [None]:
%load_ext memory_profiler

In [None]:
! pip install hypothesis

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hypothesis
  Downloading hypothesis-6.47.2-py3-none-any.whl (387 kB)
[K     |████████████████████████████████| 387 kB 4.2 MB/s 
[?25hCollecting exceptiongroup>=1.0.0rc8
  Downloading exceptiongroup-1.0.0rc8-py3-none-any.whl (11 kB)
Installing collected packages: exceptiongroup, hypothesis
Successfully installed exceptiongroup-1.0.0rc8 hypothesis-6.47.2


In [None]:
# https://github.com/HypothesisWorks/hypothesis/issues/985
# doesn't help
import inspect
from hypothesis import settings

def is_debugging():
    for frame in inspect.stack():
        if frame[1].endswith("pydevd.py"):
            return True
    return False

if is_debugging():
    # The pycharm debugger requires to
    # run hypothesis with the `use_coverage=False` flag.
    try:
        settings.register_profile("debug", use_coverage=False)
        settings.load_profile(os.getenv(u'HYPOTHESIS_PROFILE', 'debug'))
    except:
        pass
else:
    settings.load_profile(os.getenv(u'HYPOTHESIS_PROFILE', 'default'))

### Sparse embed

In [None]:
%%file mprun.py

import numpy as np
# for sparse matrix
from scipy import sparse

def graph_encoder_embed(X, Y, n, **kwargs):
  """
    graph embedding function
    input X is sparse csr matrix of adjacency matrix
    -- if there is a connection between node i and node j:
    ---- X(i,j) = 1, no edge weight
    ---- X(i,j) = edge weight.
    -- if there is no connection between node i and node j:
    ---- X(i,j) = 0, 
    ---- note there is no storage for this in sparse matrix. 
    ---- No storage means 0 in sparse matrix.
    input Y is a list of labels:
    -- value -1 indicate no lable
    -- value >=0 indicate real label
    input train_idx: a list of indices of input X for training set 
  """
  defaultKwargs = {'Correlation': True}
  kwargs = { **defaultKwargs, **kwargs}


  # assign k to the max along the first column
  # Note for python, label Y starts from 0. Python index starts from 0. thus size k should be max + 1
  k = Y[:,0].max() + 1

  #nk: 1*n array, contains the number of observations in each class
  nk = np.zeros((1,k))
  for i in range(k):
    nk[0,i] = np.count_nonzero(Y[:,0]==i)
  
  #W: sparse matrix for encoder marix. W[i,k] = {1/nk if Yi==k, otherwise 0}
  W = sparse.dok_matrix((n, k), dtype=np.float32)

  for i in range(Y.shape[0]):
    k_i = Y[i,0]
    if k_i >=0:
      W[i,k_i] = 1/nk[0,k_i]
  
  W = sparse.csr_matrix(W)
  Z = X.dot(W)
  
  return Z, W


Overwriting mprun.py


### origin embed

In [None]:
def adj_to_edg(A):
  """
    input is the adjacency matrix: A
    other variables in this function:
    s: number of edges
    return edg_list -- matrix format with shape(edg_sum,3):
    example row in edg_list(matrix): [vertex1, vertex2, connection weight from Adj matrix]
  """
  # check the len of the second dimenson of A
  if A.shape[1] <= 3:
    edg = A
  else:
    n = A.shape[0]
    # construct the initial edgg_list matrix with the size of (edg_sum, 3)
    edg_list = []
    for i in range(n):
      for j in range(n):
        if A[i,j] > 0:
          edg_list.append([i, j, A[i,j]])
    edg = np.array(edg_list)
  return edg

In [None]:
%%file mprun_origin.py

import numpy as np

def graph_encoder_embed(X,Y,n,**kwargs):
  """
    input X is s*3 edg list: nodei, nodej, connection weight(i,j)
    graph embedding function
  """
  defaultKwargs = {'Correlation': True}
  kwargs = { **defaultKwargs, **kwargs}
  
  # assign k to the max along the first column
  # Note for python, label Y starts from 0. Python index starts from 0. thus size k should be max + 1
  k = Y[:,0].max() + 1

  #nk: 1*n array, contains the number of observations in each class
  #W: encoder marix. W[i,k] = {1/nk if Yi==k, otherwise 0}
  nk = np.zeros((1,k))
  W = np.zeros((n,k))

  for i in range(k):
    nk[0,i] = np.count_nonzero(Y[:,0]==i)

  for i in range(Y.shape[0]):
    k_i = Y[i,0]
    if k_i >=0:
      W[i,k_i] = 1/nk[0,k_i]

  # Edge List Version in O(s)
  Z = np.zeros((n,k))
  i = 0
  for row in X:
    [v_i, v_j, edg_i_j] = row
    v_i = int(v_i)
    v_j = int(v_j)

    label_i = Y[v_i][0] 
    label_j = Y[v_j][0]

    if label_j >= 0:
      Z[v_i, label_j] = Z[v_i, label_j] + W[v_j, label_j]*edg_i_j
    if (label_i >= 0) and (v_i != v_j):
      Z[v_j, label_i] = Z[v_j, label_i] + W[v_i, label_i]*edg_i_j
  
  return Z, W

### cases

In [None]:
import numpy as np
from scipy import sparse
import time

 #### case 10 with 5000 nodes

##### sparse

In [None]:
n = 5000
case = Case(n)
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


In [None]:
len(np.transpose(np.nonzero(case_10.X)))

2780918

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)

In [None]:
from mprun import graph_encoder_embed

In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_sparse, case_10.Y, case_10.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)






##### original

In [None]:
n = 5000
case = Case(n)
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


In [None]:
len(np.transpose(np.nonzero(case_10.X)))

2780918

In [None]:
X_edg = adj_to_edg(case_10.X)

In [None]:
from mprun_origin import graph_encoder_embed

In [None]:
emb_strat = time.time()
Z, W = graph_encoder_embed(X_edg, case_10.Y, case_10.n)
emb_end = time.time()
emb_time = emb_end - emb_strat
print(emb_time)

10.877496480941772


doc: https://pypi.org/project/memory-profiler/
The first column represents the line number of the code that has been profiled, the second column (Mem usage) the memory usage of the Python interpreter after that line has been executed. The third column (Increment) represents the difference in memory of the current line with respect to the last one. The last column (Line Contents) prints the code that has been profiled.

In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_edg, case_10.Y, case_10.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)






 #### case 10 with 3000 nodes

##### sparse

In [None]:
n = 3000
case = Case(n)
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
len(np.transpose(np.nonzero(case_10.X)))

1000498

In [None]:
X_sparse = sparse.csr_matrix(case_10.X)

In [None]:
from mprun import graph_encoder_embed

In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_sparse, case_10.Y, case_10.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)






##### original

In [None]:
n = 3000
case = Case(n)
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
len(np.transpose(np.nonzero(case_10.X)))

1000498

In [None]:
X_edg = adj_to_edg(case_10.X)

In [None]:
from mprun_origin import graph_encoder_embed

In [None]:
emb_strat = time.time()
Z, W = graph_encoder_embed(X_edg, case_10.Y, case_10.n)
emb_end = time.time()
emb_time = emb_end - emb_strat
print(emb_time)

3.5426878929138184


In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_edg, case_10.Y, case_10.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)






#### case 20 with 5000 nodes

##### sparse

In [None]:
n = 5000
case = Case(n)
case_20 = case.case_20_fully_known()
case_20.summary()

[[0.20530728]
 [0.25104189]
 [0.22868416]
 ...
 [0.06156643]
 [0.27507487]
 [0.12602176]]
name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


In [None]:
len(np.transpose(np.nonzero(case_20.X)))

184430

In [None]:
X_sparse = sparse.csr_matrix(case_20.X)

In [None]:
from mprun import graph_encoder_embed

In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_sparse, case_20.Y, case_20.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)






##### original

In [None]:
n = 5000
case = Case(n)
case_20 = case.case_20_fully_known()
case_20.summary()

[[0.20530728]
 [0.25104189]
 [0.22868416]
 ...
 [0.06156643]
 [0.27507487]
 [0.12602176]]
name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
5000
d:
<class 'int'>
3
X:
(5000, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(5000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [0]
 [1]]


In [None]:
len(np.transpose(np.nonzero(case_20.X)))

184430

In [None]:
X_edg = adj_to_edg(case_20.X)

In [None]:
from mprun_origin import graph_encoder_embed

In [None]:
emb_strat = time.time()
Z, W = graph_encoder_embed(X_edg, case_20.Y, case_20.n)
emb_end = time.time()
emb_time = emb_end - emb_strat
print(emb_time)

0.6683666706085205


In [None]:
%mprun -f graph_encoder_embed Z,W = graph_encoder_embed(X_edg, case_20.Y, case_20.n)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 845, in enable
    sys.settrace(self.trace_memory_usage)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.7/dist-packages/memory_profiler.py", line 848, in disable
    sys.settrace(self._original_trace_function)




