# Compute graph kernels

In [49]:
import networkx as nx
import numpy as np
import logging

# Grab the root logger (no name given) and lower its threshold to DEBUG so
# that every record emitted by the cells below is shown.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Emit one DEBUG record as a smoke test of the logging configuration.
logging.debug("test")

import os 
# NOTE: '__file__' is a quoted string literal, not the __file__ variable, so
# realpath() resolves it against the current working directory and dir_path
# ends up being that directory (symlinks aside).
dir_path = os.path.dirname(os.path.realpath('__file__'))

DEBUG:root:test


In [50]:
# Load the unit-testing graph from GML; label='id' tells networkx to use each
# node's `id` attribute as its node label.
H = nx.read_gml(dir_path+'/04_unit_testing/_graph.gml', label='id')

In [51]:
def csv_labeled_matrix_to_nparray(path):
    """Load a labelled matrix from a comma-separated file as a numpy array.

    The first row of the file is discarded as a header, and every NaN entry
    in the remaining rows is dropped before the rows are stacked.

    Parameters
    ----------
    path
        Anything ``np.genfromtxt`` accepts as its input (e.g. a file name).

    Returns
    -------
    numpy.ndarray
        The rows after the header, with their NaN entries removed.
    """
    raw = np.genfromtxt(path, delimiter=',')

    rows = []
    # raw[0] is the header row, so start from the second row.
    for row in raw[1:]:
        # Fields that were not parsed as numbers show up as NaN (presumably
        # the row labels); keep only the real values.
        rows.append([value for value in row if not np.isnan(value)])
    return np.array(rows)

## commuteTimeKernel

Computes the commute-time kernel, which is the expected time of going back and forth between a pair of nodes. If the network is connected, then the commute-time kernel will be totally dense, therefore reflecting global properties of the network. For further details, see [Yen, 2007]. This kernel can be computed using either the unnormalised or the normalised graph Laplacian.

In [52]:
def commute_time_kernel(G, normalized=False):
    """Return the commute-time kernel of an undirected graph.

    The kernel is computed as the Moore-Penrose pseudo-inverse of the graph
    Laplacian.

    Parameters
    ----------
    G : networkx graph
        The graph to compute the kernel for. Must be undirected.
    normalized : bool, optional
        If True, use ``nx.normalized_laplacian_matrix``; otherwise (default)
        use ``nx.laplacian_matrix``.

    Returns
    -------
    numpy.ndarray
        Dense pseudo-inverse of the chosen Laplacian.

    Raises
    ------
    ValueError
        If ``G`` is directed.
    """
    if nx.is_directed(G):
        # Raise instead of exiting the interpreter: a library function should
        # let the caller (or the notebook) decide how to handle bad input.
        raise ValueError('graph must be undirected')

    if normalized:
        laplacian = nx.normalized_laplacian_matrix(G)
    else:
        laplacian = nx.laplacian_matrix(G)

    # networkx returns a sparse matrix; np.linalg.pinv needs a dense array.
    return np.linalg.pinv(laplacian.toarray())

In [58]:
def test_commute_time_kernel(G, test_matrix):
    """Check commute_time_kernel(G) against a reference matrix.

    Logs both the computed kernel and the reference at INFO level, then
    asserts that they agree element-wise within np.allclose's default
    tolerances.

    Parameters
    ----------
    G
        Graph passed straight to ``commute_time_kernel``.
    test_matrix
        Expected kernel to compare against.
    """
    computed = commute_time_kernel(G)

    for title, matrix in (('Computed matrix', computed),
                          ('Test matrix', test_matrix)):
        logging.info(' %s  \n %s\n', title, matrix)

    # Compare with a tolerance rather than exactly (floating point).
    matches = np.allclose(computed, test_matrix)
    assert matches
    logging.info('Test commute_time_kernel passed')
    
# Run the check on graph H against the reference kernel stored in
# 04_unit_testing/commuteTimeKernel.csv.
test_commute_time_kernel(H, csv_labeled_matrix_to_nparray(dir_path+'/04_unit_testing/commuteTimeKernel.csv'))

INFO:root: Computed matrix  
 [[ 0.13585449  0.01349958  0.00707249 ...  0.00140383 -0.00238913
  -0.00211733]
 [ 0.01349958  0.09948265  0.00573008 ...  0.0325679  -0.00379415
   0.00052156]
 [ 0.00707249  0.00573008  0.05223533 ... -0.00223114  0.01331363
  -0.0036509 ]
 ...
 [ 0.00140383  0.0325679  -0.00223114 ...  0.39279541 -0.00579927
  -0.00742603]
 [-0.00238913 -0.00379415  0.01331363 ... -0.00579927  0.37149228
  -0.00921487]
 [-0.00211733  0.00052156 -0.0036509  ... -0.00742603 -0.00921487
   0.35492096]]

INFO:root: Test matrix  
 [[ 0.13585449  0.01349958  0.00707249 ...  0.00140383 -0.00238913
  -0.00211733]
 [ 0.01349958  0.09948265  0.00573008 ...  0.0325679  -0.00379415
   0.00052156]
 [ 0.00707249  0.00573008  0.05223533 ... -0.00223114  0.01331363
  -0.0036509 ]
 ...
 [ 0.00140383  0.0325679  -0.00223114 ...  0.39279541 -0.00579927
  -0.00742603]
 [-0.00238913 -0.00379415  0.01331363 ... -0.00579927  0.37149228
  -0.00921487]
 [-0.00211733  0.00052156 -0.0036509  ...

## diffusionKernel

Computes the classical diffusion kernel that involves matrix exponentiation. It has a "bandwidth" parameter σ^2 that controls the extent of the spreading. Quoting [Smola, 2003]: K(x1,x2) can be visualized as the quantity of some substance that would accumulate at vertex x2 after a given amount of time if we injected the substance at vertex x1 and let it diffuse through the graph along the edges. This kernel can be computed using both the unnormalised and normalised graph Laplacian.

In [None]:
# NOTE(review): this parses the .gml file as CSV via
# csv_labeled_matrix_to_nparray and then hands the resulting array to
# nx.read_gml as its `path` argument. That looks unintended (the cell has
# never been executed) -- presumably Mt should be a reference matrix loaded
# from a CSV file; confirm the intended file before running.
Mt = nx.read_gml(csv_labeled_matrix_to_nparray(dir_path+'/04_unit_testing/_graph.gml'), label='id')