In [1]:
import numpy as np
import scipy.sparse as sparse
from scipy.io import mmread
from scipy.linalg import expm, norm
from timeit import default_timer as timer

In [2]:
# load datasets: each Matrix Market (.mtx) file is read from a path relative to the
# working directory (nothing is downloaded here) and the result is converted to CSR
dol = mmread('dolphins.mtx').tocsr()
fb = mmread('facebook.mtx').tocsr()
pa = mmread('PA.mtx').tocsr()

In [3]:
# get dolphin names dictionary
dol_names = 'dolphins_nodename.txt'

# read names text file; the context manager closes the handle even if parsing fails
with open(dol_names, 'r') as f:
    # the first whitespace-separated token of each line is the node's name;
    # blank lines (e.g. a trailing newline at end of file) carry no name and are skipped
    names = [line.split()[0] for line in f if line.strip()]

# create dictionary where keys are 0-based node numbers and vals are names
keys = list(range(len(names)))
names_dict = dict(zip(keys, names))

In [4]:
# Arnoldi algorithm

# matrix A for which exp(A)b is of interest, n x n
# b initial vector to be used, length n
# m, the produced Krylov subspace will have dimension m

def arnoldi_iteration(A, m: int, b=None, tol: float = 1e-12):
    """Run up to ``m`` steps of the Arnoldi process on ``A``.

    Parameters
    ----------
    A : (n, n) array or sparse matrix
        Only ``A.shape[0]`` and the product ``A @ v`` are used.
    m : int
        Number of Arnoldi steps to attempt.
    b : array_like of length n, optional
        Starting vector. Defaults to the all-ones vector, which is what the
        function used before this parameter existed.
    tol : float, optional
        Values of ``h[j + 1, j]`` at or below this threshold are treated as
        zero (breakdown). Defaults to ``1e-12``.

    Returns
    -------
    V : ndarray, shape (n, m + 1)
        Column 0 is ``b / ||b||_2``; each later column is the normalised
        remainder of ``A @ v`` after orthogonalisation against the earlier ones.
    h : ndarray, shape (m + 1, m)
        Orthogonalisation coefficients (upper Hessenberg by construction).

    If breakdown happens at step ``j`` the function returns immediately with
    the full-size arrays; columns of ``V`` after ``j`` and columns of ``h``
    after ``j`` are left as zeros.

    Raises
    ------
    ValueError
        If an explicit ``b`` does not have length ``n``.
    """
    n = A.shape[0]

    if b is None:
        b = np.ones(n)  # default start vector 1_n
    else:
        b = np.asarray(b, dtype=float).ravel()
        if b.shape[0] != n:
            raise ValueError(
                'b has length %d but A has %d rows' % (b.shape[0], n))

    h = np.zeros((m + 1, m))
    V = np.zeros((n, m + 1))
    v = b / norm(b)  # unit 2-norm starting vector
    V[:, 0] = v      # first Krylov basis vector

    for j in range(m):
        w = A @ v  # candidate vector

        # modified Gram-Schmidt: remove the components along v_0 .. v_j one at a time
        for i in range(j + 1):
            h[i, j] = V[:, i] @ w
            w = w - h[i, j] * V[:, i]

        h[j + 1, j] = norm(w)

        if h[j + 1, j] <= tol:
            # nothing left to normalise: stop and hand back what was built so far
            return V, h

        v = w / h[j + 1, j]
        # index with the outer step counter j, not the inner loop's leaked variable
        V[:, j + 1] = v

    return V, h

In [5]:
# get Vm and Hm from output
def get_m(Vm1, Hm1, m):
    """Cut the Arnoldi output down to its m-dimensional part.

    Returns the first ``m`` columns of ``Vm1`` and the leading ``m x m`` block
    of ``Hm1``, each wrapped as a ``scipy.sparse.csc_matrix``.
    """
    basis = sparse.csc_matrix(Vm1[:, :m])
    hessenberg = sparse.csc_matrix(Hm1[:m, :m])
    return basis, hessenberg

In [6]:
# get approximation from arnoldi result
def a_approximation(A, Vm, Hm, m):
    """Evaluate the Krylov approximation ``beta * Vm @ expm(Hm) @ e1``.

    ``beta`` is the 2-norm of the all-ones vector of length ``A.shape[0]`` and
    ``e1`` is the first unit vector of length ``m``.

    Parameters
    ----------
    A : object with a ``shape`` attribute
        Only ``A.shape[0]`` is read, to size the all-ones vector.
    Vm : (n, m) sparse matrix or array
        Basis matrix, e.g. the first output of ``get_m``.
    Hm : (m, m) sparse matrix or array
        Projected matrix, e.g. the second output of ``get_m``.
    m : int
        Dimension of ``Hm`` (and length of ``e1``).

    Returns
    -------
    scipy.sparse.csc_matrix, shape (n, 1)
        Kept sparse so that callers using ``.argmax()``, ``.max()`` etc. on the
        result keep working.

    Raises
    ------
    ValueError
        If ``Hm`` is not ``m x m``.
    """
    # beta = ||b||_2 for the all-ones vector b
    beta = norm(np.ones(A.shape[0]))

    # scipy.linalg.expm operates on dense arrays; Hm is only m x m, so
    # converting it is cheap and avoids handing expm a sparse object
    H_dense = Hm.toarray() if sparse.issparse(Hm) else np.asarray(Hm)
    if H_dense.shape != (m, m):
        raise ValueError(
            'Hm has shape %s, expected (%d, %d)' % (H_dense.shape, m, m))

    # expm(Hm) @ e1 is simply the first column of expm(Hm)
    first_col = expm(H_dense)[:, 0]

    # one matrix-vector product instead of forming the full n x m product
    # Vm @ expm(Hm) and then discarding all but its first column
    x = beta * np.asarray(Vm @ first_col).ravel()

    return sparse.csc_matrix(x.reshape(-1, 1))

In [7]:
########## RESULTS

In [8]:
# dolphins: Krylov approximation built from an m-step Arnoldi run on `dol`

m = 25  # number of Arnoldi steps requested

dolV, dolH = arnoldi_iteration(dol, m)
V, H = get_m(dolV,dolH,m)  # first m basis columns and leading m x m block, as CSC
dol_res = a_approximation(dol, V, H,m)

# Sanity check: display the last ROW of the m x m matrix H (H[-1] indexes a row,
# not a column). arnoldi_iteration returns early when a subdiagonal entry falls
# to its zero threshold, which leaves the remaining columns of h at zero, so a
# clearly nonzero last row shows the loop reached the final step for this m.
H[-1].todense()

matrix([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  1.59088851, -0.70742389]])

In [9]:
# facebook: Krylov approximation built from an m-step Arnoldi run on `fb`

m = 100  # number of Arnoldi steps requested

fbV, fbH = arnoldi_iteration(fb, m)
V, H = get_m(fbV,fbH,m)  # first m basis columns and leading m x m block, as CSC
fb_res = a_approximation(fb, V, H,m)

# Sanity check: display the last ROW of the m x m matrix H (H[-1] indexes a row,
# not a column). arnoldi_iteration returns early when a subdiagonal entry falls
# to its zero threshold, which leaves the remaining columns of h at zero, so a
# clearly nonzero last row shows the loop reached the final step for this m.
H[-1].todense()

matrix([[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0

In [10]:
# pennsylvania: Krylov approximation built from an m-step Arnoldi run on `pa`

m = 100  # number of Arnoldi steps requested

paV, paH = arnoldi_iteration(pa, m)
V, H = get_m(paV,paH,m)  # first m basis columns and leading m x m block, as CSC
pa_res = a_approximation(pa, V, H,m)

# Sanity check: display the last ROW of the m x m matrix H (H[-1] indexes a row,
# not a column). arnoldi_iteration returns early when a subdiagonal entry falls
# to its zero threshold, which leaves the remaining columns of h at zero, so a
# clearly nonzero last row shows the loop reached the final step for this m.
H[-1].todense()

matrix([[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0

In [None]:
# Results (node counting from 1 not 0)

In [11]:
# Dolphins: get most and least central nodes

In [18]:
# argmax() is a 0-based position and names_dict is keyed 0-based, so the lookup uses it
# directly; the +1 is only for the 1-based node number that is printed
print('Central-most node is', dol_res.argmax() +1,', with value',dol_res.max(),',which represents', names_dict[dol_res.argmax()])

Central-most node is 15 , with value 2314.949993033023 ,which represents Grin


In [19]:
# argmin() is a 0-based position and names_dict is keyed 0-based, so the lookup uses it
# directly; the +1 is only for the 1-based node number that is printed
print('Least central node is', dol_res.argmin() +1,', with value',dol_res.min(),',which represents', names_dict[dol_res.argmin()])

Least central node is 61 , with value 33.41912649100891 ,which represents Zig


In [14]:
# Facebook: get most and least central nodes

In [23]:
# +1 converts the 0-based argmax() position to the 1-based node numbering used in these results
print('Central-most node is', fb_res.argmax() +1,', with value',fb_res.max())

Central-most node is 1913 , with value 4.9056691536452307e+70


In [25]:
# +1 converts the 0-based argmin() position to the 1-based node numbering used in these results
print('Least central node is', fb_res.argmin() +1,', with value',fb_res.min())

Least central node is 693 , with value 1.8690838011151774e+58


In [26]:
# PA: get most and least central nodes

In [27]:
# +1 converts the 0-based argmax() position to the 1-based node numbering used in these results
print('Central-most node is', pa_res.argmax() +1,', with value',pa_res.max())

Central-most node is 847933 , with value 188.05911061957053


In [28]:
# +1 converts the 0-based argmin() position to the 1-based node numbering used in these results
print('Least central node is', pa_res.argmin() +1,', with value',pa_res.min())

Least central node is 664 , with value 0.9999999999999964


In [15]:
########## DOLPHIN ERROR PLOT

In [16]:
########## FACEBOOK ERROR PLOT

In [17]:
########## PA TIMINGS BAR CHART 