In [1]:
import os
os.chdir("..")

import pathpy as pp
from pathpy import Network
import matplotlib.pyplot as plt
from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Any
import scipy.sparse
from scipy.sparse import linalg as spl
import numpy as np

from collections import defaultdict # think these two are used to iterate through dicts
from collections.abc import Iterable
from __future__ import annotations
from typing import TYPE_CHECKING, Union, Dict, Tuple



In [2]:
def alpha_S(ind, T_t, ps_t):
    '''Calculate the persistence probability of a set of nodes S 
    ind : list of indices
    T_t : sparse transposed transition matrix
    ps_t = stationary probability column vector'''
    T_t = T_t[ind,:][:,ind] # have to do it this way but unsure why
    ps_t = ps_t[ind]
    return T_t.dot(ps_t).sum()/ps_t.sum()
    

    # sample networks to work with 
    # apparently this has ill-defined transition probability at b so I need teleportation
    net = pp.Network(directed=True)
    net.add_edge('a', 'x',weight=2)
    net.add_edge('x', 'b')
    net.add_edge('a','c')
    net.add_edge('x','b')
    net.add_edge('x','f')
    net.add_edge('c','x')
    # make less sparse - not sure if necessary
    net.add_edge('a','f')
    net.add_edge('x','y')
    net.add_edge('y','c')
    net.add_edge('c','y')
    net.add_edge('c','b')
    network = net


In [15]:
# from paths dataset - 1% sample to start
flight_paths= pp.Paths.read_file("Data/US flights 2011/US flights od.ngram", separator=',', frequency=True)
net = pp.Network.from_paths(flight_paths)

2021-04-05 11:41:22 [Severity.INFO]	Reading ngram data ... 
2021-04-05 11:41:22 [Severity.INFO]	finished. Read 358 paths with maximum length 6
2021-04-05 11:41:22 [Severity.INFO]	Calculating sub path statistics ... 
2021-04-05 11:41:23 [Severity.INFO]	finished.


In [16]:
net

# Core-periphery for $\mathcal{M}_1$ network
Quite helpful: https://github.com/IngoScholtes/kdd2018-tutorial/blob/master/solutions/1_2_pathpy.py <br>
For directed and weighted networks. But needs to be connected because uses stationary distributions. For now take reduced connected network and look at teleportation later if possible. <br>
Node strength $s_i$ is the sum of the weights of the links connected to that node
1. Start at the node with the minimum total node strength $s^{in}_i+s^{out}_i$ and set $S=v_i$
2. Add a node to $S$ such that it creates the smallest increase in persistence probability \begin{equation}
\alpha_S = \frac{\sum_{i,j \in S}p_i^*T_{ij} }{\sum_{i \in S}p_i^*}
\end{equation}
3. Then the $\alpha$-periphery is the set of nodes $S_{\alpha}$ satisfying $\alpha_i \leq \alpha$ and you can tune by $\alpha$
Choose uniformly if you get have multiple nodes with the same strength etc

In [17]:
# need a dict with indexes and need strengths
A = net.adjacency_matrix(weighted=True)
out_degrees = np.squeeze(np.asarray(A.sum(1))) #Â sum each row (defined my own to be weighted)
in_degrees = np.squeeze(np.asarray(A.sum(0))) # sum each column
node_strengths = out_degrees+in_degrees

In [18]:
# reduce network to remove hanging nodes (or else T will be ill-defined)
# look into teleportation later
if (out_degrees == 0).any():
    ind = np.where(out_degrees == 0)[0] 
    hanging_nodes = []
    hanging_nodes = []
    for i in ind: hanging_nodes.append(list(net.nodes.keys())[i])
    for node in hanging_nodes : net.remove_node(node)
    
net

In [19]:
# recalculate A etc.
A = net.adjacency_matrix(weighted=True)
N = A.shape[0]
out_degrees = np.squeeze(np.asarray(A.sum(1))) #Â sum each row (defined my own to be weighted)
in_degrees = np.squeeze(np.asarray(A.sum(0))) # sum each column
node_strengths = out_degrees+in_degrees
A.todense()

matrix([[ 0., 47.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        ...,
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [20]:
# CP algorithm needed
# find apha of si_min
i_min = np.where(node_strengths == node_strengths.min())[0] 
i_min = np.random.choice(i_min,1)[0] # randomly choose one if there's multiple mins
s0 = list(net.nodes.keys())[i_min] # node to initalise CP algorithm

T_t = net.transition_matrix()
T = T_t.transpose()

_,ps = scipy.sparse.linalg.eigs(T,k=1,which='LM',tol=0) # look into this a little i.e. L&R eigenvec
ps_t = ps.real #Â need to check about complex outputs

In [21]:
# (greedy) algorithm to test which node will create smallest increase in alpha_s
# S: peripheral nodes
# do nodes need to be connected?
# at end select nodes with coreness below alpha_c
# turn into a function and add to my_functions

nodes_ind = np.arange(0,N)

node_dict = {}
i=0
for node in net.nodes.keys():
    node_dict.update({i:node})
    i=i+1
    
S = [i_min]
alpha = alpha_S(S,T_t,ps_t) # alpha_S is zero for any single node
nodes = np.delete(nodes_ind,i_min)
coreness = dict()
coreness.update({node_dict[i_min]:0.0})

while (len(nodes)>0): # & (alpha < alpha_c) # if you want to cut off S at point

    alpha_test = np.empty(N)
    alpha_test[:] = np.nan

    # edit to only consider connected nodes
    for node in nodes:
        S_test = S + [node]
        alpha_test[node] = alpha_S(S_test,T_t,ps_t) 

    node_min = np.where(alpha_test == np.nanmin(alpha_test))[0] 
    node_min = np.random.choice(node_min,1)[0]
    alpha = alpha + alpha_test[node_min]
    coreness.update({node_dict[node_min]:alpha_test[node_min]})
    
    S = S + [node_min]
    nodes = nodes[nodes!=node_min]
    
# assign a coreness value to each node
for node, val in coreness.items():
    net.nodes[node]['coreness']=val

In [47]:
# style from https://github.com/IngoScholtes/csh2018-tutorial/blob/master/solutions/2_pathpy.ipynb
#style = { 'node_color' : {'a': '#aacc99', 'b': '#aacc99', 'd': '#aacc99', 'e': '#aacc99', 'c': '#cc6666'}}
alpha_c = 0.05
style={}
style['node_color']={v:'lightskyblue' if u < alpha_c else 'darkorange' for v,u in coreness.items()}
pp.visualisation.plot(net, **style)

In [44]:
net.nodes

{'inweight': 5730.0,
 'outweight': 6616.0,
 'indegree': 59,
 'outdegree': 64,
 'coreness': 1.0}

In [None]:
# colour nodes by coreness value

    # to check T working as it should 
    ones = np.ones(4) # generalise to get size automatically later
    ones
    T.dot(ones) #Â check this is satisfied => should have an eigenvalue unity now
    
    # laplacian for later
    L = net.laplacian_matrix(weighted=True)
    Ldense = L.todense()

# Centrality functions

In [2]:
## from pathpy repo

def eigenvector_centrality(network: Network, # arg name and type (didn't know you could do this)
                           weight: Union[str, bool, None] = None,
                           alpha=0.85,
                           **kwargs: Any) -> dict:
    """Calculates the eigenvector centrality of all nodes.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    Examples
    --------
    Compute eigenvector centrality in a simple network

    >>> import pathpy as pp
    >>> net = pp.Network(directed=True)
    >>> net.add_edge('a', 'x')
    >>> net.add_edge('x', 'b')
    >>> c = pp.algorithms.centralities.eigenvector_centrality(net)
    >>> c['a']
    1
    """
    evcent: dict = dict()
        
    A = network.adjacency_matrix(weighted=True, transposed=True) # changed this for my version
    N = A.shape[0] # total nodes
    I = scipy.sparse.identity(N) # identity
    ev = spl.inv(I - alpha*A).dot(ones)
    
    if kwargs:
        _, ev = spl.eigs(A, k=1, which='LM', **kwargs) # which='LM' means find biggest, kwargs if changing eigenvec to test
    else:
        _, ev = spl.eigs(A, k=1, which='LM') #Â only returns vec for stationary distributino
    ev = ev.reshape(ev.size, ) # makes it a column vector I think
    S = np.sum(ev)
    
    for v, deg in network.nodes.items():
        evcent[v] = np.real(ev[network.nodes.index[v.uid]]/S) 
    for v in network.nodes:
        evcent[v.uid] = np.real(ev[network.nodes.index[v.uid]]/S) 
    return evcent


# so last few lines: take eigenvector corresponding to largest eigenvalue (1) of adjacency matrix of net
# reshape this vector to a column(?) vector. evcent is an empty dictionary. Then define for each key 1.uid 
#Â (not sure what uid stands for...) the values is the real part of the stationary probability averaged over 
#Â all the stationary probabilities and these will always sum to 1

## Trying to come generalise PageRank like in $(181)$

\begin{equation}
p^* = \frac{1-\alpha}{N}\begin{pmatrix}1 & ... & 1 \end{pmatrix} \begin{pmatrix} I - \alpha T \end{pmatrix}^{-1}
\end{equation}

My main issues are keeping it efficient, use scipy.sparse etc. and also keeping track of the indices...except now that I think about it all that matters are the indices of $p^*=(p_1^*...p_N^*)$ so it's ok. <br><br>
T is defined wrong here, it is meant to be $T_{ij} = \frac{A_{ij}}{k_i^{\text{out}}}$., just going to assume $A_{ij}$ defines a path from $i \to j$ but might need to change this. Hence each row will need to be divide by it's sum to get $T$.

In [169]:

def my_pagerank(network,alpha = 0.85):

    node_names = list(network.nodes.keys())

    A = network.adjacency_matrix(weighted=True, transposed=False) # unsure about transpose
    N = A.shape[0] # total nodes
    I = scipy.sparse.identity(N) # identity
    ones = np.ones(N)
    Ap = spl.inv(I - alpha*A)
    ev = (1-alpha)*Ap.dot(ones)/N
    S = sum(ev)
    ev = ev/S # normalised for some reason - might not be correct I need to check
    evcents = list(zip(node_names, zip(ev)))
    
    return evcents

I should be looking at PageRank because that's for directed networks (https://stats.stackexchange.com/questions/176874/pagerank-vs-eigenvector-centrality) PageRank uses in-degree of nodes specifically so is a special case of eigenvector centrality.

In [168]:
A = network.adjacency_matrix(weighted=True, transposed=False) # unsure about transpose
print(A)

  (0, 1)	1.0
  (0, 3)	1.0
  (1, 2)	1.0
  (3, 1)	1.0


In [197]:
out_deg = A.sum(1)
out_deg = np.squeeze(np.asarray(out_deg))

In [200]:
print(A)

  (0, 1)	1.0
  (0, 3)	1.0
  (1, 2)	1.0
  (3, 1)	1.0


In [201]:
out_deg = scipy.sparse.spdiags(out_deg, 0, out_deg.size, out_deg.size)
out_deg.dot(A)



TypeError: no supported conversion for types: (dtype('O'),)

In [165]:
network

In [164]:
my_pagerank(network)

[('a', (0.4674229310154322,)),
 ('x', (0.1816998760021117,)),
 ('b', (0.09821614919033063,)),
 ('c', (0.25266104379212556,))]

In [160]:
pp.algorithms.centralities.pagerank(network)

{'a': 0.12045209234069676,
 'x': 0.3175410567582492,
 'b': 0.39036291569760717,
 'c': 0.1716439352034471}

My calculated results are different to the results of the PageRank function, but which is more accurate? <br>
Neither perform very well tbh. It's clear that b's centrality is too high because it has no out-degree ( can't see function so don't know if that transition matrix has been set up yet, I must fix that actually).

In [162]:
network

In [94]:
A = net.adjacency_matrix()
print(A)

  (0, 1)	1.0
  (1, 2)	1.0


In [19]:
at = a.reshape(a.size,)
at

array([ 0.00000000e+000, -2.68678569e+154,  1.48219694e-323,
        0.00000000e+000,  0.00000000e+000,  4.17201348e-309])

In [20]:
at.shape

(6,)