#### Eigenvalue Centrality

In [339]:
import numpy as np

#define adjecency matrix
A = [[0,1,0,0,0,0],[1,0,0,1,0,0],[0,0,0,1,0,0],[0,1,1,0,1,1],[0,0,0,1,0,0],[0,0,0,1,0,0]]

graph = { "A":["B"],
          "B":["A","D"],
          "C":["D"],
          "D":["B,C,E,F"],
          "E":["D"],
          "F":["D"],
        }

#convert to mumpy array, calculate it's eigenvectors and eignevalues
A = np.matrix(A)
lam = np.linalg.eig(A)

#find max eigenvalue and it's corresponding vecotor
eigenvalue = lam[0].max()
eigenvector = lam[1][0] #results not ordered, 1st is only all-positive

#turn matrix into an array
C = np.squeeze(np.asarray(eigenvector))
index = ["A","B","C","D","E","F"] #index the nodes of the matrix
C = dict(zip(index,C))

#We can see that some of the centrality values are 0, and in order to improve on that we can use Katz centrality. 
beta = 0.2
alpha = 1/eigenvalue - 0.1

#C_katz = beta *(I - alpha*A.T)^-1 * 1
C_katz = (beta * np.linalg.inv(np.identity(6) - alpha * np.transpose(A) * D)).dot(np.ones(6)) 
C_katz = np.squeeze(np.asarray(C_katz))
C_katz = dict(zip(index,C_katz))
C_katz
##D is still most central node, followed by B, the single-connected C,E,F and A as least central
#This measures centrality more accuratly

{'A': -0.0014452322990098632,
 'B': -0.2636115573515258,
 'C': -0.26305935251057544,
 'D': -0.30298010945305931,
 'E': -0.26305935251057544,
 'F': -0.26305935251057538}

#### Pagerank

In [340]:
a = np.asarray([1,2,1,4,1,1])
D = np.zeros((6, 6), int)
np.fill_diagonal(D, a)

C_pagerank = (beta * np.linalg.inv((np.identity(6) - alpha * np.transpose(A)).dot(D))).dot(np.ones(6))

C_pagerank = np.squeeze(np.asarray(C_pagerank))
C_pagerank = dict(zip(index,C_pagerank))
C_pagerank

{'A': 0.56536135916031816,
 'B': 0.478112466525697,
 'C': 0.74020756364398432,
 'D': 0.35345824649225055,
 'E': 0.74020756364398443,
 'F': 0.74020756364398432}

This produces some interesting results, indicating that E,F and C has similar centralities (as expected),
however node D has smallest centralities, as we specified that it has 4 outgoing links this might've had an effect. 
This raises an issue of working with non-directional graphs.

In [341]:
#http://www.python-course.eu/graphs_python.php

The information about the distance allows us to measure betweenety as a 
centrality measure, identifying most "in-between" node as an optimum node for the office.

In [343]:
#we design matrix G, which will indicate the distance to every other node based on the network graph
G = [[0,14,50,26,43,41],[14,0,36,12,29,27],[50,36,0,24,41,39],[26,12,24,0,17,15],[43,29,41,17,0,32],[41,27,39,15,32,0]]
G = np.matrix(G)
G

matrix([[ 0, 14, 50, 26, 43, 41],
        [14,  0, 36, 12, 29, 27],
        [50, 36,  0, 24, 41, 39],
        [26, 12, 24,  0, 17, 15],
        [43, 29, 41, 17,  0, 32],
        [41, 27, 39, 15, 32,  0]])

#### Closeness Centrality

In [345]:
import pandas as pd

C_C = 82/G.sum(axis=1)
C_C_df = pd.DataFrame(C_C)

#turn to dict
C_C = np.squeeze(np.asarray(C_C_df))
C_C = dict(zip(index,C_C))
print(C_C)


#ranking order, in reverse order?
ab = C_C_df.rank().values
c = []
for i in range(6):
    c.append(int(ab[i][0]))
dict(zip(index,c))



{'A': 0.47126436781609193, 'E': 0.50617283950617287, 'F': 0.53246753246753242, 'B': 0.69491525423728817, 'D': 0.87234042553191493, 'C': 0.43157894736842106}


{'A': 2, 'B': 5, 'C': 1, 'D': 6, 'E': 3, 'F': 4}

From this centrality measurment we can determine that D is the optimal centrality, 
followed by B with the rest of nodes having roughly the same values
##### Population

Given the population, intuitevly node B would be optimal, as if consider clustering the neighbours,
cluster on the left is valued at 90k, and right cluster valued at 29k

In order to have final conclusion, we should consider both, the population and the distance, 
by incorporating it into the calulations. 

In [349]:
population = {"A":90000,"B":10000,"C":8500,"D":15000,"E":1200,"F":5000}

pop = np.asarray([90000,10000,8500,15000,12000,5000])
#normalize
pop = pop/max(pop)
P = np.zeros((6,6), int)
np.fill_diagonal(P,pop)

#Weight the pagerank centrality by population.
C_pop_df = (beta * np.linalg.inv((np.identity(6) - alpha * np.transpose(A)).dot(D))).dot(pop)
#convert to dict
C_pop = np.squeeze(np.asarray(C_pop_df))
C_pop = dict(zip(index,C_pop))
C_pop


{'A': 0.28745725445163312,
 'B': 0.11444670486647687,
 'C': 0.11572632331832812,
 'D': 0.063360811791216096,
 'E': 0.12350410109610591,
 'F': 0.10794854554055033}

This indicates the heaviest weight on node A, as previously expected. However, the range of the values is smaller 
then for centrality measure, indicating that they can't be weghted correspondingly , therefore using
appropriate constatnt to standardize the values.

Depending on the circumstances such as cost of travel, frequency of calls, average number of specialists, ect,
we can use this formula to determine the optimum location of the office, by contributing a significance factor 
which sums up to 1. 

In our example we contribute 50/50 proportion of significance to both factors

In [372]:
const = 0.32 

a = 0.5 #distance factor
b = 0.5 #population factor

C_C_df = np.matrix(C_C_df)
C_opt = a * C_C_df + np.transpose(b * C_pop_df/ const)
C_opt

matrix([[ 0.68478414],
        [ 0.5262806 ],
        [ 0.39661185],
        [ 0.53517148],
        [ 0.44606158],
        [ 0.43490337]])

To conclude with, node A seems most appropriate for the office, despite node D being most connected. This conclusion is reached after considering distance to and population of each city, giving first preference to A and second to D. 

As pointed up above, by adjusting the factors significance, optimal decision could be found. 