In [62]:
# NOTE(review): `clear` is not defined anywhere in the visible notebook; presumably it relies on
# IPython's automagic `%clear` — confirm it is still wanted.
clear




In [92]:
# Scenario label; no other cell in the visible part of the notebook reads it.
case = 'Case1'

In [63]:
import os
import csv
import numpy as np
import pandas as pd 

Set the working directory to the folder that contains the input files (`edges.csv`, `nodes.csv`); the output CSVs are written there as well.

In [126]:
# NOTE(review): hard-coded absolute path on one user's machine. Every relative file name used in
# this notebook ("edges.csv", "nodes.csv", the numbered output CSVs) resolves against it.
os.chdir(r"C:\Users\chris\Desktop\MA\coding\coding\m6\centrality_analysis")
# Last expression of the cell: displays the directory that is now in effect.
os.getcwd()

'\path'

#### Derive the edge list from the file edges.csv

In [65]:
# networkx is imported here, in its own cell, rather than with the other imports at the top.
import networkx as nx
# Empty directed graph; later cells add the edges from edges.csv and the 'S'/'T' arcs to it.
DG=nx.DiGraph()

In [66]:
def reader(filename2):
    """Yield the data lines of a text file, skipping its first (header) line.

    Parameters
    ----------
    filename2 : str or path-like
        File to read. It is opened in text mode with the default encoding.

    Yields
    ------
    str
        Every line after the first one, including its trailing newline.
    """
    # The context manager closes the handle as soon as the generator is exhausted
    # or discarded; the previous bare open() never closed it explicitly.
    with open(filename2) as handle:
        next(handle, None)  # drop the header; the default avoids StopIteration on an empty file
        yield from handle

filename2 = "edges.csv"
# Every data row must hold exactly three fields (source, target, capacity); the rows are
# transposed into three parallel tuples.
(from_, to_, capac) = zip(*csv.reader(reader(filename2)))

# (source, target, capacity) triples. The capacity is kept as the string read from the file.
l_edges = list(zip(from_, to_, capac))

for f, t, c in l_edges:
    # Attributes are passed to add_edge() directly: the former `DG.edge[f][t][...] = ...`
    # accessor exists only in networkx 1.x, while add_edge(u, v, **attr) works in every
    # release. The same integer is stored as both 'capacity' (read by the flow routines)
    # and 'weight' (read by the centrality measures and the weighted incidence matrix).
    DG.add_edge(f, t, capacity=int(c), weight=int(c))

#### Derive nodelist from File nodes.csv

In [67]:
def reader(filename):
    """Yield the data lines of a text file, skipping its first (header) line.

    Parameters
    ----------
    filename : str or path-like
        File to read. It is opened in text mode with the default encoding.

    Yields
    ------
    str
        Every line after the first one, including its trailing newline.
    """
    # The context manager closes the handle as soon as the generator is exhausted
    # or discarded; the previous bare open() never closed it explicitly.
    with open(filename) as handle:
        next(handle, None)  # drop the header; the default avoids StopIteration on an empty file
        yield from handle

filename = "nodes.csv"
# Transpose the two-field rows into one tuple of node names and one of balance changes.
l_nodes, change = zip(*csv.reader(reader(filename)))

# node name -> change in balance; both are still the strings read from the file.
d_nodes = {node: delta for node, delta in zip(l_nodes, change)}

#### Derive two incidence matrices from DG: (A) weighted, exported to 01_incidencematrix.csv, and (B) unweighted, to be used with the max-flow algorithm

##### Weighted incidence matrix

In [192]:
def incidence_matrix(A, nodelist=None, edgelist=None,
                     oriented=True, weight='weight'):
    """Build the node-by-edge incidence matrix of the notebook's graph ``DG``.

    Parameters
    ----------
    A : object
        Ignored. Kept only so that existing calls such as
        ``incidence_matrix("wA")`` keep working.
    nodelist : sequence, optional
        Nodes in row order. Defaults to the global ``l_nodes``.
    edgelist : sequence of tuples, optional
        Edges in column order; the first two items of each tuple are taken as
        (source, target). Defaults to the global ``l_edges``.
    oriented : bool, default True
        If True the source row gets ``-wt`` and the target row ``+wt``;
        otherwise both rows get ``+wt``.
    weight : str or None, default 'weight'
        Name of the edge attribute of ``DG`` used as ``wt`` (1 if the edge
        lacks it). With ``None`` every entry is 1 and ``DG`` is not consulted.

    Returns
    -------
    numpy.matrix
        Matrix of shape ``(len(nodelist), len(edgelist))``. A self-loop
        leaves its column at zero.

    Raises
    ------
    networkx.NetworkXError
        If an endpoint of an edge is missing from ``nodelist``.
    """
    if nodelist is None:
        nodelist = l_nodes
    if edgelist is None:
        edgelist = l_edges
    matrix = np.zeros((len(nodelist), len(edgelist)))
    node_index = {node: i for i, node in enumerate(nodelist)}
    for ei, e in enumerate(edgelist):
        (u, v) = e[:2]
        if u == v:
            continue  # self loops give zero column
        try:
            ui = node_index[u]
            vi = node_index[v]
        except KeyError:
            # The exception class is reached through `nx` (the bare name was never
            # imported), and the %-formatting is now applied to the message instead
            # of being part of the string literal.
            raise nx.NetworkXError('node %s or %s in edgelist '
                                   'but not in nodelist' % (u, v))
        if weight is None:
            wt = 1
        else:
            if DG.is_multigraph():
                # Only relevant for multigraphs, where e[2] must be the edge key.
                ekey = e[2]
                wt = DG[u][v][ekey].get(weight, 1)
            else:
                wt = DG[u][v].get(weight, 1)
        if oriented:
            matrix[ui, ei] = -wt
            matrix[vi, ei] = wt
        else:
            matrix[ui, ei] = wt
            matrix[vi, ei] = wt
    # Return the matrix built above. The previous `np.asmatrix(wA)` referred to a
    # global that does not exist yet when this function is first called.
    return np.asmatrix(matrix)

In [193]:
# The string argument is ignored: incidence_matrix overwrites its first parameter at once and
# builds the matrix from the globals DG, l_nodes and l_edges.
wA = incidence_matrix("wA")


In [194]:
# Export the weighted matrix `wA` built in the previous cell. The earlier `pd.DataFrame(A)`
# read `A`, which is only assigned further down (so it is undefined on a fresh
# top-to-bottom run) and holds the unweighted matrix.
df = pd.DataFrame(wA)
# Plain numeric grid: neither the row index nor a header line is written.
df.to_csv("01_incidencematrix.csv", index=False, header=False)

##### Unweighted incidence matrix

In [195]:
def incidence_matrix(A, nodelist=None, edgelist=None,
                     oriented=True, weight=None):
    """Build the node-by-edge incidence matrix of the notebook's graph ``DG``.

    This definition replaces any earlier ``incidence_matrix`` in the kernel;
    its ``weight`` default is ``None``, so by default every entry is +/-1.

    Parameters
    ----------
    A : object
        Ignored. Kept only so that existing calls such as
        ``incidence_matrix("A")`` keep working.
    nodelist : sequence, optional
        Nodes in row order. Defaults to the global ``l_nodes``.
    edgelist : sequence of tuples, optional
        Edges in column order; the first two items of each tuple are taken as
        (source, target). Defaults to the global ``l_edges``.
    oriented : bool, default True
        If True the source row gets ``-wt`` and the target row ``+wt``;
        otherwise both rows get ``+wt``.
    weight : str or None, default None
        Name of the edge attribute of ``DG`` used as ``wt`` (1 if the edge
        lacks it). With ``None`` every entry is 1 and ``DG`` is not consulted.

    Returns
    -------
    numpy.matrix
        Matrix of shape ``(len(nodelist), len(edgelist))``. A self-loop
        leaves its column at zero.

    Raises
    ------
    networkx.NetworkXError
        If an endpoint of an edge is missing from ``nodelist``.
    """
    if nodelist is None:
        nodelist = l_nodes
    if edgelist is None:
        edgelist = l_edges
    matrix = np.zeros((len(nodelist), len(edgelist)))
    node_index = {node: i for i, node in enumerate(nodelist)}
    for ei, e in enumerate(edgelist):
        (u, v) = e[:2]
        if u == v:
            continue  # self loops give zero column
        try:
            ui = node_index[u]
            vi = node_index[v]
        except KeyError:
            # The exception class is reached through `nx` (the bare name was never
            # imported), and the %-formatting is now applied to the message instead
            # of being part of the string literal.
            raise nx.NetworkXError('node %s or %s in edgelist '
                                   'but not in nodelist' % (u, v))
        if weight is None:
            wt = 1
        else:
            if DG.is_multigraph():
                # Only relevant for multigraphs, where e[2] must be the edge key.
                ekey = e[2]
                wt = DG[u][v][ekey].get(weight, 1)
            else:
                wt = DG[u][v].get(weight, 1)
        if oriented:
            matrix[ui, ei] = -wt
            matrix[vi, ei] = wt
        else:
            matrix[ui, ei] = wt
            matrix[vi, ei] = wt
    return np.asmatrix(matrix)

In [196]:
# The string argument is ignored by incidence_matrix. With the definition directly above
# (weight=None by default) this yields the unweighted matrix.
A = incidence_matrix("A")

#### Run centrality measures on nx.DG (directed graph)

Degree centrality: depends on the number of connections of a node (the same for all cases, because the network is the same unless certain edges are zero).

In [114]:
# Mapping node -> degree centrality (consumed below through `.items()`).
degree = nx.degree_centrality(DG)

Betweenness centrality: accounts for a node acting as a bridge, gatekeeper or broker.

In [115]:
# The 'weight' edge attribute is used as the path weight. In this notebook that attribute
# holds the edge capacity, so high-capacity edges count as "long" — NOTE(review): confirm
# this is intended.
betweenness = nx.betweenness_centrality(DG, k=None, normalized=True, weight='weight', endpoints=False, seed=None)

Closeness centrality: a central node is one that is close, on average, to the other nodes => use the inverse of the weights?

In [116]:
# Only the non-default argument is passed. `u=None` and `normalized=True` were the networkx 1.x
# defaults, and the `normalized` keyword is rejected by current networkx releases, so dropping
# them keeps the 1.x result and lets the call run on newer versions as well.
# NOTE(review): newer networkx measures closeness on a digraph with inward distances — confirm
# which direction is wanted. The 'weight' attribute (the edge capacity) is used as the distance.
closeness = nx.closeness_centrality(DG, distance='weight')

#### Create dictionary for export as 02_centrality.csv

In [117]:
# Imported here, in its own cell, rather than with the other imports at the top.
from collections import defaultdict
# node -> list of centrality values; filled by the loop in the next cell.
output = defaultdict(list)

In [118]:
# Append each node's score from every measure in turn, so each list in `output` ends up in
# the order degree, betweenness, closeness. Re-running this cell without resetting `output`
# appends the values a second time.
for measure in (degree, betweenness, closeness):
    for node in measure:
        output[node].append(measure[node])


In [124]:
# One row per node. The columns are unnamed (0, 1, 2, ...) and follow the order in which the
# values were appended to `output` — degree, betweenness, closeness when the filling loop ran
# exactly once.
centrality = pd.DataFrame.from_dict(output, orient="index")
# Written with the default index and header, so the file carries the node names and the
# integer column labels.
centrality.to_csv("02_centrality.csv")

#### Calculate max flow of the network (add [S] and [T], add outer arcs to model network flow)

In [127]:
def reader(filename):
    """Yield the data lines of a text file, skipping its first (header) line.

    Parameters
    ----------
    filename : str or path-like
        File to read. It is opened in text mode with the default encoding.

    Yields
    ------
    str
        Every line after the first one, including its trailing newline.
    """
    # The context manager closes the handle as soon as the generator is exhausted
    # or discarded; the previous bare open() never closed it explicitly.
    with open(filename) as handle:
        next(handle, None)  # drop the header; the default avoids StopIteration on an empty file
        yield from handle

filename = "nodes.csv"
# Re-read the node file: one tuple of node names and one of balance changes.
l_nodes, change = zip(*csv.reader(reader(filename)))

# node name -> change in balance; both are still the strings read from the file.
d_nodes = {node: delta for node, delta in zip(l_nodes, change)}

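Create the outer arcs from each node's change in balance: a negative change (CR) adds an arc from the source `S` to the node, a positive change (DR) adds an arc from the node to the sink `T`.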

In [129]:
# Attach each node to the source 'S' or the sink 'T' according to the sign of its balance
# change; a node whose change is zero gets no arc. The arcs carry no 'capacity' attribute
# (the original assignment of abs(int(change)) through nx.set_edge_attributes was left
# commented out), and networkx's flow routines treat an edge without that attribute as
# having infinite capacity.
for node, balance_change in d_nodes.items():
    signed_change = float(balance_change)
    if signed_change < 0:
        DG.add_edge('S', node)
    elif signed_change > 0:
        DG.add_edge(node, 'T')

#### Run algorithm to determine max flow of the network

In [132]:
# Source and sink are passed positionally, as in the nx.maximum_flow call further down.
# The `s=`/`t=` keywords match only the networkx 1.x signature; newer releases name these
# parameters differently, so the keyword form fails there. Positional arguments work everywhere.
print('no. of nodes:',DG.number_of_nodes())
print('no. of edges:',DG.number_of_edges())
print('maximum flow:',nx.maximum_flow_value(DG, 'S', 'T'))
print('minimum cut: ',nx.minimum_cut_value(DG, 'S', 'T'))

no. of nodes: 16
no. of edges: 29
maximum flow: 580
minimum cut:  580


In [138]:
# nx.maximum_flow returns a (flow value, flow dict) pair; later cells read the per-edge flow
# as maximum_flow[1][u][v].
maximum_flow = nx.maximum_flow(DG, 'S', 'T')

#### Derive vector y from max-flow data

In [158]:
# Accumulator for the per-edge flows collected in the next cell.
ytemp = []

In [162]:
# Flow on every edge of l_edges, in l_edges order. The list is rebuilt from scratch so that
# re-running the cell cannot append a second copy. A missing edge now raises KeyError here:
# the former bare `except: pass` dropped it silently, which shifted all later entries and
# left too few values for the reshape to (len(l_edges), 1) in the next cell.
flow_by_edge = maximum_flow[1]
ytemp = [flow_by_edge[src][dst] for src, dst, _capacity in l_edges]

In [164]:
# Column vector with one flow value per edge; the reshape fails unless ytemp holds exactly
# len(l_edges) entries. No later cell in the visible notebook reads `y`.
y = np.array(ytemp).reshape(len(l_edges),1)

#### Calculate implied changes in balances; impx = wA * dummy_vector (weighted incidence matrix times a vector of ones)

Dummy vector with dimension [no. of transactions] by [1] to multiply with weighted incidence matrix. Yields implied changes (impx) in balances to be used for calculating a loss.

In [214]:
# Vector of ones with one entry per edge, plus a column-vector view of the same data.
dummy = np.ones(len(l_edges))
dummy_vector = dummy[:, np.newaxis]

In [215]:
# `*` is a matrix product here only if wA is a numpy matrix (incidence_matrix returns
# np.asmatrix(...)); with a plain ndarray it would broadcast element-wise instead.
# The result has one row per node: the sum of that node's row of wA.
impx = wA * dummy_vector

#### Derive actual x (actx) from the balance changes in nodes.csv, deduct implied x (impx) and export the result as 03_impx.csv

In [392]:
# Balance changes as integers, in l_nodes order, then shaped into a column vector.
# int() raises ValueError for a value that is not an integer literal.
int_change = [int(raw_change) for raw_change in change]
actx = np.array(int_change).reshape(len(l_nodes), 1)

In [393]:
# Actual minus implied change, one row per node.
delta = actx - impx
# Element-wise square of the differences.
deltasqrd = np.square(delta)

In [394]:
# One single-column frame per quantity, each indexed by node name (l_nodes order).
df_actx = pd.DataFrame(data = actx, index = l_nodes)
df_impx = pd.DataFrame(data = impx, index = l_nodes)
df_delta = pd.DataFrame(data = delta, index = l_nodes)
df_deltasqrd = pd.DataFrame(data = deltasqrd, index = l_nodes)

In [398]:
# Place the four frames side by side (axis=1), aligned on the node index.
delta_output = pd.concat([df_actx, df_impx, df_delta, df_deltasqrd], axis=1)

In [399]:
# Name the columns in the order in which the frames were concatenated.
delta_output.columns = ['actx','impx','delta','deltasqrd']

In [401]:
# Written with the default index and header, so the file carries the node names and the
# four column labels.
delta_output.to_csv("03_impx.csv")