In [None]:
%matplotlib inline
import os
import community
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import math
import json

from copy import deepcopy
from collections import defaultdict

In [None]:
# Directory holding the raw adjacency-matrix CSV files. The original
# machine-specific location stays the default; set WATER_MATRICES_DIR to run
# the notebook elsewhere without editing this cell.
DATA_DIR = os.environ.get(
    'WATER_MATRICES_DIR',
    '/Users/Jaynt/Documents/Acads/Globalization of Water/Project/Matrices/Raw/'
)
# Later cells read the input CSV and write the output file relative to this
# working directory.
os.chdir(DATA_DIR)

In [None]:
# One accumulator list per network metric. A later cell appends the values
# computed for the currently loaded file to each of them.
# Re-running this cell discards everything collected so far.
NET_DENSITY, DEGREE_DIS, LINK_WEIGHT = [], [], []
CLUST_COEFF, BET_CENT, ASSORT = [], [], []
MODULARITY, PARTITIONS, STRENGTH = [], [], []

In [None]:
# Helper function to plot a rank/value distribution on log-log axes
def plotDis(values, pl):
    """Plot ``values`` in descending order as log(rank) versus log(value).

    Parameters
    ----------
    values : sequence or dict
        Indexed as ``values[i]`` for ``i`` in ``range(len(values))``, so a
        list, a 1-D array, or a dict keyed by ``0..n-1`` all work.
    pl : object with a ``plot(x, y)`` method
        Receives the two lists of x and y coordinates.

    Non-positive entries have no logarithm; they are drawn at the level of
    the smallest positive value instead. When there is no positive value at
    all (empty input or all zeros) nothing is plotted, rather than failing
    with ``ValueError`` from ``min()`` on an empty sequence.
    """
    # Largest value first, so rank 1 is the maximum.
    ranked = sorted((values[i] for i in range(len(values))), reverse=True)

    positives = [v for v in ranked if v > 0]
    if not positives:
        return

    # Stand-in height for entries whose logarithm is undefined.
    floor = math.log(min(positives) + (2e-308))

    xs = [math.log(rank + 1) for rank in range(len(ranked))]
    ys = [math.log(v) if v > 0 else floor for v in ranked]
    pl.plot(xs, ys)

In [None]:
# Name of the input file which contains the adjacency matrix for the particular year and group.
# It is read relative to the current working directory, and the row-sum filter
# further down tests for the letter 'g' in this name to choose its threshold.
# NOTE(review): `file` shadows the Python 2 builtin of the same name; it is kept
# because other cells refer to it.
file = "2014A.csv"

In [None]:
# Read the CSV named by `file` into a pandas DataFrame.
# Later cells treat row i and column i as the same node, so the file is
# presumably a square, purely numeric matrix with the names in the header row
# -- TODO confirm against the data.
df = pd.read_csv(file)
# Show the first rows as the cell output for a quick visual check.
df.head()

In [None]:
# Total of every row of the matrix.
rowSums = list(df.apply(sum, axis=1))

# File names containing 'g' use the larger cut-off (10e6), all others 10e5.
minRowSum = 10e6 if 'g' in file else 10e5
delRowInd = [pos for pos, total in enumerate(rowSums) if total < minRowSum]

# Position -> column name lookup, taken before anything is removed so the
# matching columns can be dropped by name in the next cell.
names = list(df.columns)
mapping = dict(enumerate(names))

# Remove all rows below the cut-off in one call.
df.drop(delRowInd, inplace=True)
df.head(5)

In [None]:
# Translate the dropped row positions into column names and remove those
# columns too, so the same set of nodes is gone from both axes.
delCols = [mapping[pos] for pos in delRowInd]
df.drop(delCols, axis=1, inplace=True)
df.head(5)

In [None]:
# Keep an untouched copy of the weighted data, then binarise `df` in place and
# export it as a NumPy adjacency matrix.
df2 = df.copy()  # deep copy (pandas default), kept for the weighted network analysis
df[df > 0] = 1   # any positive weight becomes an unweighted link
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in 1.0; both return the same ndarray.
adjMat = df.values

# Update the names of countries and the position -> name dictionary
names = list(df.columns)
mapping = dict(enumerate(names))

In [None]:
# Unweighted, undirected adjacency matrix (used for betweenness centrality):
# entry (i, j) is 1 when adjMat has a link in either direction, else 0.
core = adjMat[:len(adjMat), :len(adjMat)]
linked = (core == 1) | (core.T == 1)
newMat = linked.astype(float)
len(newMat)

In [None]:
# Find indices of columns/rows with all zeros, i.e. nodes not connected anywhere,
# and delete the rows/columns for those countries.
# np.where(cond) returns a tuple of index arrays; take element [0] so zeroID is
# a plain 1-D index array. Membership tests such as `j not in zeroID` then
# behave correctly, whereas against the tuple they raise "truth value of an
# array is ambiguous" as soon as two or more nodes are isolated.
zeroID = np.where(~newMat.any(axis=0))[0]
newMat = np.delete(newMat, zeroID, axis=0)
newMat = np.delete(newMat, zeroID, axis=1)
len(newMat)

In [None]:
# Create a list of names, delete names of countries not connected anywhere.
# zeroID may be the raw tuple returned by np.where or a plain index array;
# flattening it into a set of ints handles both. Testing an int against the
# tuple directly compares it with a whole array and fails for 2+ entries.
isolated = set(np.ravel(zeroID).tolist())
names = [name for pos, name in enumerate(names) if pos not in isolated]
print(len(names))
mapping = dict(enumerate(names))

In [None]:
# Build an undirected NetworkX graph from the symmetric 0/1 matrix newMat.
# Node labels are presumably the row positions 0..n-1; later cells index the
# per-node results with range(len(...)) and rely on that -- TODO confirm.
D = nx.Graph(newMat)

In [None]:
# Calculate (normalised) betweenness centrality of the graph; dict node -> value
betCen = nx.betweenness_centrality(D, normalized=True)

# Spot check of one fixed node. The node count changes between input files,
# so skip the check instead of raising KeyError when node 132 does not exist.
probeNode = 132
if probeNode in betCen and probeNode in mapping:
    print("%s %s" % (betCen[probeNode], mapping[probeNode]))

fig, pl = plt.subplots()
plotDis(betCen, pl)
plt.title("Betweenness Centrality")

# Node ids ordered from most to least central.
sortInd = sorted(range(len(betCen)), key=lambda i: betCen[i], reverse=True)
# Top 37 countries by betweenness centrality, then their values. A single
# formatted argument prints the same text under Python 2 and Python 3.
print("%s %s" % ([mapping[i] for i in sortInd[:37]], [betCen[i] for i in sortInd[:37]]))

In [None]:
# Calculate clustering coefficients; dict node -> coefficient
clust = nx.clustering(D)
fig, pl = plt.subplots()
plotDis(clust, pl)
plt.title("Clustering Coefficient")

# Node ids ordered from highest to lowest clustering coefficient.
sortInd = sorted(range(len(clust)), key=lambda i: clust[i], reverse=True)
# Top 20 countries by clustering coefficient, then their values. A single
# formatted argument prints the same text under Python 2 and Python 3.
print("%s %s" % ([mapping[i] for i in sortInd[:20]], [clust[i] for i in sortInd[:20]]))

In [None]:
# Degree assortativity coefficient of the undirected graph D.
assort = nx.degree_assortativity_coefficient(D)
# Bare name so the value is shown as the cell output.
assort

In [None]:
# Compute the best partition: a dict mapping each node to its community id.
partition = community.best_partition(D)

# Draw every community in its own grey level on a spring layout.
communities = set(partition.values())
size = float(len(communities))
pos = nx.spring_layout(D)
for count, com in enumerate(communities, 1):
    list_nodes = [node for node, label in partition.items() if label == com]
    # A numeric string is passed as the colour; count / size runs up to 1.0.
    nx.draw_networkx_nodes(D, pos, list_nodes, node_size=20,
                           node_color=str(count / size))

nx.draw_networkx_edges(D, pos, alpha=0.5)
#plt.show()

In [None]:
# Number of communities found and the modularity of that partition
partitions = len(set(partition.values()))
modularity = community.modularity(partition, D)
# A single formatted argument prints the same text under Python 2 and Python 3
# (the bare `print a, b` statement is a SyntaxError on Python 3).
print("%s %s" % (partitions, modularity))

In [None]:
# Network density: directed links present divided by the N*(N-1) possible ones.
numNodes = len(mapping)
numEdges = adjMat.sum()  # adjMat holds 0/1 entries, so the total counts the links
possibleEdges = numNodes * (numNodes - 1)
netDensity = (numEdges * 1.) / possibleEdges

In [None]:
# In/out degree from the directed 0/1 matrix (column sums / row sums).
inDegree = adjMat.sum(axis=0)
outDegree = adjMat.sum(axis=1)
# NOTE(review): newMat is symmetric, so its row sum is already the undirected
# degree; the division by 2 halves it -- confirm this is intended.
totDegree = newMat.sum(axis=1)/2

# Mean degrees. A single formatted argument prints the same text under
# Python 2 and Python 3 (the bare print statement is a SyntaxError on Python 3).
print("Node Degree (Total) = %s" % (1.*sum(totDegree)/len(totDegree)))
print("Node Degree (In) = %s" % (1.*sum(inDegree)/len(inDegree)))
print("Node Degree (Out) = %s" % (1.*sum(outDegree)/len(outDegree)))

In [None]:
print(len(adjMat[1]))

# totDegree comes from newMat, from which nodes without any link were removed.
# Rebuilding the lookup from all of df.columns would shift every name after a
# removed node, so keep only the columns that have at least one link in either
# direction (the same rule used to build and prune newMat).
core = adjMat[:len(adjMat), :len(adjMat)]
linked = (core == 1) | (core.T == 1)
names = [name for name, keep in zip(df.columns, linked.any(axis=0)) if keep]
mapping = dict(enumerate(names))
print(len(mapping))

# Node positions ordered from highest to lowest degree.
sortInd = sorted(range(len(totDegree)), key=lambda i: totDegree[i], reverse=True)
# Top 20 countries by degree, then their values. A single formatted argument
# prints the same text under Python 2 and Python 3.
print("%s %s" % ([mapping[i] for i in sortInd[:20]], [totDegree[i] for i in sortInd[:20]]))
fig, pl = plt.subplots()
plotDis(totDegree, pl)
plt.title("Node Degree distribution")

In [None]:
# Weighted Adjacency Matrix for node strength distribution
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in 1.0; both return the same ndarray.
adjMatWt = df2.values

# Positions whose column is entirely zero are removed from both axes.
zeroID = np.where(~adjMatWt.any(axis=0))[0]
adjMatWt = np.delete(adjMatWt, zeroID, axis=0)
adjMatWt = np.delete(adjMatWt, zeroID, axis=1)

# Drop the same positions from the name list and rebuild the lookup.
dropped = set(zeroID.tolist())
names = [name for pos, name in enumerate(df2.columns) if pos not in dropped]
print(names)
mapping = dict(enumerate(names))

In [None]:
# Node strength = total incoming weight (column sum) + total outgoing weight (row sum)
inStr = adjMatWt.sum(axis=0)
outStr = adjMatWt.sum(axis=1)
totStr = inStr + outStr
print(len(totStr))

# Spot checks of fixed positions. The node count changes between input files,
# so skip them instead of raising when the network is smaller.
if 128 in mapping:
    print(mapping[128])
if len(totStr) > 131:
    print(totStr[131])

# Node positions ordered from highest to lowest strength.
sortInd = sorted(range(len(totStr)), key=lambda i: totStr[i], reverse=True)
# Top 20 countries by strength, then their values. Names and values now use the
# same cut-off; previously 10 names were printed next to 20 values.
print("%s %s" % ([mapping[i] for i in sortInd[:20]], [totStr[i] for i in sortInd[:20]]))
fig, pl = plt.subplots()
plotDis(totStr, pl)
plt.title("Node Strength Distribution")

In [None]:
# Every entry of the weighted matrix, flattened and shifted by +1 so that each
# value is positive before plotDis takes logarithms.
linkWeight = [weight + 1 for weight in adjMatWt.flat]
fig, pl = plt.subplots()
plotDis(linkWeight, pl)
plt.title("Link Weight Distribution")

In [None]:
# Network properties (collect for 14 years): append this file's results to the
# matching accumulator list, one entry per metric.
for bucket, value in (
        (NET_DENSITY, netDensity),
        (DEGREE_DIS, totDegree),
        (LINK_WEIGHT, linkWeight),
        (CLUST_COEFF, clust),
        (BET_CENT, betCen),
        (ASSORT, assort),
        (MODULARITY, modularity),
        (PARTITIONS, partitions),
        (STRENGTH, totStr)):
    bucket.append(value)

In [None]:
# Convert NumPy containers and scalars to plain Python objects so that `All`
# can be serialised as JSON.
# np.asarray(x).tolist() accepts arrays, NumPy scalars and already-converted
# lists alike, so this cell can be re-run after more results are appended.
# Calling x.tolist() directly fails on entries converted by an earlier run.
NET_DENSITY = [np.asarray(x).tolist() for x in NET_DENSITY]
DEGREE_DIS = [np.asarray(x).tolist() for x in DEGREE_DIS]
STRENGTH = [np.asarray(x).tolist() for x in STRENGTH]
# Link weights are lists of NumPy scalars; integer ones are not JSON
# serialisable on Python 3, so convert them as well.
LINK_WEIGHT = [np.asarray(x).tolist() for x in LINK_WEIGHT]
All = [NET_DENSITY, DEGREE_DIS, LINK_WEIGHT, CLUST_COEFF, BET_CENT, ASSORT, MODULARITY, PARTITIONS, STRENGTH]

In [None]:
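# Disabled helper: would list the member names of each detected community.
# NOTE(review): `mapping` has been rebuilt for the weighted matrix by this point;
# confirm it still lines up with the node ids in `partition` before re-enabling.
#parts = [[mapping[i] for i,j in partition.items() if j==k] for k in set(partition.values())]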

In [None]:
# Sanity check: every accumulator should hold one entry per processed file.
# STRENGTH is included so all nine saved series are covered. Printing one
# prebuilt tuple gives the same "(a, b, ...)" text under Python 2 and Python 3.
seriesLengths = tuple(len(series) for series in (
    NET_DENSITY, DEGREE_DIS, LINK_WEIGHT, CLUST_COEFF, BET_CENT, ASSORT,
    MODULARITY, PARTITIONS, STRENGTH))
print(seriesLengths)

In [None]:
# Save the collected metrics as JSON, for plotting in ggplot.
# The path is relative to the current working directory.
out = '../../g6.txt'
with open(out, 'w') as sink:
    json.dump(All, sink)