In [55]:
#February 8, 2021
#Andy Liu (HMC '23) - investigating usage of different centrality measures on a UN treaty graph

In [3]:
import pandas as pd 
import os
import csv
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
import numpy as np 
from scipy import sparse

In [5]:

#special = ['United States of America', 'Germany', 'United Kingdom of Great Britain and Northern Ireland', 'China', 'Russian Federation', 'France', 'Brazil', 'South Africa', 'Nigeria']

#for future filtering usage
class Treaty():

    def __init__(self, number, title, bilateral, reg_date, participants):
        
        self.number = number
        self.title = title
        self.reg_date = reg_date
        self.bilateral = bilateral
        self.participants = participants

    def get_number(self):
        return(self.number)
    def __str__(self):
        return(self.title)
    def get_date(self):
        return(self.reg_date)
    def is_bilateral(self):
        return(self.bilateral)
    def get_participants(self):
        return(self.participants)
#return coordinates of each country

In [6]:
treaties_csv = '/Users/andyliu/develop/sideprojects/un_treaty_network/treaties-merged.csv'
treaties = []
countries_dic = {}
countries_dic_num = {}
#read treaties into a list
with open(treaties_csv, encoding='utf-8') as f:
    readCSV = csv.reader(f)
    next(readCSV)
    for row in readCSV:

        participantsstring = row[4][1:-1]
        participants = (participantsstring.replace('\'', '')).split(",")
        for i in range(1,len(participants)):
            participants[i] = participants[i][1:]

        treaty = Treaty(row[0], row[1], row[2], row[3], participants)
        treaties.append(treaty)

In [7]:
#create dictionary
for treaty in treaties:
    participants = treaty.get_participants()
    for i in range(0,len(participants)):
        if i<len(participants)-1:
            for j in range(i,len(participants)):
                country_i = participants[i]
                country_j = participants[j]
                try:
                    countries_dic[country_i][country_j] = countries_dic[country_i][country_j].append(treaty)
                    countries_dic_num[country_i][country_j]+=1
                except (KeyError, AttributeError):
                    try:
                        countries_dic[country_i][country_j] = [treaty]
                        countries_dic_num[country_i][country_j] = 1
                    except KeyError:
                        countries_dic[country_i] = {country_j:[treaty]}
                        countries_dic_num[country_i] = {country_j:1}

                try:
                    countries_dic[country_j][country_i] = countries_dic[country_j][country_i].append(treaty)
                    countries_dic_num[country_j][country_i]+=1
                except (KeyError, AttributeError):
                    try:
                        countries_dic[country_j][country_i] = [treaty]
                        countries_dic_num[country_j][country_i] = 1
                    except KeyError:
                        countries_dic[country_j] = {country_i:[treaty]}
                        countries_dic_num[country_j] = {country_i:1}

In [10]:
#add weights (for compatability with networkx)
for k, d in countries_dic_num.items():
    for ik in d:
        d[ik] = {'weight': d[ik]}

G = nx.from_dict_of_dicts(countries_dic_num)

In [45]:
n_top = 5

In [47]:
degree_centrality = nx.degree_centrality(G)
dc = {k: v for k, v in sorted(degree_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
#Germany, France, UK, Netherlands, USA top 5 by degree centrality
dc

{'Germany': 0.8020833333333333,
 'France': 0.7916666666666666,
 'United Kingdom of Great Britain and Northern Ireland': 0.7864583333333333,
 'Netherlands': 0.78125,
 'United States of America': 0.7291666666666666}

In [48]:
eigenvector_centrality = nx.eigenvector_centrality_numpy(G)
ec = {k: v for k, v in sorted(eigenvector_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
ec
#Germany, Netherlands, UK, France, USA

{'Germany': 0.16140333660878148,
 'Netherlands': 0.16104584644952136,
 'United Kingdom of Great Britain and Northern Ireland': 0.16087862621372223,
 'France': 0.1599939462848011,
 'United States of America': 0.15399079873121022}

In [49]:
katz_centrality = nx.katz_centrality_numpy(G, alpha=0.1)
{k: v for k, v in sorted(katz_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
#seems broken...

{'"Democratic Peoples Republic of Korea"': 0.19780751706933117,
 'Republic of Korea': 0.19780751706933117,
 'Mongolia': 0.1820691251429529,
 'Albania': 0.13544422770377648,
 'Romania': 0.13292683130989122}

In [50]:
katz_centrality = nx.katz_centrality_numpy(G, alpha=0.01)
kc = {k: v for k, v in sorted(katz_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
#because our alpha was too high
#(must be less than reciprocal of max. eigenvalue)
#Germany, UK, France, Netherlands, USA
kc

{'Germany': 0.14154994106502547,
 'United Kingdom of Great Britain and Northern Ireland': 0.14050916502427843,
 'France': 0.1404901949588748,
 'Netherlands': 0.14004841706898433,
 'United States of America': 0.1343829620627288}

In [37]:
#pagerank_centrality = nx.pagerank_numpy(G, alpha=0.85)
#deprecated?

In [51]:
closeness_centrality = nx.closeness_centrality(G)
cc = {k: v for k, v in sorted(closeness_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
#Germany, France, UK, Netherlands, USA
cc

{'Germany': 0.8275862068965517,
 'France': 0.8205128205128205,
 'United Kingdom of Great Britain and Northern Ireland': 0.8170212765957446,
 'Netherlands': 0.8135593220338984,
 'United States of America': 0.7804878048780488}

In [52]:
betweenness_centrality = nx.betweenness_centrality(G)
bc = {k: v for k, v in sorted(betweenness_centrality.items(), key=lambda item: item[1], reverse=True)[:n_top]}
#France, Germany, Netherlands, UK, USA
bc

{'France': 0.06319258048899991,
 'Germany': 0.060142366547652266,
 'Netherlands': 0.05993321128248173,
 'United Kingdom of Great Britain and Northern Ireland': 0.058129965320823165,
 'United States of America': 0.047952794486176537}

In [53]:
#TODO: compare performance between countries -> use to theorize?

In [54]:
#France does better on closeness/betweenness, Germany on rest