In [77]:
import networkx as nx 
import numpy as np
import pandas as pd 
import community 
from itertools import compress
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import fcluster
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Export 2009 and 2015 data for Gephi

#### 1. First, define some functions

In [78]:
def export_gephi(df,year,var):
    """Build the directed network for one year and write it to a GEXF file.

    Rows of ``df`` whose ``year`` column equals ``year`` are selected, missing
    values are replaced by 0, and rows whose ``total`` is not positive are
    dropped.  The remaining rows become the edges of a directed graph
    (``country`` -> ``counterpart``) that carry ``var`` as an edge attribute.
    ``get_hierarchy_cluster`` and ``get_nx_community`` then annotate the nodes,
    and the graph is written to ``../data/<var><year>.gexf``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``year``, ``total``, ``country``, ``counterpart``
        and the column named by ``var``.  The frame itself is not modified.
    year
        Value compared for equality against ``df['year']``.
    var : str
        Name of the column stored on every edge; also part of the file name.

    Returns
    -------
    networkx.DiGraph
        The annotated graph that was written to disk.
    """
    ## clean the data first
    # Chain the steps instead of calling fillna(..., inplace=True) on the
    # filtered frame: in-place edits of a frame derived from a selection are
    # the pattern pandas flags as chained assignment, and this notebook
    # silences all warnings, so the flag would never be seen.
    df_y = df[df['year']==year].fillna(0)
    # Rows are always filtered on `total`, also when `var` names another
    # column, so edges whose `var` value is 0 can remain in the graph.
    df_y = df_y[df_y['total']>0]
    G = nx.from_pandas_dataframe(df_y, source="country", target="counterpart", edge_attr=[var],create_using=nx.DiGraph())
    get_hierarchy_cluster(G,var)                                   ## add hierarchy_cluster to node attribute
    get_nx_community(G,var)                                        ## add nx community detection to node attribute
    nx.write_gexf(G, "../data/"+var+str(year)+".gexf")
    return G

def get_hierarchy_cluster(G,var,n_clusters=4):
    """Attach an average-linkage cluster id to every node of ``G``.

    Each node is described by its row of the adjacency matrix weighted by the
    edge attribute ``var``.  Pairwise cosine distances between these rows are
    clustered with average linkage and the tree is cut into at most
    ``n_clusters`` flat clusters.  The resulting 0-based cluster id is stored
    in the node attribute ``hierarchy_cluster``; ``G`` is modified in place
    and nothing is returned.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes receive the attribute.
    var : str
        Edge attribute used as the adjacency weight.
    n_clusters : int, optional
        Upper bound on the number of flat clusters (``fcluster`` with
        ``criterion='maxclust'``).  Defaults to 4, the value that used to be
        hard-coded.
    """
    node_list = sorted(G.nodes())
    if len(node_list) < 2:
        # A linkage needs at least two observations; with zero or one node
        # every node simply lands in cluster 0.
        nx.set_node_attributes(G, 'hierarchy_cluster', {node: 0 for node in node_list})
        return
    A = np.asarray(nx.to_numpy_matrix(G = G,nodelist=node_list,weight=var))
    dist = pdist(A, 'cosine')                       # condensed vector of cosine distances
    # An all-zero row has no direction, so its cosine distance is NaN; treat
    # such pairs as being at distance 1.
    dist[np.isnan(dist)] = 1
    # Keep the distances in condensed form: hierarchy.average() interprets a
    # 2-D array as raw observation vectors, so handing it squareform(dist)
    # would cluster on Euclidean distances between rows of the distance
    # matrix instead of on the cosine distances themselves.
    Z = hierarchy.average(dist)
    clusters = fcluster(Z, n_clusters, criterion='maxclust')                 ## cut the tree into at most n_clusters groups
    hierarchy_cluster = {node: int(label)-1 for node, label in zip(node_list, clusters)}   ## make it start from 0
    # NOTE(review): (G, name, values) is the networkx 1.x argument order used
    # throughout this notebook; networkx >= 2.0 expects (G, values, name) -
    # confirm the installed version before upgrading.
    nx.set_node_attributes(G, 'hierarchy_cluster', hierarchy_cluster)

def get_nx_community(G,var):
    """Store the partition found by ``community.best_partition`` on ``G``.

    The adjacency matrix of ``G`` (weighted by the edge attribute ``var``) is
    added to its transpose, so every pair of nodes is linked by the sum of the
    weights in both directions.  The partition computed on that symmetric
    graph is written to the node attribute ``nx_community``; ``G`` is modified
    in place and nothing is returned.

    Algorithm: https://sites.google.com/site/findcommunities/
    Package:   http://perso.crans.org/aynaud/communities/
    """
    countries = sorted(G.nodes())
    adjacency = nx.to_numpy_matrix(G = G,nodelist=countries,weight=var)
    ## adjacency + its transpose: edge weight = in-weight + out-weight
    symmetric_graph = nx.from_numpy_matrix(adjacency + adjacency.T)
    ## relabel the nodes of the rebuilt graph with the country names, in node-list order
    index_to_country = dict(zip(symmetric_graph.nodes(), countries))
    symmetric_graph = nx.relabel_nodes(symmetric_graph, index_to_country)
    partition = community.best_partition(symmetric_graph,weight='weight',resolution=1)
    nx.set_node_attributes(G, 'nx_community', partition)

#### 2. Read the Stata data and export it

In [79]:
## load the pre-processed Stata file, keeping only the columns used below
keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','loans_dep']
df = (
    pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed1.dta')[keep_var]
    # total = CDIS_IAD + CPIS_IAP + loans_dep; a missing value in any of the
    # three makes the total missing as well
    .assign(total=lambda d: d['CDIS_IAD'] + d['CPIS_IAP'] + d['loans_dep'])
)

In [80]:
## write one .gexf file per (year, variable) combination
files = [(2009,'total'),(2009,'loans_dep'),(2015,'total'),(2015,'loans_dep')]
for year, var in files:
    G = export_gephi(df,year,var)   # after the loop G holds the last exported graph