In [None]:
import networkx as nx 
import numpy as np
import pandas as pd 
import community 
from itertools import compress
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [2]:
def preprocess(df):
    """Turn the raw position table into per-(countrycode, year) percentage weights.

    Steps, in order:
      1. keep the identifier columns and the three value columns
         (``CDIS_IAD``, ``CPIS_IAP``, ``loans_dep``);
      2. replace missing values with 0 and clip negative numbers to 0;
      3. add ``total`` = row-wise sum of the three value columns;
      4. divide every value by the sum of that value over all rows that share
         the same ``(countrycode, year)`` and multiply by 100.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``countrycode``, ``counterpart_code``, ``country``,
        ``counterpart``, ``year``, ``CDIS_IAD``, ``CPIS_IAP`` and ``loans_dep``.
        Extra columns are ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the caller's index, the five identifier columns and
        ``CDIS_weight``, ``CPIS_weight``, ``loans_dep_weight``, ``total_weight``.
        A weight is 0 where the group sum is 0.

    Raises
    ------
    KeyError
        If any required column is missing.
    """
    meta_cols = ['countrycode','counterpart_code','country','counterpart','year']
    value_cols = ['CDIS_IAD','CPIS_IAP','loans_dep']
    org_cols = value_cols + ['total']
    weight_cols = ['CDIS_weight','CPIS_weight','loans_dep_weight','total_weight']

    ## keep only used variables and turn na to zero; both operations return new
    ## objects, so nothing below can write through to the caller's frame
    work = df[meta_cols + value_cols].replace(np.nan, 0)

    ## turn negative to zero in every numeric column (explicit assignment instead
    ## of mutating the result of a private accessor and hoping it is a view)
    numeric_cols = work.select_dtypes(include=[np.number]).columns
    work[numeric_cols] = work[numeric_cols].clip(lower=0)

    work['total'] = work[value_cols].sum(axis=1)

    ## like stata egen sum: each row receives its (countrycode, year) group total
    sum_out = work.groupby(['countrycode','year'])[org_cols].transform('sum')

    ## 0/0 happens for groups whose total is zero; the NaN is turned into 0 below
    with np.errstate(divide='ignore', invalid='ignore'):
        weights = (work[org_cols].values / sum_out.values) * 100

    ## build the weight frame on the working index so rows stay aligned even when
    ## the input index is not a plain 0..n-1 range
    weight_df = pd.DataFrame(weights, columns=weight_cols, index=work.index)
    result = pd.concat([work[meta_cols], weight_df], axis=1)

    return result.fillna(0)

def export_gephi(df,year,var,
                 contagion_file='../result/contagion/country_match.xlsx',
                 contagion_sheet='Sheet2',
                 out_dir='../result/gexf/'):
    """Build the directed network for one year/weight column and write it as GEXF.

    Rows of ``df`` for ``year`` whose ``var`` is strictly positive become edges
    ``country -> counterpart`` carrying ``var`` as edge attribute. The graph is
    then annotated by ``get_nx_community``, ``get_eigen_centrality`` and
    ``get_contagion_data`` before being written to
    ``<out_dir>/<var><year>.gexf``.

    Parameters
    ----------
    df : pandas.DataFrame
        Output of ``preprocess`` (needs ``year``, ``country``, ``counterpart``
        and the column named by ``var``).
    year : int
        Value of the ``year`` column to keep.
    var : str
        Name of the weight column used as edge weight.
    contagion_file : str, optional
        Excel workbook passed to ``get_contagion_data``.
    contagion_sheet : str, optional
        Sheet of ``contagion_file`` to read.
    out_dir : str, optional
        Directory that receives the ``.gexf`` file.

    Returns
    -------
    networkx.DiGraph
        The annotated graph that was written to disk.

    Notes
    -----
    NOTE(review): ``nx.from_pandas_dataframe`` is the networkx 1.x spelling;
    later networkx releases renamed it -- confirm the pinned networkx version
    before upgrading.
    """
    import os  # local import: only needed here to assemble the output path

    ## clean the data first; boolean indexing yields a new frame and fillna
    ## returns another one, so no in-place write on a slice is needed
    df_y = df[df['year']==year].fillna(0)
    df_y = df_y[df_y[var]>0]
    G = nx.from_pandas_dataframe(df_y, source="country", target="counterpart", edge_attr=[var],create_using=nx.DiGraph())
    get_nx_community(G,var)                                        ## add nx community detection to node attribute
    get_eigen_centrality(G,var)                                    ## add eigenvector centrality to node attribute
    get_contagion_data(G,contagion_file,contagion_sheet)           ## merge contagion data to G

    nx.write_gexf(G, os.path.join(out_dir, var+str(year)+".gexf"))

    return G

def get_nx_community(G,var):
    """Label every node of ``G`` with a community id stored as ``nx_community``.

    The partition is computed by ``community.best_partition`` on an undirected
    graph whose adjacency matrix is ``A + A.T`` (``A`` being the ``var``-weighted
    adjacency matrix of ``G``), so each undirected edge weight is the sum of the
    two directed weights between the pair.

    algorithm: https://sites.google.com/site/findcommunities/
    package:   http://perso.crans.org/aynaud/communities/

    Parameters
    ----------
    G : networkx graph
        Graph to annotate; modified in place.
    var : str
        Edge attribute of ``G`` used as weight.
    """
    ## fix a deterministic node order so matrix rows can be mapped back to names
    ordered_nodes = sorted(G.nodes())

    ## adjacency matrix plus its transpose = in-weight + out-weight per pair
    adjacency = nx.to_numpy_matrix(G = G,nodelist=ordered_nodes,weight=var)
    undirected = nx.from_numpy_matrix(adjacency + adjacency.T)

    ## relabel the integer nodes back to country names
    index_to_name = dict(zip(undirected.nodes(), ordered_nodes))
    undirected = nx.relabel_nodes(undirected,index_to_name)

    partition = community.best_partition(undirected,weight='weight',resolution=1)
    nx.set_node_attributes(G, 'nx_community', partition)
    
def get_eigen_centrality(G,var):
    """Store ``var``-weighted eigenvector centrality on each node of ``G``.

    The scores come from ``nx.eigenvector_centrality_numpy`` and are written
    in place under the node attribute ``eigenvector_centrality``.
    """
    scores = nx.eigenvector_centrality_numpy(G,weight=var)
    nx.set_node_attributes(G, 'eigenvector_centrality', scores)
    

#### Merge contagion data into G

In [3]:
def load_cont_data(file,sheet):
    """Read the contagion sheet and add cumulative step indicator columns.

    For k = 0..6 the column ``step<k>`` is 1 when the row's ``time step`` is one
    of the integers 0..k, and 0 otherwise.

    Parameters
    ----------
    file : str
        Path of the Excel workbook.
    sheet : str or int
        Sheet to read, forwarded to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The sheet contents plus the columns ``step0`` ... ``step6``.
    """
    df_c = pd.read_excel(file,sheet)
    time_step = df_c['time step']
    for last in range(7):
        reached = time_step.isin(list(range(last + 1)))
        df_c['step' + str(last)] = reached.astype(int)

    return df_c

def create_steps(G,df_c):
    """Build a node-by-step table for the nodes of ``G``.

    Every node returned by ``G.nodes()`` gets one row (left join on
    ``country``). The row for ``'United States'`` has all step columns forced
    to 1, and nodes without a match in ``df_c`` get 0 everywhere.

    Parameters
    ----------
    G : graph-like
        Only ``G.nodes()`` is used.
    df_c : pandas.DataFrame
        Needs ``country`` and ``step0`` ... ``step6``.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``step0`` ... ``step6``, one row per merged node.
    """
    step_cols = ['step0','step1', 'step2','step3', 'step4', 'step5','step6']

    node_frame = pd.DataFrame(G.nodes(),columns=['country'])
    merged = node_frame.merge(df_c[['country'] + step_cols],on='country',how='left')

    is_us = merged['country'] == 'United States'
    merged.loc[is_us, step_cols] = 1

    return merged.fillna(0)

def merge_steps(G,G_merged):
    """Copy the ``step0`` ... ``step6`` flags of ``G_merged`` onto the nodes of ``G``.

    For each step column a ``{country: int(flag)}`` mapping is built and set as
    the node attribute of the same name; ``G`` is modified in place.
    """
    countries = G_merged['country']
    for step in ('step0','step1', 'step2','step3', 'step4', 'step5','step6'):
        ## cast to plain int before attaching the flag to the graph
        flags = {name: int(flag) for name, flag in zip(countries, G_merged[step])}
        nx.set_node_attributes(G, step, flags)

## run all processes  
def get_contagion_data(G,file,sheet):
    """Load the contagion sheet and write its step flags onto the nodes of ``G``.

    Chains ``load_cont_data`` -> ``create_steps`` -> ``merge_steps``; ``G`` is
    modified in place and nothing is returned.
    """
    contagion = load_cont_data(file,sheet)
    node_steps = create_steps(G,contagion)
    merge_steps(G,node_steps)
    
    

#### Load network

In [4]:
## read the pre-processed data exported from stata and reduce it to weight columns
df = preprocess(pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed1.dta'))

In [5]:
## build and export the 2015 network weighted by total_weight
G = export_gephi(df, year=2015, var='total_weight')

In [None]:
## export all files to gephi: every weight column for 2009, then for 2015
files = [(year, var)
         for year in (2009, 2015)
         for var in ('total_weight', 'loans_dep_weight', 'CDIS_weight', 'CPIS_weight')]
for year, var in files:
    G = export_gephi(df, year, var)