In [23]:
import networkx as nx 
import numpy as np
import pandas as pd 
import community 
from itertools import compress
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [24]:
def preprocess(df):
    """Trim the raw panel to the columns used for network export and clean its values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``total_claims`` and every column named in ``keep_var``
        below other than ``total`` (``total`` is derived here).

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the ``keep_var`` columns, with missing values
        replaced by 0 and negative numeric values raised to 0. The frame passed
        in is not modified.
    """
    keep_var = ['countrycode','counterpart_code','country','counterpart','year','total','layer_dummy',
                'total_claims_ratio','country_dummy','country_dummy_link']

    ## total_claims is already created in stata, so it is only exposed under the
    ## name `total`; assign() builds a new frame instead of adding a column to
    ## the caller's frame
    cleaned = df.assign(total=df['total_claims'])[keep_var]    ## keep only used variables
    cleaned = cleaned.replace(np.nan,0).fillna(0)              ## turn na to zero

    ## turn negative to zero on the numeric columns only; uses the public
    ## select_dtypes/clip API rather than writing through _get_numeric_data(),
    ## which only has an effect when pandas hands back a view
    numeric_cols = cleaned.select_dtypes(include=[np.number]).columns
    if len(numeric_cols):
        cleaned[numeric_cols] = cleaned[numeric_cols].clip(lower=0)

    ## NOTE: an earlier revision also derived `<var>_weight` shares here
    ## (value / per-country-year sum * 100). That code had been commented out
    ## and is dropped, so no `*_weight` columns are returned.
    return cleaned



def get_nx_community(G,var):
    """Detect communities on a symmetrised copy of G and store them on G's nodes.

    The directed adjacency matrix (weighted by edge attribute `var`) is added to
    its transpose, so each undirected edge weight is the sum of the in- and
    out-edge weights. `community.best_partition` is run on that undirected
    graph and the resulting node -> community mapping is written to the node
    attribute ``'nx_community'`` of G. Nothing is returned.

    algorithm: https://sites.google.com/site/findcommunities/
    package: http://perso.crans.org/aynaud/communities/
    """
    ## fixed node order so matrix rows/columns can be mapped back to names
    ordered_nodes = sorted(G.nodes())
    adjacency = nx.to_numpy_matrix(G=G, nodelist=ordered_nodes, weight=var)
    symmetric = adjacency + adjacency.T

    ## the matrix-built graph is labelled by position; relabel to country names
    positional_graph = nx.from_numpy_matrix(symmetric)
    position_to_name = dict(zip(positional_graph.nodes(), ordered_nodes))
    undirected = nx.relabel_nodes(positional_graph, position_to_name)

    partition = community.best_partition(undirected, weight='weight', resolution=1)
    ## NOTE(review): (G, name, values) is the argument order this notebook uses
    ## throughout -- confirm it matches the installed networkx version
    nx.set_node_attributes(G, 'nx_community', partition)
    
def get_eigen_centrality(G,var):
    """Store eigenvector centrality, weighted by edge attribute `var`, on G's nodes.

    Scores come from ``nx.eigenvector_centrality_numpy`` and are written to the
    node attribute ``'eigenvector_centrality'``. G is modified in place and
    nothing is returned.
    """
    nx.set_node_attributes(
        G,
        'eigenvector_centrality',
        nx.eigenvector_centrality_numpy(G, weight=var),
    )
    
def get_pagerank_centrality(G,var):
    """Store PageRank scores, weighted by edge attribute `var`, on G's nodes.

    Scores come from ``nx.pagerank`` and are written to the node attribute
    ``'pagerank_centrality'``. G is modified in place and nothing is returned.
    """
    pagerank_scores = nx.pagerank(G, weight=var)
    nx.set_node_attributes(G, 'pagerank_centrality', pagerank_scores)
    

#### Merge contagion data into G

In [25]:
def _add_cumulative_step_dummies(df_c):
    """Add one 0/1 column per distinct ``'time step'`` value to `df_c`, in place.

    For every distinct value k found in ``df_c['time step']`` a column
    ``'step<k>'`` is created that is 1 on rows whose ``'time step'`` is one of
    0, 1, ..., k and 0 on all other rows. Returns `df_c` for convenience.
    """
    for sp in df_c['time step'].unique():
        df_c['step'+str(sp)] = df_c['time step'].isin(list(range(sp+1))).astype(int)
    return df_c


def load_cont_data(file,sheet):
    """Read a contagion sheet from an Excel workbook and add cumulative step dummies.

    Parameters
    ----------
    file, sheet
        Passed positionally to ``pd.read_excel``. The sheet must contain a
        ``'time step'`` column.

    Returns
    -------
    pandas.DataFrame
        The sheet plus one ``'step<k>'`` 0/1 column per distinct time step.
    """
    return _add_cumulative_step_dummies(pd.read_excel(file,sheet))


def load_cont_single_layer_data(df_cm,layer):
    """Select one layer's rows from `df_cm` and add cumulative step dummies.

    Parameters
    ----------
    df_cm : pandas.DataFrame
        Must contain ``' layer'`` (the column name includes the leading space)
        and ``'time step'``.
    layer
        Value compared against ``df_cm[' layer']`` to pick the rows to keep.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows plus one ``'step<k>'`` 0/1 column per
        distinct time step among those rows. `df_cm` is not modified.
    """
    ## .copy(): the boolean filter yields a frame derived from df_cm; adding
    ## columns to it directly is chained assignment (SettingWithCopyWarning)
    df_c = df_cm[df_cm[' layer']==layer].copy()
    return _add_cumulative_step_dummies(df_c)

def create_steps(G,df_c,time_steps,shock_country='United States'):
    """Build a per-node table of contagion step dummies for the nodes of G.

    Parameters
    ----------
    G
        Graph whose ``nodes()`` are country names.
    df_c : pandas.DataFrame
        Contagion table with a ``'country'`` column and one ``'step<k>'``
        column for every k in `time_steps`.
    time_steps : iterable
        Step identifiers; column names are built as ``'step' + str(k)``.
    shock_country : str, optional
        Node whose step columns are all forced to 1. Defaults to
        ``'United States'``, the value that used to be hard-coded here
        (presumably the origin of the shock -- confirm against the data).

    Returns
    -------
    pandas.DataFrame
        One row per node of G (left join on ``'country'``), with a
        ``'country'`` column followed by the step columns. Nodes missing from
        `df_c` get 0 in every step column.
    """
    step_cols = ['step'+str(v) for v in time_steps]

    ## list(...) so this works whether nodes() returns a list or a view object
    node_frame = pd.DataFrame(list(G.nodes()),columns=['country'])
    merged = pd.merge(node_frame,df_c[step_cols+['country']],on='country',how='left')

    merged.loc[merged['country']==shock_country,step_cols] = 1
    return merged.fillna(0)

def merge_steps(G,G_merged,time_steps):
    """Copy every step dummy from `G_merged` onto the nodes of G.

    For each k in `time_steps`, column ``'step<k>'`` of `G_merged` is turned
    into a country -> int mapping (keyed by ``G_merged['country']``) and
    written to G as the node attribute ``'step<k>'``. G is modified in place
    and nothing is returned.
    """
    step_names = ['step'+str(v) for v in time_steps]
    countries = G_merged['country']
    for attr_name in step_names:
        raw_flags = dict(zip(countries, G_merged[attr_name]))
        ## cast to plain int before attaching the values to the graph
        int_flags = {}
        for country, flag in raw_flags.items():
            int_flags[country] = int(flag)
        nx.set_node_attributes(G, attr_name, int_flags)

######################
## run all processes##
######################

def get_contagion_data(G,file,sheet):
    """Load a contagion sheet and attach its step dummies to the nodes of G.

    Chains ``load_cont_data`` -> ``create_steps`` -> ``merge_steps``; the time
    steps are the distinct values of the sheet's ``'time step'`` column.
    G is modified in place and nothing is returned.
    """
    contagion = load_cont_data(file,sheet)
    observed_steps = contagion['time step'].unique()
    node_steps = create_steps(G,contagion,observed_steps)
    merge_steps(G,node_steps,observed_steps)
    
def get_contagion_single_layer(G,df_cm,layer):
    """Attach one layer's contagion step dummies to the nodes of G.

    Chains ``load_cont_single_layer_data`` -> ``create_steps`` ->
    ``merge_steps``; the time steps are the distinct ``'time step'`` values
    among the rows of `df_cm` belonging to `layer`. G is modified in place and
    nothing is returned.
    """
    layer_contagion = load_cont_single_layer_data(df_cm,layer)
    observed_steps = layer_contagion['time step'].unique()
    node_steps = create_steps(G,layer_contagion,observed_steps)
    merge_steps(G,node_steps,observed_steps)
    
def get_contagion_multi_layer(G,df_cm,layer):
    """Placeholder: not implemented. Ignores its arguments and returns None."""
    return None


 

In [31]:
###############################
### export the entire process##
###############################

def export_gephi(df,year,var,file,sheet):
    """Build the directed network for one year, annotate it and write it as GEXF.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list with ``'year'``, ``'country'``, ``'counterpart'``,
        ``'layer_dummy'`` and the column named by `var`.
    year
        Value of ``df['year']`` to keep.
    var : str
        Edge-weight column; only rows with ``var > 0`` become edges.
    file, sheet
        Excel workbook and sheet holding the contagion steps
        (forwarded to ``get_contagion_data``).

    Returns
    -------
    The annotated ``nx.DiGraph`` (edges run country -> counterpart). The same
    graph is written to ``../result/gexf/<var><year>.gexf``.
    """
    ## clean the data first: one year, NaN -> 0, strictly positive weights only
    yearly = df[df['year']==year].fillna(0)
    edges = yearly[yearly[var]>0]

    G = nx.from_pandas_dataframe(
        edges,
        source="country",
        target="counterpart",
        edge_attr=[var,'layer_dummy'],
        create_using=nx.DiGraph(),
    )

    ## node attributes (the hierarchy-cluster step is disabled)
    get_nx_community(G,var)           ## nx community detection
    get_eigen_centrality(G,var)       ## eigenvector centrality
    get_contagion_data(G,file,sheet)  ## merge contagion data to G

    out_path = "../result/gexf/"+var+str(year)+".gexf"
    nx.write_gexf(G, out_path)

    return G

def export_single_layer_contagion(df,year,var,df_cm,layer):
    """Build one layer's directed network for a year, annotate it and write it as GEXF.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list with ``'year'``, ``'country'``, ``'counterpart'`` and the
        column named by `var`.
    year
        Value of ``df['year']`` to keep.
    var : str
        Edge-weight column; only rows with ``var > 0`` become edges.
    df_cm : pandas.DataFrame
        Contagion table covering all layers (forwarded to
        ``get_contagion_single_layer``).
    layer
        Layer identifier used to select rows of `df_cm`.

    Returns
    -------
    The annotated ``nx.DiGraph`` (edges run country -> counterpart). The same
    graph is written to ``../result/gexf/multi_layer_<var><year>.gexf``.
    """
    ## clean the data first: one year, NaN -> 0, strictly positive weights only
    yearly = df[df['year']==year].fillna(0)
    edges = yearly[yearly[var]>0]

    G = nx.from_pandas_dataframe(
        edges,
        source="country",
        target="counterpart",
        edge_attr=[var],
        create_using=nx.DiGraph(),
    )

    ## node attributes (hierarchy-cluster and community steps are disabled)
    get_eigen_centrality(G,var)
    get_contagion_single_layer(G,df_cm,layer)  ## merge contagion data to G

    ## NOTE(review): same 'multi_layer_' prefix as export_multi_layer_contagion;
    ## files only stay distinct because `var` differs -- confirm this is intended
    out_path = "../result/gexf/"+'multi_layer_'+var+str(year)+".gexf"
    nx.write_gexf(G, out_path)

    return G

def export_multi_layer_contagion(df,file,sheet,year,var):
    """Build the multi-layer network for a year with precomputed step values and write it as GEXF.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list with ``'year'``, ``'country'``, ``'counterpart'``,
        ``'layer_dummy'``, ``'total_claims_ratio'``, ``'country_dummy'``,
        ``'country_dummy_link'`` and the column named by `var`.
    file, sheet
        Excel workbook and sheet with a ``'country'`` column; every column from
        the third onward is treated as a step column.
    year
        Value of ``df['year']`` to keep.
    var : str
        Edge-weight column; only rows with ``var > 0`` become edges.

    Returns
    -------
    The annotated ``nx.DiGraph``. The same graph is written to
    ``../result/gexf/multi_layer_<var><year>.gexf``.
    """
    contagion = pd.read_excel(file,sheet)
    ## everything after the first two columns is a step column
    step_columns = list(contagion.columns[2:])

    yearly = df[df['year']==year].fillna(0)
    edges = yearly[yearly[var]>0]

    ## NOTE(review): edges run counterpart -> country here, the reverse of the
    ## other export functions -- looks deliberate, confirm
    G = nx.from_pandas_dataframe(
        edges,
        source="counterpart",
        target="country",
        edge_attr=[var,'layer_dummy','total_claims_ratio','country_dummy','country_dummy_link'],
        create_using=nx.DiGraph(),
    )

    ## one row per node; nodes absent from the sheet get 0 for every step
    node_frame = pd.DataFrame(G.nodes(),columns=['country'])
    node_steps = pd.merge(node_frame,contagion,on='country',how='left').fillna(0)

    countries = node_steps['country']
    for step_name in step_columns:
        raw_values = dict(zip(countries, node_steps[step_name]))
        int_values = {}
        for country, value in raw_values.items():
            int_values[country] = int(value)
        nx.set_node_attributes(G, step_name, int_values)

    ## eigenvector centrality is disabled for this export; PageRank is used
    get_pagerank_centrality(G,var)

    out_path = "../result/gexf/"+'multi_layer_'+var+str(year)+".gexf"
    nx.write_gexf(G, out_path)
    return G


#### Load network

In [32]:
## Read the pre-processed Stata panel, then trim/clean it with preprocess().
## NOTE(review): `df` is rebound to the cleaned frame, which no longer has a
## 'total_claims' column -- re-running only the second line on the cleaned
## frame will fail; re-run the whole cell.
df = pd.read_stata('../data/agg_ratio_with_dummy.dta')   ## use v4 data
df = preprocess(df)

In [33]:
## columns that survive preprocess() -- note there is no '*_weight' column
df.columns

Index(['countrycode', 'counterpart_code', 'country', 'counterpart', 'year',
       'total', 'layer_dummy', 'total_claims_ratio', 'country_dummy',
       'country_dummy_link'],
      dtype='object')

##### Export aggregated layer 

In [16]:
## Export the aggregate layer for the 2015 shock on the US.
## NOTE(review): 'total_weight' is not among the columns listed by `df.columns`
## above, and this cell's execution count (In [16]) is lower than that of the
## cells defining the export functions and loading `df` -- it looks like it
## was last run against an earlier `df`. Confirm before Restart & Run All.
year = 2015
sheet = 'Agg_US_15'
G = export_gephi(df,year,'total_weight','../data/country_match.xlsx',sheet)

In [26]:
## Inspect the attributes attached to the US node by the aggregate export.
## NOTE(review): relies on the `G.node[...]` accessor -- confirm it exists in
## the installed networkx version.
G.node['United States']

{'eigenvector_centrality': 0.579030546001884,
 'nx_community': 0,
 'step0': 1,
 'step1': 1,
 'step2': 1,
 'step3': 1,
 'step4': 1,
 'step5': 1,
 'step6': 1}

#### Export the multi-layer network with step dummies read from Excel

In [34]:
## Export the multi-layer network for 2015 with step values taken from the
## 'multi_steps_15' sheet. Rebinds the notebook-level names `sheet`, `year`
## and `G`.
file = '../data/country_match.xlsx'   ## workbook holding the contagion sheets
sheet = 'multi_steps_15'              ## sheet with one column per step
year = 2015
var = 'total'                         ## edge-weight column in `df`
G = export_multi_layer_contagion(df,file,sheet,year,var)


In [35]:
## Inspect the attributes attached to the US node by the multi-layer export.
## NOTE(review): relies on the `G.node[...]` accessor -- confirm it exists in
## the installed networkx version.
G.node['United States']

{'pagerank_centrality': 0.14048378291662422,
 'step0': 3,
 'step1': 3,
 'step10': 1,
 'step11': 1,
 'step2': 3,
 'step3': 3,
 'step4': 3,
 'step5': 1,
 'step6': 1,
 'step7': 1,
 'step8': 1,
 'step9': 1}

#### Export multi layer one by one 

In [9]:
## Map each layer id (as used in the contagion sheet) to its edge-weight column.
## NOTE(review): none of these '*_weight' columns appear in the `df.columns`
## output above, and this cell (In [9]) predates the current `df` -- confirm
## the columns exist before re-running.
layer_map = {0:'CDIS_IADE_weight',1:'CDIS_IADD_weight',2:'CPIS_IAPE_weight',3:'CPIS_IAPD_weight',4:'loans_dep_weight'}
## NOTE(review): this workbook lives under ../result/contagion/, whereas the
## other cells read ../data/country_match.xlsx -- confirm which is current.
df_cm = pd.read_excel('../result/contagion/country_match.xlsx','Multi_US_09')    # pass in file name and sheet name

In [10]:
## Export every layer for 2009 one by one: `key` is the layer id used to pick
## rows of df_cm, `value` is that layer's edge-weight column in `df`.
## `G` is rebound on each iteration, so only the last layer's graph remains.
for key,value in layer_map.items():
    print((key,value))
    G = export_single_layer_contagion(df,2009,value,df_cm,key)
    ## show the US node's attributes as a quick sanity check of the export
    print(G.node['United States'])

(0, 'CDIS_IADE_weight')
{'step2': 1, 'step5': 1, 'step1': 1, 'eigenvector_centrality': 0.320305390008162, 'step4': 1, 'step3': 1, 'step0': 1}
(1, 'CDIS_IADD_weight')
{'step2': 1, 'step1': 1, 'eigenvector_centrality': 0.5481888373116015, 'step4': 1, 'step3': 1, 'step0': 1}
(2, 'CPIS_IAPE_weight')
{'step2': 1, 'step5': 1, 'step1': 1, 'eigenvector_centrality': 0.7700268832310154, 'step4': 1, 'step3': 1, 'step0': 1}
(3, 'CPIS_IAPD_weight')
{'step2': 1, 'step1': 1, 'eigenvector_centrality': 0.6775382537376167, 'step4': 1, 'step3': 1, 'step0': 1}
(4, 'loans_dep_weight')
{'step2': 1, 'step1': 1, 'eigenvector_centrality': 0.5020122030386542, 'step4': 1, 'step3': 1, 'step0': 1}
