In [50]:
import networkx as nx 
import numpy as np
import pandas as pd 
import community 
from itertools import compress
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import fcluster
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Export 2009 and 2015 data for Gephi

#### 1. First, define some functions

###### Important: for presentation purposes, we add layer_dummy as an edge attribute, so this is not a generalized template

In [51]:
def preprocess(df):
    """Reduce the raw frame to the columns used for the export and sanitise them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column listed in ``keep_var`` below; any other
        column is dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the ``keep_var`` columns, with missing values
        replaced by 0 and negative numeric values replaced by 0. The frame
        passed in is left untouched.

    Raises
    ------
    KeyError
        If one of the required columns is absent from ``df``.
    """
    keep_var = ['countrycode','counterpart_code','country','counterpart','year','total_claims',
                'total_claims_ratio','country_dummy_link']

    # Column selection followed by fillna yields a fresh frame, so the writes
    # below can never leak into (or be lost on) a view of the caller's data.
    df = df[keep_var].fillna(0)                 ## keep only used variables, turn na to zero

    # Clamp through the public API: mutating the result of the private
    # `_get_numeric_data()` is not guaranteed to write back into `df`.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols):
        df[numeric_cols] = df[numeric_cols].clip(lower=0)   ## turn negative to zero

    return df

def export_gephi(df,year,var,country_dummies=None):
    """Build the directed graph for one year and write it out as a GEXF file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns 'year', 'country', 'counterpart',
        'country_dummy_link', 'total_claims_ratio' and the column named by ``var``.
    year : value compared against the 'year' column
        Only rows with ``df['year'] == year`` are exported.
    var : str
        Column used as the edge weight; rows where it is not strictly positive
        are dropped. It is also part of the output file name.
    country_dummies : dict, optional
        Mapping node name -> value for the 'country_dummy' node attribute.
        When omitted, the module-level ``country_dummy_node`` mapping is used,
        which therefore has to be defined before this function is called.

    Returns
    -------
    networkx.DiGraph
        Graph with edges running from 'counterpart' to 'country', carrying
        ``var``, 'country_dummy_link' and 'total_claims_ratio' as edge
        attributes, and 'pagerank_centrality' and 'country_dummy' as node
        attributes.

    Side effects
    ------------
    Writes "../result/gexf/<var><year>.gexf"; the directory is not created here.
    """
    if country_dummies is None:
        country_dummies = country_dummy_node

    ## clean the data first
    # fillna returns a new frame, so nothing is written into a slice of `df`.
    df_y = df[df['year']==year].fillna(0)
    df_y = df_y[df_y[var]>0]

    # networkx 2.1 renamed from_pandas_dataframe to from_pandas_edgelist;
    # use whichever one the installed version provides.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    G = build_graph(df_y, source="counterpart", target="country",
                    edge_attr=[var,'country_dummy_link','total_claims_ratio'],
                    create_using=nx.DiGraph())
    get_pagerank_centrality(G,var)

    # Keyword arguments are required here: networkx 1.x takes (G, name, values)
    # positionally while 2.x takes (G, values, name), and mixing them up writes
    # the wrong attribute without raising.
    nx.set_node_attributes(G, name='country_dummy', values=0.0)
    # Restrict to nodes present in this year's graph; networkx 1.x raises
    # KeyError on unknown nodes, 2.x ignores them.
    flagged = {node: flag for node, flag in country_dummies.items() if node in G}
    nx.set_node_attributes(G, name='country_dummy', values=flagged)

    nx.write_gexf(G, "../result/gexf/"+var+str(year)+".gexf")
    return G
    
def get_pagerank_centrality(G,var):
    """Attach weighted PageRank scores to the nodes of ``G``.

    Parameters
    ----------
    G : networkx graph
        Modified in place: every node receives a 'pagerank_centrality' attribute.
    var : str
        Name of the edge attribute passed to ``nx.pagerank`` as the weight.

    Returns
    -------
    None
    """
    p = nx.pagerank(G,weight=var)
    # Keyword arguments keep this valid on both networkx 1.x (G, name, values)
    # and 2.x (G, values, name); the positional form silently stores the wrong
    # attribute when run on the other API generation.
    nx.set_node_attributes(G, name='pagerank_centrality', values=p)
    

#### 2. Read the Stata data and export it

In [52]:
## load the Stata file prepared upstream and run it straight through preprocess,
## so `df` only ever refers to the cleaned frame
df = preprocess(pd.read_stata('../data/agg_ratio_with_dummy.dta'))

In [53]:
df.head()

Unnamed: 0,countrycode,counterpart_code,country,counterpart,year,total_claims,total_claims_ratio,country_dummy_link
0,111.0,456.0,United States,Saudi Arabia,2009,8209.0,0.000629,2.0
1,111.0,172.0,United States,Finland,2009,75817.367188,0.005814,4.0
2,111.0,698.0,United States,Zimbabwe,2009,68.0,5e-06,4.0
3,111.0,273.0,United States,Mexico,2009,206413.0,0.015828,4.0
4,111.0,233.0,United States,Colombia,2009,18160.0,0.001393,4.0


In [54]:
## nodes that get country_dummy = 1.0 in the exported graph (all others stay at 0.0)
## NOTE(review): "Swaziland" looks like it may be meant as "Switzerland" -- confirm against the data
country_dummy_node = dict.fromkeys(
    [
        "China, P.R.: Mainland",
        "United States",
        "United Kingdom",
        "Swaziland",
        "Belgium",
        "France",
        "Germany",
        "Italy",
        "Japan",
        "Ireland",
        "Spain",
        "Luxembourg",
        "Netherlands",
        "China, P.R.: Hong Kong",
    ],
    1.0,
)

In [55]:
## write one gexf file per (year, variable) pair; G keeps the graph of the last pair
files = [(2009,'total_claims'),(2015,'total_claims')]
for year, var in files:
    G = export_gephi(df,year,var)

{'Afghanistan, Islamic Republic of': {'country_dummy': 0,
  'pagerank_centrality': 0.0007302278573225256},
 'Albania': {'country_dummy': 0, 'pagerank_centrality': 0.000819299742480642},
 'Algeria': {'country_dummy': 0, 'pagerank_centrality': 0.0007747588494695256},
 'American Samoa': {'country_dummy': 0,
  'pagerank_centrality': 0.00073268728118854},
 'Andorra': {'country_dummy': 0, 'pagerank_centrality': 0.0007431059848257687},
 'Angola': {'country_dummy': 0, 'pagerank_centrality': 0.0009498268326595597},
 'Anguilla': {'country_dummy': 0,
  'pagerank_centrality': 0.0010204200806731205},
 'Antigua and Barbuda': {'country_dummy': 0,
  'pagerank_centrality': 0.000709123180479836},
 'Argentina': {'country_dummy': 0,
  'pagerank_centrality': 0.0012630386839044788},
 'Armenia, Republic of': {'country_dummy': 0,
  'pagerank_centrality': 0.0007188864647653233},
 'Aruba': {'country_dummy': 0, 'pagerank_centrality': 0.000738112648565891},
 'Australia': {'country_dummy': 0, 'pagerank_centrality'

In [8]:
# ## export all files to gephi
# files = [(2009,'total_weight'),(2009,'loans_dep_weight'),(2009,'CDIS_weight'),(2009,'CPIS_weight'),
#          (2015,'total_weight'),(2015,'loans_dep_weight'),(2015,'CDIS_weight'),(2015,'CPIS_weight')]
# for x in files:
#     year,var = x 
#     G = export_gephi(df,year,var)