In [49]:
import networkx as nx 
import numpy as np
import pandas as pd 
import community 
from itertools import compress
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import fcluster
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Export 2009 and 2015 data for gephi

#### 1. First, define some functions

###### Some thing important, for presentation purpuse, we add layer_dummy as edge attribute, so it is not going to be a generalized template

In [72]:
def preprocess(df):
    keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','loans_dep','layer_dummy']
    df = df[keep_var]                           ## keep only used variables 
    df = df.replace(np.nan,0)                   ## turn na to zero 
    num = df._get_numeric_data()
    num[num < 0] = 0                            ## turn negative to zero 
    df['total'] = df[['CDIS_IAD','CPIS_IAP','loans_dep']].sum(axis=1)

#     mata = ['countrycode','counterpart_code','country','counterpart','year','layer_dummy']
#     var_org = ['CDIS_IAD','CPIS_IAP','loans_dep','total']
#     var_sum_out = ['CDIS_Sum_out','CPIS_Sum_out','loans_dep_Sum_out','total_Sum_out']
#     var_sum_in = ['CDIS_Sum_in','CPIS_Sum_in','loans_dep_Sum_in','total_Sum_in']
#     var_weight = ['CDIS_weight','CPIS_weight','loans_dep_weight','total_weight']

#     df[var_sum_out]= df.groupby(['countrycode','year'])[var_org].transform(sum)           ## like stata egen sum 
#     df[var_sum_in]= df.groupby(['counterpart_code','year'])[var_org].transform(sum)        ## like stata egen sum 
#     df_weight = pd.DataFrame((df[var_org].values / df[var_sum_out].values),columns=[var_weight])
#     df[var_weight] = df_weight                                                        ## create the weight variables 
#     mata.extend(var_weight)
#     df = df[mata]
    df.fillna(0,inplace=True)
    
    return df 

def export_gephi(df,year,var):
    ## clean the data first 
    df_y = df[df['year']==year]
    df_y.fillna(0,inplace=True)
    df_y = df_y[df_y[var]>0]
    G = nx.from_pandas_dataframe(df_y, source="country", target="counterpart", edge_attr=[var,'layer_dummy'],create_using=nx.DiGraph())
    #get_hierarchy_cluster(G,var)                                   ## add hierarchy_cluster to node attribute
    #get_nx_community(G,var)                                        ## add nx community detection to node attribute
    #get_eigen_centrality(G,var)
    #nx.write_gexf(G, "../result/gexf/"+var+str(year)+".gexf")
    return G

    

#### Read stata data

In [73]:
## read pre-processed data from stata
df = pd.read_stata('../data/agg_v4_layer_dummy.dta')
df = preprocess(df)

#### Show US to UK link

In [74]:
df[(df.country == 'United States') & (df.counterpart == 'United Kingdom')&(df.year == 2015)]

Unnamed: 0,countrycode,counterpart_code,country,counterpart,year,CDIS_IAD,CPIS_IAP,loans_dep,layer_dummy,total
7,111.0,112.0,United States,United Kingdom,2015,717895.0,1244554.0,614951.0,4.0,2577400.0


##### Load to Graph

In [76]:
## export all files to gephi
files = [(2015,'total')]
for x in files:
    year,var = x 
    G = export_gephi(df,year,var)

In [77]:
var = 'total_weight'
c = nx.pagerank(G,weight=var)
c_df = pd.DataFrame(list(c.items()),columns=['country','centrality'])      ## make it into dataframe
c_df.sort_values(by='centrality',ascending=0,inplace=True)                 ## sort it 
c_df.head(20)

Unnamed: 0,country,centrality
159,India,0.013614
187,Italy,0.01326
74,Belgium,0.013076
94,Switzerland,0.012999
160,United Kingdom,0.01296
96,Canada,0.012791
2,Luxembourg,0.012723
117,France,0.012683
181,Isle of Man,0.012492
71,Austria,0.012439


#### US to UK link in the graph

In [45]:
G['United States']['United Kingdom']

{'layer_dummy': 4.0, 'total_weight': 0.14458359613570412}

#### Convert to adj matrix

In [46]:
var = 'total_weight'
node_list = G.nodes()
node_list.sort()
A = nx.to_numpy_matrix(G = G,nodelist=node_list,weight=var)

In [25]:
node_list.index('United States')

200

In [26]:
node_list.index('United Kingdom')

199

In [48]:
A[200,199]

0.14458359613570412

In [8]:
# ## export all files to gephi
# files = [(2009,'total_weight'),(2009,'loans_dep_weight'),(2009,'CDIS_weight'),(2009,'CPIS_weight'),
#          (2015,'total_weight'),(2015,'loans_dep_weight'),(2015,'CDIS_weight'),(2015,'CPIS_weight')]
# for x in files:
#     year,var = x 
#     G = export_gephi(df,year,var)