# Networkx And Global Flow of Funds

In [20]:
import csv
import networkx as nx 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
plt.style.use('ggplot')
%matplotlib inline

In [21]:
## Load the pre-processed dataset that was exported from Stata.
## The path is relative to the notebook's working directory.
df = pd.read_stata('data/0_CPIS_CDIS_BIS_USTIC_merged_fixed.dta')

In [22]:
## List the column names available in the loaded data
df.columns.values

array(['country', 'countrycode', 'counterpart', 'counterpart_code', 'year',
       'CDIS_IADE', 'CDIS_IADD', 'CDIS_IAD', 'CDIS_IADF', 'CPIS_IAP',
       'CPIS_IAPE', 'CPIS_IAPD', 'loans_dep'], dtype=object)

In [27]:
## Columns kept for the 2015 cross-section: identifiers plus the three position measures.
keep_var = ['countrycode','counterpart_code','country','counterpart','CDIS_IAD','CPIS_IAP','loans_dep']

## Select the 2015 rows and the kept columns in a single .loc call, then .copy() so that
## df_2015 is an independent frame: the assignments below then modify df_2015 itself
## rather than a slice of df (which would raise SettingWithCopyWarning).
df_2015 = df.loc[df['year'] == 2015, keep_var].copy()
df_2015 = df_2015.fillna(0)                                     # missing values are treated as 0

## Recode negative positions to 0. The masks are built from df_2015 itself, not from df,
## so they do not depend on index alignment between the two frames.
df_2015.loc[df_2015['CDIS_IAD'] < 0, 'CDIS_IAD'] = 0
df_2015.loc[df_2015['CPIS_IAP'] < 0, 'CPIS_IAP'] = 0
## df_2015['log_CDIS_IAD'] = np.log(df_2015['CDIS_IAD'])        # log transform, left disabled

In [28]:
## Preview the first rows of the 2015 cross-section
df_2015.head()

Unnamed: 0,countrycode,counterpart_code,country,counterpart,CDIS_IAD,CPIS_IAP,loans_dep
7,316.0,111.0,Barbados,United States,3285.0,4575.566589,0.0
21,960.0,111.0,Croatia,United States,2.619745,2084.0,0.0
22,443.0,111.0,Kuwait,United States,741.56012,2064.07074,0.0
29,514.0,111.0,Bhutan,United States,0.0,50.0,0.0
30,722.0,111.0,Senegal,United States,0.0,4.0,0.0


In [29]:
## Summary statistics for the numeric columns of the 2015 cross-section
df_2015.describe()

Unnamed: 0,countrycode,counterpart_code,CDIS_IAD,CPIS_IAP,loans_dep
count,31745.0,31745.0,31745.0,31745.0,31745.0
mean,522.174133,525.499756,1205.572,1323.649,421.135837
std,287.403625,286.209991,17908.1,21591.84,9055.887223
min,111.0,111.0,0.0,0.0,0.0
25%,238.0,248.0,0.0,0.0,0.0
50%,532.0,534.0,0.0,0.0,0.0
75%,746.0,748.0,0.1113362,0.0,0.0
max,968.0,968.0,1115748.0,1369423.0,940793.0


In [30]:
## See how many distinct reporting countries and counterparts we have
for column in ('country', 'counterpart'):
    print(df_2015[column].unique().shape)

(233,)
(235,)


### Now, we dump data into a network 

In [61]:
# Build a directed, weighted graph from the 2015 edge list.
# Only strictly positive CDIS_IAD positions become edges. .copy() makes the filtered
# frame independent, so adding a column below does not raise SettingWithCopyWarning.
df_2015 = df_2015[df_2015.CDIS_IAD > 0].copy()
# Reciprocal of the position, stored as a second edge attribute (a larger position gives
# a smaller value). Well-defined because CDIS_IAD > 0 after the filter above.
df_2015['rev_CDIS_IAD'] = 1 / df_2015.CDIS_IAD

# Newer networkx releases renamed from_pandas_dataframe to from_pandas_edgelist;
# use whichever one the installed version provides.
_edgelist_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
G = _edgelist_to_graph(df_2015, source="country", target="counterpart",
                       edge_attr=['CDIS_IAD', 'rev_CDIS_IAD'],   # 'CPIS_IAP' and 'loans_dep' are not attached
                       create_using=nx.DiGraph())

In [62]:
# Look up one particular edge, just to make sure the graph works as expected
G['United States']['United Kingdom']

{'CDIS_IAD': 717895.0, 'rev_CDIS_IAD': 1.3929613942309516e-06}

In [63]:
# Cross-check the United States -> United Kingdom edge against its row in the dataframe
df_2015[df_2015['country'].eq('United States') & df_2015['counterpart'].eq('United Kingdom')]

Unnamed: 0,countrycode,counterpart_code,country,counterpart,CDIS_IAD,CPIS_IAP,loans_dep,rev_CDIS_IAD
2811,111.0,112.0,United States,United Kingdom,717895.0,1244554.0,614951.0,1e-06


In [64]:
## Print networkx's text summary of the graph
print(nx.info(G))

Name: 
Type: DiGraph
Number of nodes: 234
Number of edges: 8872
Average in degree:  37.9145
Average out degree:  37.9145


In [65]:
## Degree of two example nodes; no weight is passed, so edges are counted rather than summed.
## NOTE(review): G is a DiGraph, so this presumably counts incoming plus outgoing edges — confirm in the networkx docs.
print('Degree of US: {}'.format((G.degree("United States"))))
print('Degree of UK: {}'.format((G.degree("United Kingdom"))))

Degree of US: 193
Degree of UK: 249


### First look at degree centrality

In [66]:
### Weighted degree: for every country, the sum of CDIS_IAD over all of its edges.
### This is the sum of both in and out flows.
d = G.degree(weight='CDIS_IAD')
## dict(d) accepts both a plain dict (networkx 1.x) and a DegreeView of (node, degree)
## pairs (networkx 2.x), which has no .items() method.
cdis_degree = pd.DataFrame(list(dict(d).items()), columns=['country','CDIS_IAD'])   ## make it into a dataframe
cdis_degree = cdis_degree.sort_values(by='CDIS_IAD', ascending=False)               ## largest first
cdis_degree.head(10)

Unnamed: 0,country,CDIS_IAD
149,United States,10779930.0
103,Netherlands,9037645.0
216,Luxembourg,7958590.0
189,United Kingdom,5546120.0
153,Germany,3294401.0
92,"China, P.R.: Hong Kong",3163979.0
128,France,2506216.0
56,"China, P.R.: Mainland",2492785.0
97,Switzerland,2451718.0
220,Ireland,2448264.0



### Closeness Centrality

In [90]:
## Closeness centrality.
## Caveat: country pairs with no connection were simply coded with weight 0, which may
## affect the closeness, betweenness, etc. calculations.
## The call below passes no distance argument; the weighted variant
## (distance='rev_CDIS_IAD', so that a larger position means a shorter distance) is left disabled.
#nx.shortest_path_length(G)['United States']
c = nx.closeness_centrality(G)
cdis_closeness = pd.DataFrame(list(c.items()), columns=['country','CDIS_IAD'])   ## the score is stored under the 'CDIS_IAD' column name
cdis_closeness = cdis_closeness.sort_values(by='CDIS_IAD', ascending=False)      ## highest closeness first
cdis_closeness.head(10)

Unnamed: 0,country,CDIS_IAD
196,Italy,0.934764
153,Germany,0.838891
191,"Korea, Republic of",0.797969
10,Brazil,0.738765
213,Denmark,0.724738
128,France,0.72018
135,Sub-Saharan Africa,0.715679
55,Belgium,0.713449
6,Mauritius,0.700358
22,Poland,0.698223


## Betweenness

In [96]:
## Betweenness centrality, computed without edge weights (no weight argument is passed)
b = nx.betweenness_centrality(G)
cdis_between = pd.DataFrame(list(b.items()), columns=['country','CDIS_IAD'])   ## the score is stored under the 'CDIS_IAD' column name
cdis_between = cdis_between.sort_values(by='CDIS_IAD', ascending=False)        ## highest betweenness first
cdis_between.head(10)

Unnamed: 0,country,CDIS_IAD
196,Italy,0.192156
153,Germany,0.081278
191,"Korea, Republic of",0.045279
135,Sub-Saharan Africa,0.04478
10,Brazil,0.039908
79,Thailand,0.038741
56,"China, P.R.: Mainland",0.028956
23,Russian Federation,0.028655
6,Mauritius,0.027918
213,Denmark,0.02468


## Eigenvector Centrality

In [101]:
## Eigenvector centrality, using CDIS_IAD as the edge weight
e = nx.eigenvector_centrality_numpy(G, weight='CDIS_IAD')
cdis_eigenvector = pd.DataFrame(list(e.items()), columns=['country','CDIS_IAD'])   ## the score is stored under the 'CDIS_IAD' column name
cdis_eigenvector = cdis_eigenvector.sort_values(by='CDIS_IAD', ascending=False)    ## highest centrality first
cdis_eigenvector.head(10)

Unnamed: 0,country,CDIS_IAD
149,United States,0.494055
103,Netherlands,0.453902
189,United Kingdom,0.423208
216,Luxembourg,0.383761
220,Ireland,0.217005
97,Switzerland,0.169624
153,Germany,0.165636
31,Canada,0.134037
232,Bermuda,0.131964
55,Belgium,0.107828


## Density measure

In [106]:
## Density of the network, as computed by networkx
den = nx.density(G)
den

0.16272330435420565

### Diameter

In [109]:
## Diameter of the network.
## nx.diameter raises NetworkXError when some node cannot reach another (infinite path
## length). Only that error is caught; any other exception is a real failure and propagates
## instead of being hidden by a bare except.
try:
    print(nx.diameter(G))     # printed explicitly: a bare expression inside try is not displayed
except nx.NetworkXError:
    print("Some nodes is not reachable, network is weekly connected")

Some nodes is not reachable, network is weekly connected


### Convert graph to adj matrix 

In [358]:
### Create the adjacency matrix from the graph

## Fix the row/column order by sorting the node names. sorted() returns a new list for any
## node container, so this does not depend on G.nodes() being a list with a .sort() method.
node_list = sorted(G.nodes())
## Entry (i, j) holds the CDIS_IAD weight of the edge node_list[i] -> node_list[j].
adj_matrix = nx.to_numpy_matrix(G=G, nodelist=node_list, weight='CDIS_IAD')
adj_matrix