## Networkx testing

In [345]:
import csv
import networkx as nx 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
plt.style.use('ggplot')
%matplotlib inline

In [346]:
## read pre-processed data from stata
df = pd.read_stata('data/0_CPIS_CDIS_BIS_USTIC_merged_fixed.dta')

In [347]:
## print the variables that we have in our data 
df.columns.values

array(['country', 'countrycode', 'counterpart', 'counterpart_code', 'year',
       'CDIS_IADE', 'CDIS_IADD', 'CDIS_IAD', 'CDIS_IADF', 'CPIS_IAP',
       'CPIS_IAPE', 'CPIS_IAPD', 'loans_dep'], dtype=object)

In [348]:
keep_var = ['countrycode','counterpart_code','country','counterpart','CDIS_IAD','CPIS_IAP','loans_dep']
df_2015 = df[df['year']==2015][keep_var]
df_2015.fillna(0,inplace=True)

In [349]:
df_2015.describe()

Unnamed: 0,countrycode,counterpart_code,CDIS_IAD,CPIS_IAP,loans_dep
count,31745.0,31745.0,31745.0,31745.0,31745.0
mean,522.174133,525.499756,1205.338,1323.649,421.135837
std,287.403625,286.209991,17908.14,21591.84,9055.887223
min,111.0,111.0,-2530.259,-6.356924,0.0
25%,238.0,248.0,0.0,0.0,0.0
50%,532.0,534.0,0.0,0.0,0.0
75%,746.0,748.0,0.1113362,0.0,0.0
max,968.0,968.0,1115748.0,1369423.0,940793.0


In [350]:
## see how mancy countries we have
print(df_2015['country'].unique().shape)
print(df_2015['counterpart'].unique().shape)

(233,)
(235,)


### Now, we dump data into a network 

In [351]:
# Create a network with pandas dataframe 
G = nx.from_pandas_dataframe(df_2015, source="country", target="counterpart", edge_attr=['CDIS_IAD','CPIS_IAP'],create_using=nx.DiGraph()) ##,'CPIS_IAP','loans_dep'

In [352]:
# print out one particular edge, just to make sure it wroks 
G['United States']['United Kingdom']

{'CDIS_IAD': 717895.0, 'CPIS_IAP': 1244554.0}

In [353]:
# make sure the graph is created correctly 
df_2015[(df_2015.country == 'United States') & (df_2015.counterpart == 'United Kingdom')]

Unnamed: 0,countrycode,counterpart_code,country,counterpart,CDIS_IAD,CPIS_IAP,loans_dep
2811,111.0,112.0,United States,United Kingdom,717895.0,1244554.0,614951.0


In [354]:
print(nx.info(G))

Name: 
Type: DiGraph
Number of nodes: 235
Number of edges: 31745
Average in degree: 135.0851
Average out degree: 135.0851


In [355]:
print('Degree of US: {}'.format((G.degree("United States"))))
print('Degree of UK: {}'.format((G.degree("United Kingdom"))))

Degree of US: 439
Degree of UK: 466


### First look at degree centrality

In [385]:
### calculate some centrality mearuses 
### This is the sum of both in and out folows 
d=G.degree(weight='CPIS_IAP')
cdis_degree = pd.DataFrame(list(d.items()),columns=['country','CDIS_IAD'])
cdis_degree.sort_values(by='CDIS_IAD',ascending=0,inplace=True)
cdis_degree.head(20)

Unnamed: 0,country,CDIS_IAD
52,United States,19034070.0
127,United Kingdom,6700723.0
185,Luxembourg,6626278.0
50,Japan,5224266.0
209,Germany,5040810.0
97,France,5004191.0
158,Netherlands,3432468.0
222,Cayman Islands,2675532.0
19,Ireland,2599359.0
125,Italy,2419403.0


### Convert graph to adj matrix 

In [358]:
### create adj matrix from a graph 

## sort node list order 
node_list = G.nodes()
node_list.sort()
adj_matrix = nx.to_numpy_matrix(G = G,nodelist=node_list,weight ='CDIS_IAD')
adj_matrix