In [236]:
import igraph
import networkx as nx 
import numpy as np
import pandas as pd 
import copy

#### Use the way I process it 

In [239]:
def preprocess(df):
    keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','loans_dep','total_claims']
    df = df[keep_var]                           ## keep only used variables 
    df = df.replace(np.nan,0)                   ## turn na to zero 
    num = df._get_numeric_data()
    num[num < 0] = 0                            ## turn negative to zero 
    df['total'] = df[['total_claims']].sum(axis=1)
    df.fillna(0,inplace=True)
    
    return df 

def load_graph_nx(df,year,var):
    ## clean the data first 
    df_y = df[df['year']==year].copy()
    df_y.fillna(0,inplace=True)
    df_y = df_y[df_y[var]>0]
    #G = nx.from_pandas_dataframe(df_y, source="country", target="counterpart", edge_attr=[var],create_using=nx.DiGraph())
    G = nx.from_pandas_dataframe(df_y, source="counterpart", target="country", edge_attr=[var],create_using=nx.DiGraph())
    return G


In [240]:
## read pre-processed data from stata
df = pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed4.dta')
df = preprocess(df)

In [241]:
## export all files to gephi
files = [(2015,'total')]
for x in files:
    year,var = x 
    G = load_graph_nx(df,year,var)

In [242]:
var = 'total'
c = nx.pagerank(G,weight=var)
c_df = pd.DataFrame(list(c.items()),columns=['country','centrality'])      ## make it into dataframe
c_df.sort_values(by='centrality',ascending=0,inplace=True)                 ## sort it 
c_df.head(5)

Unnamed: 0,country,centrality
177,United States,0.140484
19,United Kingdom,0.083465
184,Luxembourg,0.073232
4,Netherlands,0.062734
156,France,0.054287


In [243]:
G.degree(weight='total')['United States']

35676003.20309484

In [244]:
G.degree(weight='total')['Netherlands']

13673349.121010609

In [245]:
G['United States']['United Kingdom']

{'total': 2348202.0}

In [246]:
#df[(df.country == 'United States') & (df.counterpart == 'United Kingdom')&(df.year == 2015)]
df[(df.country == 'United Kingdom') & (df.counterpart == 'United States')&(df.year == 2015)]

Unnamed: 0,countrycode,counterpart_code,country,counterpart,year,CDIS_IAD,CPIS_IAP,loans_dep,total_claims,total
1217,112.0,111.0,United Kingdom,United States,2015,439222.9375,968186.1875,940793.0,2348202.0,2348202.0


#### Convert to adj matrix

In [247]:
var = 'total'
node_list = G.nodes()
node_list.sort()
A = nx.to_numpy_matrix(G = G,nodelist=node_list,weight=var)
A_adj = np.squeeze(np.asarray(A))

#### Igraph

to make sure we are using the same data, i imported adj matrix directed from netwrokx output 

In [248]:
year = 2015
#import the aggregate adjacency matrix
#aggregate_am = np.genfromtxt ('../data/AM4_all_nodes_aggregateNorm'+str(year)+'.csv', delimiter=",")
#df_names = pd.read_csv('../data/all_country_name4.csv', header=None)
#names = list(df_names[0])
#Aggregate_g = igraph.Graph.Weighted_Adjacency(list(aggregate_am))
Aggregate_g = igraph.Graph.Weighted_Adjacency(list(A_adj))
#Aggregate_g.vs["name"] = copy.deepcopy(names)
Aggregate_g.vs["name"]=node_list

In [249]:
def countries_starting_num(countries_name_starting, g):
    '''Function takes a list of the strings of countries and returns a list of index of those countries in graph g'''
    c_list = []
    for c in countries_name_starting:
        c_list.append(g.vs["name"].index(c))
    return c_list

countries_name_starting = ["United States", "United Kingdom", "Netherlands", "Luxembourg", "China, P.R.: Hong Kong", "Germany", "France", "China, P.R.: Mainland" ]
countries_starting = countries_starting_num(countries_name_starting, Aggregate_g)

In [250]:
countries_starting

[200, 199, 137, 116, 41, 74, 68, 43]

In [251]:
STR = Aggregate_g.strength(weights=Aggregate_g.es["weight"])

In [252]:
STR[200], STR[137]

(35676003.20309484, 13673349.121010609)

In [253]:
PR = Aggregate_g.personalized_pagerank(weights=Aggregate_g.es["weight"])

In [254]:
pr= zip(node_list,PR)
c_df = pd.DataFrame(list(pr),columns=['country','centrality']) 
c_df.sort_values(by='centrality',ascending=0,inplace=True)                 ## sort it 
c_df.head(5)

Unnamed: 0,country,centrality
200,United States,0.140485
199,United Kingdom,0.083459
116,Luxembourg,0.073228
137,Netherlands,0.06273
68,France,0.054281


### Use the adj matrix from maria's results

In [269]:
year = 2015
#import the aggregate adjacency matrix
aggregate_am = np.genfromtxt ('../data/adj/AM4_all_nodes_aggregateNorm'+str(year)+'.csv', delimiter=",")
df_names = pd.read_csv('../data/all_country_name4.csv', header=None)
names = list(df_names[0])
Aggregate_g = igraph.Graph.Weighted_Adjacency(list(aggregate_am))
Aggregate_g.vs["name"] = copy.deepcopy(names)

In [270]:
i =  Aggregate_g.get_eid('United States','United Kingdom')

In [271]:
Aggregate_g.es[i]   ## so the edge weight is different from my result already

igraph.Edge(<igraph.Graph object at 0x7efe56d674f8>, 13687, {'weight': 122463.06200932177})

In [272]:
countries_name_starting = ["United States", "United Kingdom", "Netherlands", "Luxembourg", "China  P.R.: Hong Kong", "Germany", "France", "China  P.R.: Mainland" ]
countries_starting = countries_starting_num(countries_name_starting, Aggregate_g)

In [273]:
countries_starting

[201, 200, 137, 116, 41, 74, 68, 43]

In [274]:
PR = Aggregate_g.personalized_pagerank(weights=Aggregate_g.es["weight"])
pr= zip(node_list,PR)
c_df = pd.DataFrame(list(pr),columns=['country','centrality']) 
c_df.sort_values(by='centrality',ascending=0,inplace=True)                 ## sort it 
c_df.head(5)

Unnamed: 0,country,centrality
137,Netherlands,0.119218
201,Uruguay,0.10456
116,Luxembourg,0.078744
200,United States,0.050586
74,Germany,0.05047
