In [1]:
import igraph
import networkx as nx 
import numpy as np
import pandas as pd 
import copy

#### Use my own preprocessing

In [10]:
def preprocess(df):
    """Reduce the merged claims table to the analysis columns and sanitise it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column listed in ``keep_var`` below.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding only the ``keep_var``
        columns, with NaN replaced by 0, negative numeric values replaced by 0,
        and an extra ``total`` column that copies ``total_claims``.

    Raises
    ------
    KeyError
        If any of the ``keep_var`` columns is missing from ``df``.
    """
    keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','CPIS_IAPD','CPIS_IAPE','loans_dep','total_claims']
    out = df[keep_var].replace(np.nan,0)        ## keep only used variables, turn na to zero

    # Clip through an explicit column assignment. The previous version wrote
    # into the result of the private ``_get_numeric_data()`` and only worked
    # while that happened to be a view of ``df``; under pandas copy-on-write
    # the negatives would silently survive.
    numeric_cols = out.select_dtypes(include=[np.number]).columns
    out[numeric_cols] = out[numeric_cols].clip(lower=0)   ## turn negative to zero

    out['total'] = out['total_claims']          ## computed after clipping, so it is >= 0

    return out

def load_graph_nx(df,year,var):
    """Build a directed, weighted NetworkX graph for one year of the panel.

    Rows of ``df`` with ``df['year'] == year`` and ``df[var] > 0`` become
    edges pointing from ``counterpart`` to ``country``; the value of ``var``
    is stored on each edge under the attribute name ``var``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``year``, ``country``, ``counterpart`` and ``var``.
    year : scalar
        Value compared against the ``year`` column.
    var : str
        Name of the column used both as the edge filter and the edge weight.

    Returns
    -------
    networkx.DiGraph
    """
    ## clean the data first
    df_y = df[df['year']==year].fillna(0)
    df_y = df_y[df_y[var]>0]                    ## drop zero-weight pairs

    # ``from_pandas_dataframe`` was renamed to ``from_pandas_edgelist`` in
    # NetworkX 2.0; use whichever the installed version provides.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe

    # Orientation is counterpart -> country. The opposite orientation
    # (source="country", target="counterpart") was tried and dropped.
    G = build_graph(df_y, source="counterpart", target="country", edge_attr=[var],create_using=nx.DiGraph())
    return G


In [11]:
## read pre-processed data from stata
## (path is relative to the notebook's working directory)
df = pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed5.dta')
## `df` is rebound to the cleaned frame; the raw Stata frame is not kept
df = preprocess(df)

In [12]:
## build the graph for each (year, variable) pair listed in `files`
## NOTE: nothing is exported here, and `G` is overwritten on every iteration,
## so only the graph of the last pair (here the single pair 2015/'total')
## is available to the cells below.
files = [(2015,'total')]
for x in files:
    year,var = x 
    G = load_graph_nx(df,year,var)

In [13]:
var = 'total'
## PageRank on the directed graph, edges weighted by `var`
c = nx.pagerank(G, weight=var)
## one row per node, highest score first
c_df = pd.DataFrame(list(c.items()), columns=['country', 'centrality'])
c_df = c_df.sort_values(by='centrality', ascending=False)
c_df.head(20)

Unnamed: 0,country,centrality
93,United States,0.138594
58,United Kingdom,0.081226
141,Luxembourg,0.070865
175,Netherlands,0.06116
165,France,0.053177
127,Germany,0.051176
109,Japan,0.035403
124,"China, P.R.: Hong Kong",0.028956
46,Switzerland,0.026536
57,"China, P.R.: Mainland",0.022557


In [14]:
## weighted degree of the United States node (edge attribute 'total' as weight)
G.degree(weight='total')['United States']

37408878.89059484

In [15]:
## weighted degree of the Netherlands node (edge attribute 'total' as weight)
G.degree(weight='total')['Netherlands']

13673349.121010609

In [16]:
## attributes of the edge United States -> United Kingdom; because edges run
## counterpart -> country, this is the row with counterpart == 'United States'
## and country == 'United Kingdom'
G['United States']['United Kingdom']

{'total': 2348202.0}

In [17]:
## look up the cleaned 2015 row for one (country, counterpart) pair;
## the commented line is the same lookup for United States / United Kingdom
#df[(df.country == 'United States') & (df.counterpart == 'United Kingdom')&(df.year == 2015)]
df[(df.country == 'China, P.R.: Mainland') & (df.counterpart == 'United States')&(df.year == 2015)]

Unnamed: 0,countrycode,counterpart_code,country,counterpart,year,CDIS_IAD,CPIS_IAP,CPIS_IAPD,CPIS_IAPE,loans_dep,total_claims,total
88318,924.0,111.0,"China, P.R.: Mainland",United States,2015,25952.001953,1844020.0,1513531.0,330489.0,97638.0,1967610.0,1967610.0


#### Convert to adj matrix

In [18]:
var = 'total'
## fix the node order alphabetically so that matrix rows/columns and the
## igraph vertex names built from `node_list` line up.
## sorted() works whether G.nodes() is a list (NetworkX 1.x) or a NodeView
## (NetworkX >= 2.0, which has no .sort() method).
node_list = sorted(G.nodes())
## to_numpy_matrix was removed in NetworkX 3.0; prefer to_numpy_array when
## the installed version has it and fall back to the old name otherwise.
to_dense = getattr(nx, 'to_numpy_array', None) or nx.to_numpy_matrix
A = to_dense(G, nodelist=node_list, weight=var)
## plain ndarray regardless of which converter was used
A_adj = np.squeeze(np.asarray(A))

#### Igraph

- I want to check whether the difference comes from igraph and NetworkX computing PageRank differently
- to make sure both use the same data, I imported the adjacency matrix directly from the NetworkX output

In [19]:
year = 2015
## Build the igraph graph from the adjacency matrix exported from NetworkX
## (`A_adj`) and label its vertices with `node_list`, the node order used to
## build that matrix. The commented lines are the alternative that reads the
## matrix and the country names from CSV files instead.
#import the aggregate adjacency matrix
#aggregate_am = np.genfromtxt ('../data/AM4_all_nodes_aggregateNorm'+str(year)+'.csv', delimiter=",")
#df_names = pd.read_csv('../data/all_country_name4.csv', header=None)
#names = list(df_names[0])
#Aggregate_g = igraph.Graph.Weighted_Adjacency(list(aggregate_am))
Aggregate_g = igraph.Graph.Weighted_Adjacency(list(A_adj))
#Aggregate_g.vs["name"] = copy.deepcopy(names)
Aggregate_g.vs["name"]=node_list

In [20]:
def countries_starting_num(countries_name_starting, g):
    '''Translate country names into vertex indices of graph ``g``.

    Each name is looked up in ``g.vs["name"]`` and replaced by the position
    of its first occurrence. The result keeps the order of
    ``countries_name_starting``. A name that is not present raises
    ``ValueError``.
    '''
    return [g.vs["name"].index(country) for country in countries_name_starting]

## countries of interest, spelled as in the NetworkX node names
countries_name_starting = ["United States", "United Kingdom", "Netherlands", "Luxembourg", "China, P.R.: Hong Kong", "Germany", "France", "China, P.R.: Mainland" ]
## their vertex indices in the igraph graph, in the same order
countries_starting = countries_starting_num(countries_name_starting, Aggregate_g)

In [21]:
## vertex indices, in the order of countries_name_starting
countries_starting

[200, 199, 137, 116, 41, 74, 68, 43]

In [22]:
## weighted degree (strength) of every vertex, using the edge attribute "weight"
STR = Aggregate_g.strength(weights=Aggregate_g.es["weight"])

- here you can see that the degree sums are the same as in the NetworkX output

In [23]:
## 200 and 137 are the vertex indices reported in `countries_starting` for
## United States and Netherlands; they are only valid for this node ordering
STR[200], STR[137]

(37408878.89059484, 13673349.121010609)

In [24]:
## weighted PageRank from igraph; no reset vector / reset vertices are passed,
## so this is presumably equivalent to plain PageRank -- TODO confirm against
## the igraph documentation
PR = Aggregate_g.personalized_pagerank(weights=Aggregate_g.es["weight"])

In [25]:
## pair every node name with its igraph PageRank score
pr = zip(node_list, PR)
## tabulate and rank, highest score first
c_df = pd.DataFrame(
    list(pr), columns=['country', 'centrality']
).sort_values(by='centrality', ascending=False)
c_df.head(5)

Unnamed: 0,country,centrality
200,United States,0.138593
199,United Kingdom,0.081219
116,Luxembourg,0.070852
137,Netherlands,0.06115
68,France,0.053167


- the result is practically the same as the NetworkX PageRank output (the scores differ only by about 1e-5)

### Use the adjacency matrix from Maria's results

In [26]:
year = 2015
#import the aggregate adjacency matrix
## comma-separated adjacency matrix for `year`
aggregate_am = np.genfromtxt ('../data/adj/AM4_all_nodes_aggregate'+str(year)+'.csv', delimiter=",")
## country names are read from the first column of a header-less CSV
df_names = pd.read_csv('../data/adj/all_country_name4.csv', header=None)
names = list(df_names[0])
## NOTE: this rebinds `Aggregate_g`, replacing the graph built from the
## NetworkX adjacency matrix earlier in the notebook
Aggregate_g = igraph.Graph.Weighted_Adjacency(list(aggregate_am))
## vertices are labelled with a copy of `names`
Aggregate_g.vs["name"] = copy.deepcopy(names)

- I am trying to check the edge weight between the US and China (Mainland)

In [28]:
## id of the edge between the 'United States' and 'China  P.R.: Mainland'
## vertices; note that the names in this graph use two spaces where the
## NetworkX node names use ", "
i =  Aggregate_g.get_eid('United States','China  P.R.: Mainland')

- I changed to norm = False; now we are getting the same edge weight

In [29]:
## show the edge with id `i`, including its 'weight' attribute
Aggregate_g.es[i]

igraph.Edge(<igraph.Graph object at 0x7f4ace2364f8>, 13554, {'weight': 234734.314453125})

In [62]:
## same countries of interest, spelled as in this graph's vertex names
## (two spaces instead of ", " in the China entries)
countries_name_starting = ["United States", "United Kingdom", "Netherlands", "Luxembourg", "China  P.R.: Hong Kong", "Germany", "France", "China  P.R.: Mainland" ]
countries_starting = countries_starting_num(countries_name_starting, Aggregate_g)

In [63]:
## vertex indices in the CSV-based graph, in the order of countries_name_starting
countries_starting

[201, 200, 137, 116, 41, 74, 68, 43]

In [64]:
## weighted PageRank on the graph currently bound to `Aggregate_g`
PR = Aggregate_g.personalized_pagerank(weights=Aggregate_g.es["weight"])
## pair every vertex name with its score
pr = zip(Aggregate_g.vs["name"], PR)
## tabulate and rank, highest score first
c_df = pd.DataFrame(
    list(pr), columns=['country', 'centrality']
).sort_values(by='centrality', ascending=False)
c_df.head(20)

Unnamed: 0,country,centrality
201,United States,0.105344
200,United Kingdom,0.088913
68,France,0.062342
137,Netherlands,0.058759
116,Luxembourg,0.048253
74,Germany,0.047244
37,Cayman Islands,0.044103
98,Japan,0.03447
181,Switzerland,0.029128
19,Belgium,0.027521


In [52]:
## NOTE(review): this cell's execution count (52) is lower than that of the
## cell that last rebuilt `c_df` (64), so the stored output may come from an
## earlier state of `c_df`; re-run after Restart & Run All before relying on it
c_df[c_df.country=="United Kingdom"]

Unnamed: 0,country,centrality
200,United Kingdom,0.083475


- and the PageRank results are different from the ones obtained with my own preprocessing above