In [1]:
import csv
import networkx as nx 
import numpy as np
import pandas as pd 
#import community 
from itertools import compress
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
## NOTE(review): this silences every warning category from here on, so pandas
## chained-assignment warnings and library deprecation warnings raised by the
## cells below will not be shown.
warnings.filterwarnings('ignore')

In [2]:
## read pre-processed data from stata
df = pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed1.dta')
keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','loans_dep']
df = df[keep_var]                           ## keep only used variables
df = df.replace(np.nan,0)                   ## turn na to zero

## turn negative to zero in every numeric column.
## Assign the clipped values back explicitly instead of mutating the result of the
## private `_get_numeric_data()`, which only changes df when that result is a view.
num_cols = list(df.select_dtypes(include=[np.number]).columns)
df[num_cols] = df[num_cols].clip(lower=0)
df['total'] = df[['CDIS_IAD','CPIS_IAP','loans_dep']].sum(axis=1)
#df.describe()

mata = ['countrycode','counterpart_code','country','counterpart','year']
var_org = ['CDIS_IAD','CPIS_IAP','loans_dep','total']
var_sum_out = ['CDIS_Sum_out','CPIS_Sum_out','loans_dep_Sum_out','total_Sum_out']
var_sum_in = ['CDIS_Sum_in','CPIS_Sum_in','loans_dep_Sum_in','total_Sum_in']
var_weight = ['CDIS_weight','CPIS_weight','loans_dep_weight','total_weight']

## per (reporter, year) and per (counterpart, year) totals, like stata egen sum
df[var_sum_out] = df.groupby(['countrycode','year'])[var_org].transform('sum')
df[var_sum_in] = df.groupby(['counterpart_code','year'])[var_org].transform('sum')

## weight = position / total outward position of the reporting country in that year.
## A group whose total is 0 gives 0/0 = NaN here; those are set to 0 by the fillna below.
with np.errstate(divide='ignore', invalid='ignore'):
    weight_values = df[var_org].values / df[var_sum_out].values
## flat column names (not a nested list) and df's own index, so the assignment
## below lines up row by row whatever index df carries
df_weight = pd.DataFrame(weight_values, columns=var_weight, index=df.index)
df[var_weight] = df_weight                  ## create the weight variables

mata.extend(var_weight)
df = df[mata].fillna(0)                     ## keep id columns + weights only

In [4]:
var_weight = ['total_weight'] #'CDIS_weight','CPIS_weight','loans_dep_weight',

## networkx 2.1 renamed from_pandas_dataframe to from_pandas_edgelist;
## pick whichever constructor the installed version provides
_edges_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe

for var in var_weight:
    #var = 'total_weight'
    var_dist = 'distance'
    ## reset for each weight variable; nothing in this cell fills them in
    ## NOTE(review): presumably read by export_to_excel - confirm
    degree_centrality = None
    between_centrality = None
    eigenvector_centrality = None
    closeness_centrality = None
    
    for year in range(2015,2016):     ## 2015
        ## .copy() so that adding the distance column writes to an independent
        ## frame rather than to a filtered slice of df
        df_graph = df[(df['year']==year) & (df[var]>0)].copy()
        df_graph[var_dist] = 1-df_graph[var]
        G = _edges_to_graph(df_graph, source="country", target="counterpart", edge_attr=[var,var_dist],create_using=nx.DiGraph())

    
    ## export to excel
    ## NOTE(review): export_to_excel is not defined in the visible cells - confirm
    ## it is defined before this cell runs on a fresh kernel
    export_to_excel('../result/'+var)

Unnamed: 0,countrycode,counterpart_code,country,counterpart,year,CDIS_weight,CPIS_weight,loans_dep_weight,total_weight
0,351.0,111.0,Montserrat,United States,2014,0.000000e+00,1.000000,0.000000,9.922181e-01
1,137.0,111.0,Luxembourg,United States,2013,1.837319e-01,0.213069,0.030791,1.854821e-01
2,182.0,111.0,Portugal,United States,2011,1.993076e-02,0.060403,0.032693,4.329482e-02
3,138.0,111.0,Netherlands,United States,2008,0.000000e+00,0.243393,0.092664,1.838358e-01
4,463.0,111.0,Syrian Arab Republic,United States,2011,0.000000e+00,0.998109,0.008852,1.063921e-02
5,238.0,111.0,Costa Rica,United States,2010,5.536269e-01,0.387639,0.299523,3.880189e-01
6,887.0,111.0,French Territories: French Polynesia,United States,2013,0.000000e+00,0.000000,0.117862,1.161991e-01
7,423.0,111.0,Cyprus,United States,2012,0.000000e+00,0.019939,0.029337,1.006975e-02
8,936.0,111.0,Slovak Republic,United States,2013,4.930427e-03,0.027204,0.017743,1.981924e-02
9,258.0,111.0,Guatemala,United States,2010,8.693238e-02,0.999750,0.466120,6.167710e-01


In [6]:
## build the directed exposure graph for one year and one weight variable
year = 2015 
var = 'total_weight'
var_dist = 'distance'

## networkx 2.1 renamed from_pandas_dataframe to from_pandas_edgelist;
## pick whichever constructor the installed version provides
_edges_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe

## keep only the positive-weight rows of that year; .copy() so the new distance
## column is written to an independent frame rather than to a slice of df
df_graph = df[(df['year']==year) & (df[var]>0)].copy()
df_graph[var_dist] = 1-df_graph[var]
G = _edges_to_graph(df_graph, source="country", target="counterpart", edge_attr=[var,var_dist],create_using=nx.DiGraph())

In [22]:
def to_undirected(G, weight=None):
    """Collapse a weighted directed graph into an undirected one.

    The weight of the undirected edge {u, v} is the sum of the weights of the
    edges u->v and v->u, i.e. the same result as building the adjacency matrix A
    and taking A + A.T, but without a dense matrix and without the
    ``to_numpy_matrix`` / ``from_numpy_matrix`` helpers that newer networkx
    releases no longer provide.

    Parameters
    ----------
    G : networkx graph
        Graph whose edges carry the attribute named by ``weight``.
    weight : str, optional
        Name of the edge attribute to sum. When omitted, the notebook-level
        variable ``var`` is used, which is what this function always did before
        the parameter existed.

    Returns
    -------
    networkx.Graph
        Undirected graph with the nodes of ``G`` (added in sorted order) and the
        summed value stored under the edge attribute ``'weight'``.
    """
    if weight is None:
        weight = var          ## legacy fallback to the notebook-level global

    ud_G = nx.Graph()
    ## sorted() works whether nodes() returns a list or a view
    ud_G.add_nodes_from(sorted(G.nodes()))

    for u, v, attrs in G.edges(data=True):
        w = attrs.get(weight, 1)      ## an edge without the attribute counts as 1
        if u == v:
            w = 2 * w                 ## A + A.T doubles the diagonal (self-loops)
        if ud_G.has_edge(u, v):
            ud_G[u][v]['weight'] += w
        else:
            ud_G.add_edge(u, v, weight=w)

    ## a zero entry in A + A.T is not an edge, so drop pairs whose sum is exactly 0
    zero_edges = [(u, v) for u, v, attrs in ud_G.edges(data=True) if attrs['weight'] == 0]
    ud_G.remove_edges_from(zero_edges)

    return ud_G

def merge_local_cluster_coef(local_cluster,G,var,year):
    """Compute weighted local clustering coefficients and append them as a column.

    Parameters
    ----------
    local_cluster : pandas.DataFrame or None
        Table accumulated so far (a 'country' column plus one column per year),
        or None on the first call.
    G : networkx graph
        Directed graph whose edges carry the attribute ``var``.
    var : str
        Edge attribute used as the weight when symmetrising ``G``.
    year : hashable
        Label of the new column holding this call's coefficients.

    Returns
    -------
    pandas.DataFrame
        On the first call, the new coefficients sorted from highest to lowest;
        afterwards, ``local_cluster`` outer-merged with them on 'country'.
        The input table is not modified, so the caller must keep the result.
    """
    ## pass var through explicitly so the argument, not a global, picks the weight
    ud_G = to_undirected(G, weight=var)
    ## the undirected graph stores the summed value under the name 'weight'
    lc = nx.clustering(ud_G, weight='weight')
    lc_df = pd.DataFrame(list(lc.items()), columns=['country', year])
    lc_df = lc_df.sort_values(by=year, ascending=False)

    if local_cluster is None:
        return lc_df
    return pd.merge(local_cluster, lc_df, on='country', how='outer')

In [28]:
## empty accumulator: merge_local_cluster_coef treats None as "no table yet"
local_cluster = None

In [26]:
## merge_local_cluster_coef returns the updated table and never modifies its
## argument, so the result has to be stored back into local_cluster
local_cluster = merge_local_cluster_coef(local_cluster,G,'total_weight',2015)

In [27]:
## display the current value of local_cluster
local_cluster

Unnamed: 0,country,2015
104,Martinique,0.071354
47,Guadeloupe,0.039684
102,Wallis and Futuna,0.034893
90,Bouvet Island,0.033155
33,"Micronesia, Federated States of",0.021954
43,"Bonaire, Sint Eustatius and Saba",0.020999
109,Tuvalu,0.020537
37,United States,0.018551
32,Somalia,0.018517
72,South Sudan,0.016454
