## Export to MATLAB for community detection

In [16]:
import csv
import networkx as nx 
import numpy as np
import pandas as pd 
import scipy.io
import community 
from itertools import compress
import matplotlib.pyplot as plt 
import seaborn as sns
from seaborn import color_palette, set_style, palplot
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [2]:
## Read the pre-processed bilateral positions exported from Stata and turn
## them into per-reporter portfolio weights.
df = pd.read_stata('../data/0_CPIS_CDIS_BIS_USTIC_merged_fixed1.dta')
keep_var = ['countrycode','counterpart_code','country','counterpart','year','CDIS_IAD','CPIS_IAP','loans_dep']
df = df[keep_var]                           ## keep only used variables
df = df.replace(np.nan,0)                   ## turn na to zero

## Turn negative values to zero in every numeric column.  The clipped values
## are assigned back explicitly instead of mutating the result of the private
## `_get_numeric_data()` helper, which only worked while that helper returned
## a view of `df`.
num = df.select_dtypes(include=[np.number])
df[num.columns] = num.clip(lower=0)
df['total'] = df[['CDIS_IAD','CPIS_IAP','loans_dep']].sum(axis=1)

## Column-name groups used below
mata = ['countrycode','counterpart_code','country','counterpart','year']
var_org = ['CDIS_IAD','CPIS_IAP','loans_dep','total']
var_sum_out = ['CDIS_Sum_out','CPIS_Sum_out','loans_dep_Sum_out','total_Sum_out']
var_sum_in = ['CDIS_Sum_in','CPIS_Sum_in','loans_dep_Sum_in','total_Sum_in']
var_weight = ['CDIS_weight','CPIS_weight','loans_dep_weight','total_weight']

## Group totals broadcast back onto every row (like stata egen sum).
## The string alias 'sum' is used rather than the Python builtin so pandas
## dispatches to its own grouped sum.
df[var_sum_out] = df.groupby(['countrycode','year'])[var_org].transform('sum')
## NOTE: the *_Sum_in columns are dropped by the column selection at the end
## of this cell; they are only available in between.
df[var_sum_in] = df.groupby(['counterpart_code','year'])[var_org].transform('sum')

## weight = row value / total of its (countrycode, year) group.
## A group whose total is zero yields 0/0 = NaN, which is zero-filled below.
with np.errstate(divide='ignore', invalid='ignore'):
    weight_values = df[var_org].values / df[var_sum_out].values
## Flat column names and the same index as `df`, so the assignment below
## lines up row by row whatever index `df` carries.
df_weight = pd.DataFrame(weight_values, columns=var_weight, index=df.index)
df[var_weight] = df_weight                  ## create the weight variables

## Keep the identifier columns plus the weights, with missing weights as zero
mata.extend(var_weight)
df = df[mata].fillna(0)

In [3]:
def to_undirected(G, weight=None):
    """Collapse a weighted directed graph into a weighted undirected graph.

    The adjacency matrix of ``G`` is added to its own transpose, so the
    entry for a pair (u, v) becomes the sum of the u->v and v->u weights.

    Parameters
    ----------
    G : networkx graph
        Graph whose edges carry the attribute named by ``weight``.
    weight : str, optional
        Name of the edge attribute to read the weights from.  When omitted,
        the notebook-level variable ``var`` is used, so existing calls of the
        form ``to_undirected(G)`` behave as before.

    Returns
    -------
    networkx graph
        Graph built from the symmetrised matrix, with its nodes relabelled
        back to the labels of ``G``.
    """
    if weight is None:
        weight = var    ## fall back to the notebook-level selection

    ## sorted() accepts both a plain list and a node view, whereas an
    ## in-place .sort() needs G.nodes() to return a list.
    node_list = sorted(G.nodes())
    A = nx.to_numpy_matrix(G, nodelist=node_list, weight=weight)
    ud_M = A + A.T
    ud_G = nx.from_numpy_matrix(ud_M)

    ## Relabel nodes to country names.  Row i of the matrix corresponds to
    ## node_list[i], so the mapping is built from positions rather than from
    ## the iteration order of ud_G's nodes.
    maplist = dict(enumerate(node_list))
    ud_G = nx.relabel_nodes(ud_G, maplist)

    return ud_G

### Test on one graph 

In [4]:
## Build the undirected network for one year and one weight variable
year = 2015
var = 'CDIS_weight'   ## weight column that defines the edges

## rows of the chosen year that carry a strictly positive weight
df_graph = df.loc[(df[var] > 0) & (df['year'] == year)]

## directed graph country -> counterpart, symmetrised straight away
G = to_undirected(
    nx.from_pandas_dataframe(df_graph,
                             source="country",
                             target="counterpart",
                             edge_attr=[var],
                             create_using=nx.DiGraph())
)

In [11]:
## Export the adjacency matrix and node labels to a .mat file for
## community detection in MATLAB.
## sorted() accepts both a plain list and a node view, whereas an in-place
## .sort() needs G.nodes() to return a list.
node_list = sorted(G.nodes())
## rows/columns of A follow the order of node_list
A = nx.to_numpy_matrix(G, nodelist=node_list, weight='weight')
save_path = '../result/2015_CDIS.mat'
scipy.io.savemat(save_path, mdict={'A': A, 'nodes': node_list})

In [14]:
## Compare the MATLAB community assignment with python-louvain on the same graph

## python side: partition of G, unpacked into one (country, community) row per node
l_community = community.best_partition(G, resolution=1, weight='weight')
python_community = pd.DataFrame(list(l_community.items()),
                                columns=['country', 'python_community'])

## MATLAB side: read back the exported result
## (assumes the sheet has exactly two columns: country, community -- TODO confirm)
matlab_community = pd.read_excel('../result/out.xlsx')
matlab_community.columns = ['country', 'matlab_community']

## join the two assignments on country and write them side by side
m_df = matlab_community.merge(python_community, on='country')
m_df.to_csv('../result/compare.csv')

Unnamed: 0,country,matlab_community
0,"Afghanistan, Islamic Republic of",1
1,Albania,2
2,Algeria,1
3,American Samoa,3
4,Andorra,4
5,Angola,5
6,Anguilla,4
7,Antigua and Barbuda,4
8,Argentina,4
9,"Armenia, Republic of",5
