In [37]:
import pandas as pd
import numpy as np
import networkx as nx

In [2]:
# Reported trade records; the path points at the 2011 data folder.
initial_trades_csv = "../data/2011/initial_trades.csv"
initial_trades = pd.read_csv(initial_trades_csv)

Unique countries in initial trades network

In [3]:
# Every code that shows up on either side of a reported trade.
reporter_codes = set(initial_trades["ReporterISO3"].unique())
partner_codes = set(initial_trades["PartnerISO3"].unique())
len(reporter_codes | partner_codes)

241

Reported trades count

In [4]:
# Number of rows in the trade table.
initial_trades.shape[0]

361320

Summary of network

In [40]:
# Directed graph from reporter to partner. A DiGraph stores at most one edge
# per ordered pair, so repeated rows for the same pair collapse into one edge.
net = nx.from_pandas_edgelist(
    initial_trades,
    source="ReporterISO3",
    target="PartnerISO3",
    create_using=nx.DiGraph(),
)

In [44]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the same
# summary is assembled by hand to keep this cell runnable on current releases.
node_count = net.number_of_nodes()
edge_count = net.number_of_edges()
summary_lines = [
    f"Name: {net.name}",
    f"Type: {type(net).__name__}",
    f"Number of nodes: {node_count}",
    f"Number of edges: {edge_count}",
]
if node_count > 0:
    # `net` is a DiGraph: each edge adds exactly one in-degree and one
    # out-degree, so both averages equal edges / nodes.
    mean_degree = edge_count / node_count
    summary_lines.append(f"Average in degree: {mean_degree:8.4f}")
    summary_lines.append(f"Average out degree: {mean_degree:8.4f}")
print("\n".join(summary_lines))

Name: 
Type: DiGraph
Number of nodes: 241
Number of edges: 23085
Average in degree:  95.7884
Average out degree:  95.7884


In [46]:
# Average clustering coefficient of the trade graph (same function, reached
# through the top-level networkx namespace).
nx.average_clustering(net)

0.775718261856327

In [47]:
# Mean shortest-path length of the directed trade graph.
# NOTE(review): the recorded summary shows 23085 edges among 241 nodes, i.e.
# only ~40% of ordered pairs are directly linked, so a mean near 1.02 is lower
# than fully reachable pairs would allow. Presumably unreachable pairs are
# contributing 0 to the sum -- confirm whether `net` is strongly connected.
nx.average_shortest_path_length(net)

1.0234094052558782

In [5]:
# One CSV per indicator, read from the 2011 features folder.
gdp = pd.read_csv("../data/2011/features/gdp.csv")
inflation_rate = pd.read_csv("../data/2011/features/inflation.csv")
gdp_growth = pd.read_csv("../data/2011/features/gdp_growth.csv")
agriculture_of_gdp = pd.read_csv("../data/2011/features/agriculture_forestry_fishing_of_gdp.csv")
industry_of_gdp = pd.read_csv("../data/2011/features/industry_of_gdp.csv")
merchandise_of_gdp = pd.read_csv("../data/2011/features/merchandise_of_gdp.csv")
net_barter_of_trade = pd.read_csv("../data/2011/features/net_barter_of_trade.csv")
foreign_direct_investment_inflows = pd.read_csv("../data/2011/features/foreign_direct_investment_inflows.csv")

# CEPII geography file, kept under the shared `common` folder.
cepii = pd.read_csv("../data/common/geo_cepii.csv")

# Anything that iterates this list sees the frames in this order.
datasets = [
    gdp,
    inflation_rate,
    gdp_growth,
    agriculture_of_gdp,
    industry_of_gdp,
    merchandise_of_gdp,
    net_barter_of_trade,
    foreign_direct_investment_inflows,
    cepii,
]

In [6]:
# Columns of the summary table when no dataset is available; these mirror the
# statistics requested below (describe with only the median percentile).
summary_columns = ["count", "mean", "std", "min", "50%", "max"]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the accumulated frame on every iteration. Each per-dataset summary
# is therefore collected first and concatenated once.
covariate_summaries = [
    dataset.describe(percentiles=[0.5]).T  # one row per described column
    for dataset in datasets
]
if covariate_summaries:
    nodal_covariates_df = pd.concat(covariate_summaries)
else:
    # pd.concat raises on an empty list; keep the empty, column-only table.
    nodal_covariates_df = pd.DataFrame(columns=summary_columns)

In [7]:
# Show the combined summary table: one row per described covariate column.
nodal_covariates_df

Unnamed: 0,count,mean,std,min,50%,max
gdp_us_dollar,261.0,2298232000000.0,7947971000000.0,38711810.0,41953430000.0,73460350000000.0
inflation_rate,202.0,6.019412,5.807742,-0.4000407,4.629727,53.2287
gdp_growth,259.0,3.603243,5.714971,-62.07592,4.032602,21.67271
agriculture_forestry_fishing_of_gdp,197.0,11.10478,11.23587,0.03466452,7.445151,54.59347
industry_of_gdp,197.0,27.38498,15.03689,4.160941,24.67238,80.0
merchandise_of_gdp,198.0,72.73686,49.42198,14.46293,62.02943,435.2191
net_barter_of_trade,201.0,127.5091,56.07902,49.61863,104.8921,447.0234
foreign_direct_investment_inflows,200.0,11841210000.0,39827510000.0,-6008001000.0,999315500.0,332071000000.0
area,208.0,849330.5,2178894.0,21.0,131145.5,17075400.0
landlocked,208.0,0.1826923,0.3873463,0.0,0.0,1.0


In [8]:
def _fixed_five_decimals(value):
    """Render a number with exactly five digits after the decimal point."""
    return '%.5f' % value


# From here on, pandas formats displayed floats with the helper above.
pd.set_option('display.float_format', _fixed_five_decimals)

In [9]:
# Show the same table again, now that display.float_format has been set.
nodal_covariates_df

Unnamed: 0,count,mean,std,min,50%,max
gdp_us_dollar,261.0,2298232105028.963,7947971125077.408,38711810.21,41953433591.0,73460345663712.0
inflation_rate,202.0,6.01941,5.80774,-0.40004,4.62973,53.2287
gdp_growth,259.0,3.60324,5.71497,-62.07592,4.0326,21.67271
agriculture_forestry_fishing_of_gdp,197.0,11.10478,11.23587,0.03466,7.44515,54.59347
industry_of_gdp,197.0,27.38498,15.03689,4.16094,24.67238,80.0
merchandise_of_gdp,198.0,72.73686,49.42198,14.46293,62.02943,435.21913
net_barter_of_trade,201.0,127.50914,56.07902,49.61863,104.89206,447.02343
foreign_direct_investment_inflows,200.0,11841209969.30596,39827510633.94955,-6008001205.17497,999315510.38515,332071022439.732
area,208.0,849330.52404,2178894.46865,21.0,131145.5,17075400.0
landlocked,208.0,0.18269,0.38735,0.0,0.0,1.0


In [14]:
# Recode the 0/1 landlocked flag as readable labels in a single replace call.
landlocked_labels = {0: "No", 1: "Yes"}
cepii["landlocked"] = cepii["landlocked"].replace(landlocked_labels)

In [15]:
# Summary (count / unique / top / freq) of the relabelled landlocked column.
cepii.loc[:, "landlocked"].describe()

count     208
unique      2
top        No
freq      170
Name: landlocked, dtype: object

In [69]:
# Summary of the continent column.
cepii.loc[:, "continent"].describe()

count        208
unique         5
top       Africa
freq          56
Name: continent, dtype: object

In [70]:
# Summary of the langoff_1 column.
cepii.loc[:, "langoff_1"].describe()

count         208
unique         64
top       English
freq           60
Name: langoff_1, dtype: object

Distance network

In [24]:
dist_net = pd.read_csv("../data/common/dist_cepii.csv")

# Distinct codes found in either endpoint column of the distance table.
iso_o_codes = set(dist_net["iso_o"].unique())
iso_d_codes = set(dist_net["iso_d"].unique())
len(iso_o_codes | iso_d_codes)

224

In [26]:
# Number of rows in the distance table.
dist_net.shape[0]

50176

Diplomatic exchange

In [28]:
# Diplomatic exchange table; the bare name on the last line displays it.
diplomatic_exchange_csv = "../data/2011/features/diplomatic_exchange.csv"
dp = pd.read_csv(diplomatic_exchange_csv)
dp

Unnamed: 0,Destination,Country
0,Afghanistan,Australia
1,Afghanistan,Bulgaria
2,Afghanistan,Canada
3,Afghanistan,China
4,Afghanistan,Czech Republic
...,...,...
8954,Zimbabwe,Switzerland
8955,Zimbabwe,"Tanzania, United Republic of"
8956,Zimbabwe,United Kingdom
8957,Zimbabwe,United States


In [30]:
# Distinct names found in either column of the diplomatic exchange table.
destination_names = set(dp["Destination"].unique())
country_names = set(dp["Country"].unique())
len(destination_names | country_names)

201

In [31]:
# Number of rows in the diplomatic exchange table.
dp.shape[0]

8959

Colonization

In [36]:
colony_el = pd.read_csv("../data/common/colonization_el.csv")

# Count the rows whose colonization value equals 1.
colonized_mask = colony_el["colonization"] == 1
int(colonized_mask.sum())

97