In [261]:
edges = read.csv('../data/2011/edgelist.csv')
nodes = read.csv('../data/2011/nodelist.csv')

In [262]:
nodes = transform(nodes, landlocked=as.factor(landlocked))

In [263]:
nodes$gdp_us_dollar <- log(nodes$gdp_us_dollar)
nodes$area <- log(nodes$area)
nodes$population <- log(nodes$population)
nodes$gdp_per_capita <- log(nodes$gdp_per_capita)
nodes$gni_atlas <- log(nodes$gni_atlas)

In [264]:
nodes$gdp_us_dollar = as.numeric(scale(nodes$gdp_us_dollar))
nodes$gdp_growth = as.numeric(scale(nodes$gdp_growth))
nodes$inflation_rate = as.numeric(scale(nodes$inflation_rate))
nodes$population = as.numeric(scale(nodes$population))
nodes$gdp_per_capita = as.numeric(scale(nodes$gdp_per_capita))
nodes$life_expectancy = as.numeric(scale(nodes$life_expectancy))
nodes$gni_atlas = as.numeric(scale(nodes$gni_atlas))
nodes$agriculture_forestry_fishing_of_gdp = as.numeric(scale(nodes$agriculture_forestry_fishing_of_gdp))
nodes$industry_of_gdp = as.numeric(scale(nodes$industry_of_gdp))
nodes$merchandise_of_gdp = as.numeric(scale(nodes$merchandise_of_gdp))
nodes$net_barrier_of_trade = as.numeric(scale(nodes$net_barrier_of_trade))
nodes$foreign_direct_investment_inflows = as.numeric(scale(nodes$foreign_direct_investment_inflows))
nodes$happiness = as.numeric(scale(nodes$happiness))

In [265]:
head(nodes, 3)

Unnamed: 0_level_0,country_iso3,foreign_direct_investment_inflows,colonizer,agriculture_forestry_fishing_of_gdp,net_barrier_of_trade,gdp_growth,inflation_rate,gdp_us_dollar,continent,area,happiness,life_expectancy,gni_atlas,landlocked,merchandise_of_gdp,population,industry_of_gdp,langoff_1,gdp_per_capita
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
1,AFG,-0.3281748,USA,1.0489473,0.1520729,-0.50491108,0.7633134,-0.7224803,Asia,13.38814,0.228143,-1.023194,-0.7252517,1,-0.8511799,0.5813344,-0.4905843,Persian,-1.5109274
2,AGO,-0.3951331,PRT,-0.4737484,1.8249821,-0.05496927,1.0187255,0.2174899,Africa,14.03601,0.1461749,-1.604665,0.1116836,0,0.1773835,0.4379398,1.8543603,Portuguese,-0.1590264
3,ALB,-0.3064818,NONE,0.5795895,-0.694899,-0.19186763,-0.5112593,-0.8877026,Europe,10.26632,-0.7365583,0.68693,-0.8416688,0,-0.3795621,-0.9576597,-0.3677123,Albanian,-0.1849468


In [266]:
n_countries = dim(nodes)[1]
dyads = matrix(0, nrow = n_countries, ncol = n_countries)
nodecovs = array(
    rep(0, length(numerical_columns)*n_countries*n_countries), 
    c(length(numerical_columns), n_countries, n_countries)
)
absdiffs = array(
    rep(0, length(numerical_columns)*n_countries*n_countries), 
    c(length(numerical_columns), n_countries, n_countries)
)
nodematchs = array(
    rep(0, length(categorical_columns)*n_countries*n_countries), 
    c(length(categorical_columns), n_countries, n_countries)
)

In [267]:
for (i in 1:n_countries) {
    for (j in i:n_countries) {
        if (sum(edges$source == nodes$country_iso3[i] & edges$target == nodes$country_iso3[j])) {
            dyads[i, j] = 1
        }    
        for (k in 1:length(numerical_columns)) {
            nodecovs[k, i, j] = nodes[i, numerical_columns[k]] + nodes[j, numerical_columns[k]]
            absdiffs[k, i, j] = abs(nodes[i, numerical_columns[k]] - nodes[j, numerical_columns[k]])
        }
        for (k in 1:length(categorical_columns)) {
            if (nodes[i, categorical_columns[k]] == nodes[j, categorical_columns[k]])
            nodematchs[k, i, j] = 1
        }
    }
}

In [268]:
df = data.frame(
    edge = as.vector(t(dyads))
)

In [269]:
for (k in 1:length(numerical_columns)) {
    df[, paste('nodecov-',numerical_columns[k], sep = '')] = as.vector(t(nodecovs[k,,]))
}
for (k in 1:length(numerical_columns)) {
    df[, paste('absdiff-',numerical_columns[k], sep = '')] = as.vector(t(absdiffs[k,,]))
}
for (k in 1:length(categorical_columns)) {
    df[, paste('nodematch-',categorical_columns[k], sep = '')] = as.vector(t(nodematchs[k,,]))
}

In [278]:
self_loops_indices = matrix(0, nrow = n_countries)
for (i in 1:n_countries) {
    self_loops_indices[i] = 1 + 142*(i - 1) + (i - 1)
}

In [281]:
df = df[-self_loops_indices, ]

In [284]:
model = glm(edge ~ ., data = df, family = 'binomial', control=glm.control(maxit=50))

"glm.fit: fitted probabilities numerically 0 or 1 occurred"


In [285]:
summary(model)


Call:
glm(formula = edge ~ ., family = "binomial", data = df, control = glm.control(maxit = 50))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.35203  -0.46994  -0.04776  -0.04776   3.00954  

Coefficients:
                                              Estimate Std. Error z value
(Intercept)                                   -6.77582    0.26137 -25.924
`nodecov-foreign_direct_investment_inflows`   -2.29342    0.33875  -6.770
`nodecov-agriculture_forestry_fishing_of_gdp` -0.05589    0.05514  -1.014
`nodecov-net_barrier_of_trade`                 0.02053    0.03200   0.642
`nodecov-gdp_growth`                           0.04739    0.03208   1.477
`nodecov-inflation_rate`                      -0.00888    0.04254  -0.209
`nodecov-gdp_us_dollar`                       -2.65183    1.62561  -1.631
`nodecov-area`                                 0.13767    0.01202  11.449
`nodecov-happiness`                            0.05566    0.02546   2.186
`nodecov-life_expectanc