In [32]:
# Load the node attribute table and the edge list for 2011.
# Paths are relative to the directory the notebook is run from.
nodes <- read.csv("../data/2011/nodelist.csv")
edges <- read.csv("../data/2011/edgelist.csv")

In [33]:
# Treat the landlocked indicator as a categorical variable, and remember how
# many node rows there are (used below to size the dyad matrices).
nodes$landlocked <- as.factor(nodes$landlocked)
n_countries <- nrow(nodes)

In [34]:
# Replace each of these columns by its natural logarithm.
log_columns <- c("gdp_us_dollar", "area", "population", "gdp_per_capita")
nodes[log_columns] <- lapply(nodes[log_columns], log)

In [35]:
# Standardise every continuous covariate (centre and scale via scale()).
# as.numeric() strips the matrix shape and attributes that scale() attaches,
# so each column stays a plain numeric vector.
scaled_columns <- c(
  "gdp_us_dollar",
  "gdp_growth",
  "inflation_rate",
  "population",
  "gdp_per_capita",
  "agriculture_forestry_fishing_of_gdp",
  "industry_of_gdp",
  "merchandise_of_gdp",
  "net_barter_of_trade",
  "foreign_direct_investment_inflows"
)
nodes[scaled_columns] <- lapply(
  nodes[scaled_columns],
  function(column) as.numeric(scale(column))
)

In [36]:
# Remove the columns that are not used as covariates in the model below.
nodes <- nodes[
  ,
  !(names(nodes) %in% c("population", "area", "gdp_per_capita")),
  drop = FALSE
]

In [37]:
# Preview the first three rows of the prepared node table.
head(x = nodes, n = 3)

Unnamed: 0_level_0,country_iso3,gdp_growth,industry_of_gdp,langoff_1,gdp_us_dollar,colonizer,landlocked,merchandise_of_gdp,agriculture_forestry_fishing_of_gdp,continent,inflation_rate,net_barter_of_trade,foreign_direct_investment_inflows
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<fct>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,AFG,-0.5184423,-0.3752702,Persian,-0.3212859,USA,1,-0.8848238,1.0539166,Asia,0.8818298,0.237519,-0.2945139
2,AGO,-0.0371874,1.8657481,Portuguese,0.4554125,PRT,0,0.2061753,-0.4986081,Africa,1.1566566,1.9683538,-0.3679682
3,ALB,-0.183613,-0.2578438,Albanian,-0.4578093,NONE,0,-0.3845779,0.5753643,Europe,-0.489627,-0.6387798,-0.2707165


In [38]:
# Split the node attributes by type: numeric columns feed the nodecov/absdiff
# terms, all other columns (except the country identifier) feed nodematch.
numerical_columns <- names(Filter(is.numeric, nodes))
categorical_columns <- setdiff(
  names(Filter(Negate(is.numeric), nodes)),
  "country_iso3"
)

In [39]:
# Preallocate zero-filled containers indexed by ordered country pair (i, j).
# The three arrays carry one slice per attribute along their first dimension.
dyads <- matrix(0, n_countries, n_countries)
nodecovs <- array(
  0,
  dim = c(length(numerical_columns), n_countries, n_countries)
)
absdiffs <- array(
  0,
  dim = c(length(numerical_columns), n_countries, n_countries)
)
nodematchs <- array(
  0,
  dim = c(length(categorical_columns), n_countries, n_countries)
)

In [40]:
# Fill the preallocated dyad containers for every ordered pair (i, j) of rows
# of `nodes`:
#   dyads[i, j]         1 if `edges` has a row with source i and target j
#   nodecovs[k, i, j]   sum of numeric attribute k over the two countries
#   absdiffs[k, i, j]   absolute difference of numeric attribute k
#   nodematchs[k, i, j] 1 if categorical attribute k is equal for both
#
# The work is vectorised with outer() instead of scanning the whole edge list
# and indexing single data-frame cells once per pair, and seq_along() keeps
# the loops from running when a column group is empty (1:0 iterates 1, 0).
iso3 <- nodes$country_iso3

# A (source, target) pair is encoded as one string; the ASCII unit separator
# cannot occur in a country code, so distinct pairs never collide.
edge_keys <- paste(edges$source, edges$target, sep = "\037")
has_edge <- outer(
  iso3, iso3,
  function(src, dst) paste(src, dst, sep = "\037") %in% edge_keys
)
dyads[has_edge] <- 1

for (k in seq_along(numerical_columns)) {
  values <- nodes[[numerical_columns[k]]]
  nodecovs[k, , ] <- outer(values, values, "+")
  absdiffs[k, , ] <- abs(outer(values, values, "-"))
}

for (k in seq_along(categorical_columns)) {
  categories <- as.character(nodes[[categorical_columns[k]]])
  # A missing category cannot be compared; fail with an explicit message
  # rather than the opaque "missing value where TRUE/FALSE needed".
  if (anyNA(categories)) {
    stop(
      "Column '", categorical_columns[k], "' contains missing values; ",
      "cannot compute node matches.",
      call. = FALSE
    )
  }
  nodematchs[k, , ] <- as.numeric(outer(categories, categories, "=="))
}

In [44]:
# Start the modelling table with the response: the adjacency matrix flattened
# row by row (t() makes R's column-major flattening walk along rows), so
# entry (i - 1) * n_countries + j corresponds to the pair (i, j).
df <- data.frame(edge = c(t(dyads)))

In [46]:
# Append one column per dyadic covariate, flattened row by row so that the
# rows line up with `df$edge`. Column order (all nodecov, then all absdiff,
# then all nodematch) is kept because later cells rely on it.
# seq_along() is used so that an empty column group adds nothing instead of
# iterating over 1:0.
for (k in seq_along(numerical_columns)) {
  df[, paste0("nodecov-", numerical_columns[k])] <- as.vector(t(nodecovs[k, , ]))
}
for (k in seq_along(numerical_columns)) {
  df[, paste0("absdiff-", numerical_columns[k])] <- as.vector(t(absdiffs[k, , ]))
}
for (k in seq_along(categorical_columns)) {
  df[, paste0("nodematch-", categorical_columns[k])] <- as.vector(t(nodematchs[k, , ]))
}

In [53]:
# Row numbers of the self pairs (i, i) in the row-major flattened table:
# pair (i, j) sits at (i - 1) * n_countries + j, so the diagonal is at
# (i - 1) * (n_countries + 1) + 1. Computed in closed form rather than in a
# loop over 1:n_countries; the n x 1 matrix shape of the result is kept.
self_loops_indices <- matrix(
  (seq_len(n_countries) - 1) * (n_countries + 1) + 1,
  ncol = 1
)

In [57]:
# Remove the self-pair rows. drop = FALSE guarantees the result is still a
# data frame even if `df` happens to have a single column.
df <- df[-self_loops_indices, , drop = FALSE]

In [58]:
# Logistic regression of edge presence on every dyadic covariate in `df`.
# The iteration cap is raised to 50 via glm.control. On this data the fit
# warns that some fitted probabilities are numerically 0 or 1.
model <- glm(
  formula = edge ~ .,
  family = "binomial",
  data = df,
  control = glm.control(maxit = 50)
)

"glm.fit: fitted probabilities numerically 0 or 1 occurred"


In [59]:
# Show the coefficient table and fit statistics of the logistic model.
summary(object = model)


Call:
glm(formula = edge ~ ., family = "binomial", data = df, control = glm.control(maxit = 50))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9386  -0.5562  -0.4774  -0.3627   2.7199  

Coefficients:
                                                Estimate Std. Error z value
(Intercept)                                   -3.0054933  0.1347067 -22.311
`nodecov-gdp_growth`                           0.0148037  0.0196555   0.753
`nodecov-industry_of_gdp`                      0.0921898  0.0229817   4.011
`nodecov-gdp_us_dollar`                       -0.0075006  0.0194455  -0.386
`nodecov-merchandise_of_gdp`                   0.0410224  0.0187946   2.183
`nodecov-agriculture_forestry_fishing_of_gdp` -0.0667034  0.0234224  -2.848
`nodecov-inflation_rate`                      -0.0415240  0.0249150  -1.667
`nodecov-net_barter_of_trade`                  0.1118276  0.0194524   5.749
`nodecov-foreign_direct_investment_inflows`   -2.2157195  0.2124336 -10.430
`absdiff-gdp_

In [62]:
# Exploratory: list the attributes (component names and class) of the fitted
# model object.
attributes(model)

In [105]:
# Extract the p-value of every estimated coefficient together with a label.
# The labels are taken from the coefficient table itself rather than from
# colnames(df): summary() omits aliased (non-estimable) coefficients, so the
# table can have fewer rows than `df` has columns, which would silently pair
# p-values with the wrong effect names.
# NOTE: `names` shadows base::names as a variable; the name is kept because
# the following cells read it.
coef_table <- coef(summary(model))
p_values <- unname(coef_table[, "Pr(>|z|)"])
# Row names of non-syntactic terms are wrapped in backticks; strip them so
# the labels equal the original column names of `df`.
names <- gsub("`", "", rownames(coef_table), fixed = TRUE)
names[names == "(Intercept)"] <- "intercept"

In [106]:
# Positions of the coefficients that are significant at the 10% level
# (which() also discards any NA comparison).
significant_indices <- which(p_values < 0.1)

In [107]:
# Keep only the significant effects, in both the labels and the p-values.
names <- names[significant_indices]
p_values <- p_values[significant_indices]

In [108]:
# Two-column report: effect label and its p-value.
result_df <- data.frame(effect = names, significance = p_values)

In [111]:
# Save the significant effects without the row-number column.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
write.csv(
  result_df,
  "../reports/gravity_model_results.csv",
  row.names = FALSE
)