In [1]:
# Read the 2011 node list and edge list from the data directory.
nodes <- read.csv("../data/2011/nodelist.csv")
edges <- read.csv("../data/2011/edgelist.csv")

In [2]:
# Treat the landlocked indicator as a categorical variable and record how
# many rows (one per country) the node list holds.
nodes$landlocked <- as.factor(nodes$landlocked)
n_countries <- nrow(nodes)

In [3]:
# Replace the listed columns by their natural logarithm.
log_columns <- c("gdp_us_dollar", "area", "population", "gdp_per_capita")
nodes[log_columns] <- lapply(nodes[log_columns], log)

In [4]:
# Standardise (centre and scale) each listed column; as.numeric() drops the
# matrix shape and attributes that scale() attaches to its result.
scaled_columns <- c(
    "gdp_us_dollar",
    "gdp_growth",
    "inflation_rate",
    "population",
    "gdp_per_capita",
    "agriculture_forestry_fishing_of_gdp",
    "industry_of_gdp",
    "merchandise_of_gdp",
    "net_barter_of_trade",
    "foreign_direct_investment_inflows"
)
nodes[scaled_columns] <- lapply(
    nodes[scaled_columns],
    function(values) as.numeric(scale(values))
)

In [5]:
# Remove the population, area and GDP-per-capita columns from the node table.
nodes <- nodes[
    ,
    !(colnames(nodes) %in% c("population", "area", "gdp_per_capita")),
    drop = FALSE
]

In [6]:
# Preview the first three rows of the prepared node table.
head(x = nodes, n = 3)

Unnamed: 0_level_0,country_iso3,net_barter_of_trade,inflation_rate,continent,merchandise_of_gdp,landlocked,foreign_direct_investment_inflows,industry_of_gdp,agriculture_forestry_fishing_of_gdp,gdp_growth,gdp_us_dollar,langoff_1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,AFG,0.285079,0.9180415,Asia,-0.8762355,1,-0.3009823,-0.3946306,1.0662655,-0.52191825,-0.3714823,Persian
2,AGO,2.1918886,1.1996211,Africa,0.201488,0,-0.3728506,1.8415661,-0.4800047,-0.04976185,0.4290518,Portuguese
3,ALB,-0.6803136,-0.4871133,Europe,-0.3820768,0,-0.2776986,-0.2774569,0.5896411,-0.19341918,-0.5121953,Albanian


In [7]:
# Split the node attributes into numeric and non-numeric column names; the
# country identifier is excluded from the non-numeric (categorical) set.
is_numeric_column <- vapply(nodes, is.numeric, logical(1))
numerical_columns <- colnames(nodes)[is_numeric_column]
categorical_columns <- colnames(nodes)[!is_numeric_column]
categorical_columns <- categorical_columns[
    categorical_columns != "country_iso3"
]

In [8]:
# Pre-allocate zero-filled containers indexed by country pair:
#   dyads      - n_countries x n_countries matrix
#   nodecovs   - one n x n slice per numeric column
#   absdiffs   - one n x n slice per numeric column
#   nodematchs - one n x n slice per categorical column
dyads <- matrix(0, nrow = n_countries, ncol = n_countries)
nodecovs <- array(
    0,
    dim = c(length(numerical_columns), n_countries, n_countries)
)
absdiffs <- array(
    0,
    dim = c(length(numerical_columns), n_countries, n_countries)
)
nodematchs <- array(
    0,
    dim = c(length(categorical_columns), n_countries, n_countries)
)

In [9]:
# Fill the dyad-level arrays for every ordered country pair (i, j), where i
# and j index rows of `nodes`:
#   dyads[i, j]         = 1 when `edges` has a row whose source is country i
#                         and whose target is country j
#   nodecovs[k, i, j]   = value_i + value_j for the k-th numeric column
#   absdiffs[k, i, j]   = |value_i - value_j| for the k-th numeric column
#   nodematchs[k, i, j] = 1 when both countries share the same value in the
#                         k-th categorical column
# The computation is vectorised with outer() instead of scanning the whole
# edge list once per pair, and seq_along() keeps the loops safe when a column
# set is empty (1:length(x) would iterate over c(1, 0)).
iso_codes <- as.character(nodes$country_iso3)

# Encode each ordered pair as "source<TAB>target" so that membership in the
# edge list can be tested for all pairs at once.
edge_keys <- paste(edges$source, edges$target, sep = "\t")
dyad_keys <- outer(iso_codes, iso_codes, paste, sep = "\t")
dyads[dyad_keys %in% edge_keys] <- 1

for (k in seq_along(numerical_columns)) {
    values <- nodes[[numerical_columns[k]]]
    nodecovs[k, , ] <- outer(values, values, "+")
    absdiffs[k, , ] <- abs(outer(values, values, "-"))
}

for (k in seq_along(categorical_columns)) {
    # Compare as character so factor and character columns behave alike.
    labels <- as.character(nodes[[categorical_columns[k]]])
    same_label <- outer(labels, labels, "==")
    # A missing label on either side is treated as "no match" rather than
    # aborting the whole computation with an NA condition inside if ().
    nodematchs[k, , ] <- as.numeric(same_label & !is.na(same_label))
}

In [10]:
# Start the modelling table with the response: the adjacency matrix flattened
# row by row (transpose first, because as.vector() reads column by column).
edge_indicator <- as.vector(t(dyads))
df <- data.frame(edge = edge_indicator)

In [11]:
# Append one predictor column per dyad-level covariate. Each n x n slice is
# flattened row by row (via t()), the same ordering used for the `edge`
# column. seq_along() makes the loops a no-op when a column set is empty;
# 1:length(x) would instead iterate over c(1, 0) and index out of bounds.
for (k in seq_along(numerical_columns)) {
    df[[paste0("nodecov-", numerical_columns[k])]] <-
        as.vector(t(nodecovs[k, , ]))
}
for (k in seq_along(numerical_columns)) {
    df[[paste0("absdiff-", numerical_columns[k])]] <-
        as.vector(t(absdiffs[k, , ]))
}
for (k in seq_along(categorical_columns)) {
    df[[paste0("nodematch-", categorical_columns[k])]] <-
        as.vector(t(nodematchs[k, , ]))
}

In [12]:
# Row positions in the flattened dyad table that correspond to a country
# paired with itself: the diagonal cell (i, i) sits at
# 1 + n_countries * (i - 1) + (i - 1). Kept as a one-column matrix.
country_index <- seq_len(n_countries)
self_loops_indices <- matrix(
    1 + n_countries * (country_index - 1) + (country_index - 1),
    nrow = n_countries
)

In [13]:
# Discard the self-pair rows so that only dyads between distinct countries
# remain in the modelling table.
df <- df[-as.vector(self_loops_indices), ]

In [17]:
# Logistic regression of edge presence on every other column of `df`, with
# the IRLS iteration cap raised to 50.
set.seed(19746)
model <- glm(
    formula = edge ~ .,
    family = "binomial",
    data = df,
    control = glm.control(maxit = 50)
)

“glm.fit: fitted probabilities numerically 0 or 1 occurred”


In [18]:
# Display the coefficient table and fit statistics of the fitted model.
summary(object = model)


Call:
glm(formula = edge ~ ., family = "binomial", data = df, control = glm.control(maxit = 50))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9540  -0.5618  -0.4751  -0.3525   2.7894  

Coefficients:
                                              Estimate Std. Error z value
(Intercept)                                   -2.98575    0.14430 -20.692
`nodecov-net_barter_of_trade`                  0.08481    0.01894   4.479
`nodecov-inflation_rate`                      -0.04013    0.02598  -1.545
`nodecov-merchandise_of_gdp`                   0.06380    0.01986   3.212
`nodecov-foreign_direct_investment_inflows`   -2.19540    0.22848  -9.609
`nodecov-industry_of_gdp`                      0.09144    0.02343   3.903
`nodecov-agriculture_forestry_fishing_of_gdp` -0.04796    0.02391  -2.006
`nodecov-gdp_growth`                           0.01544    0.02092   0.738
`nodecov-gdp_us_dollar`                       -0.05454    0.01970  -2.769
`absdiff-net_barter_of_trade`    

In [19]:
# Extract the p-value of every estimated coefficient together with a label.
# Labels are taken from the coefficient table itself rather than from
# colnames(df): summary.glm() omits aliased (NA) coefficients from that
# table, so pairing p-values with data-frame columns by position would
# silently mislabel effects whenever a term is dropped.
coefficient_table <- coef(summary(model))
p_values <- unname(coefficient_table[, "Pr(>|z|)"])
# glm() wraps non-syntactic column names in backticks; strip them so the
# labels match the column names of `df`.
names <- gsub("`", "", rownames(coefficient_table), fixed = TRUE)
names[names == "(Intercept)"] <- "intercept"

In [20]:
# Positions of the coefficients whose p-value is below the 10% level.
significant_indices <- which(p_values < 0.1)

In [21]:
# Keep only the significant effects, in both the labels and the p-values.
names <- names[significant_indices]
p_values <- p_values[significant_indices]

In [22]:
# Two-column report: effect label and its p-value.
result_df <- data.frame(effect = names, significance = p_values)

In [23]:
# Save the report without row names. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, which would silently change the
# output format.
write.csv(
    result_df,
    "../reports/gravity_model_results.csv",
    row.names = FALSE
)