In [1]:
import numpy as np
import pandas as pd

Removing countries that are missing from either the GDP table or the trade table

In [2]:
# Read the 2011 GDP table and the 2011 export-trade table, in that order.
gdp, trades = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/gdp_2011.csv", "../data/trades_export_2011.csv")
)

In [3]:
# Every country identifier that occurs on either end of a trade record.
all_countries = set(trades["source"].unique()) | set(trades["target"].unique())

In [4]:
# Keep only the GDP rows whose country also appears in the trade data.
# One vectorised membership mask replaces the row-by-row in-place drops:
# the frame is no longer mutated while it is being iterated, and it is
# scanned once instead of once per row.
# NOTE(review): rows whose country_iso3 is missing are now filtered out too
# (unless a missing value is itself in all_countries); the previous loop
# left such rows in place. Confirm that dropping them is acceptable.
gdp = gdp[gdp["country_iso3"].isin(all_countries)]

In [5]:
# Countries that take part in trade but have no matching row in the GDP table.
no_gdp_countries = [
    code
    for code in all_countries
    if not (gdp["country_iso3"] == code).any()
]

In [6]:
# Remove every trade record that involves a country without GDP data, on
# either end of the record. A single boolean mask replaces the two in-place
# drops per country, so the frame is scanned once rather than repeatedly.
# NOTE(review): unlike the `==` comparison used before, `isin` should also
# match a missing value if one is present in no_gdp_countries, so records
# with a missing source/target would now be removed as well - confirm.
involves_no_gdp = (
    trades["source"].isin(no_gdp_countries)
    | trades["target"].isin(no_gdp_countries)
)
trades = trades[~involves_no_gdp]

Backboning: extracting the network backbone with the disparity filter

In [7]:
from backbone import *
import networkx as nx

In [8]:
def create_directed_graph(trades: pd.DataFrame, gdp: pd.DataFrame):
    """Build a weighted, directed trade network annotated with GDP.

    Args:
        trades: Edge table with ``source``, ``target`` and ``weight``
            columns; one directed edge ``source -> target`` is added per row.
        gdp: Node table with ``country_iso3`` and ``gdp_us_dollar`` columns;
            one node is added per row.

    Returns:
        An ``nx.DiGraph`` whose GDP-derived nodes carry ``label`` and
        ``gdp_us_dollar`` attributes and whose edges carry ``weight``.
        An endpoint that appears only in ``trades`` is still created by
        ``add_edge`` but receives no node attributes.
    """
    net = nx.DiGraph()

    # Walk the needed columns in parallel instead of using iterrows(), which
    # constructs a full Series for every row just to read a few fields.
    net.add_nodes_from(
        (iso3, {"label": iso3, "gdp_us_dollar": gdp_value})
        for iso3, gdp_value in zip(gdp["country_iso3"], gdp["gdp_us_dollar"])
    )

    for source, target, weight in zip(
        trades["source"], trades["target"], trades["weight"]
    ):
        net.add_edge(source, target, weight=weight)
    return net

In [9]:
# Assemble the trade network from the trade and GDP tables.
net = create_directed_graph(trades=trades, gdp=gdp)

In [10]:
def create_backbone_graph(net: nx.DiGraph, min_alpha_ptile = 0.5, min_degree = 2):
    """Return a pruned copy of ``net`` produced by the backbone helpers.

    The input graph is copied first, so ``net`` itself is not modified.

    Args:
        net: Graph to prune.
        min_alpha_ptile: Forwarded unchanged to ``cut_graph``; presumably the
            minimum alpha percentile an edge needs to be kept - confirm
            against the ``backbone`` module.
        min_degree: Forwarded unchanged to ``cut_graph``; presumably the
            minimum degree a node needs to be kept - confirm against the
            ``backbone`` module.

    Returns:
        The copied graph after ``disparity_filter`` and ``cut_graph`` have
        been applied to it.
    """
    graph = net.copy()
    alpha_measures = disparity_filter(graph)
    # Still called although its return value is not needed here: the
    # percentile/alpha table shown under this notebook's call appears at this
    # step (presumably printed by this helper - TODO confirm). The previously
    # derived ``alpha_cutoff`` was never used, so that lookup (and its
    # list indexing, a needless failure point) has been removed.
    calc_alpha_ptile(alpha_measures)
    cut_graph(graph, min_alpha_ptile, min_degree)
    return graph

In [11]:
# Extract the backbone using the function's default thresholds.
cut_net = create_backbone_graph(net=net)

	ptile	alpha
	0.00	0.0000
	0.10	0.2087
	0.20	0.6953
	0.30	0.8869
	0.40	0.9587
	0.50	0.9852
	0.60	0.9949
	0.70	0.9985
	0.80	0.9997
	0.90	1.0000


In [12]:
def get_nodes_list(net: nx.DiGraph):
    """Tabulate the graph's nodes together with their GDP attribute.

    Args:
        net: Graph whose nodes each carry a ``gdp_us_dollar`` attribute.

    Returns:
        A DataFrame with one row per node, in the graph's iteration order,
        and two columns: ``country_iso3`` (the node identifier) and
        ``gdp_us_dollar``.

    Raises:
        KeyError: If any node has no ``gdp_us_dollar`` attribute.
    """
    node_ids = list(net)
    gdp_values = [net.nodes[node_id]["gdp_us_dollar"] for node_id in node_ids]
    return pd.DataFrame({"country_iso3": node_ids, "gdp_us_dollar": gdp_values})

In [13]:
# Flatten the backbone graph into an edge table, then discard the
# alpha_ptile / alpha / norm_weight edge attributes so they are not exported.
backbone_columns = ["alpha_ptile", "alpha", "norm_weight"]
cut_edge_list = nx.convert_matrix.to_pandas_edgelist(cut_net).drop(columns=backbone_columns)

In [14]:
# Node table (identifier + GDP) for the nodes that remain in the backbone.
cut_node_list = get_nodes_list(net=cut_net)

In [15]:
# Write the backbone's edge and node tables to CSV without the index column.
for table, destination in (
    (cut_edge_list, "../data/edgelist_2011.csv"),
    (cut_node_list, "../data/nodelist_2011.csv"),
):
    table.to_csv(destination, index=False)