In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl

import dispindiffs

In [2]:
# ISO-3 code -> country name lookup.
# Source: https://data.apps.fao.org/catalog/dataset/iso-3-code-list-global-region-country
iso3 = pd.read_csv("./dat/iso3.csv")
iso3_to_name = {code: name for code, name in zip(iso3["iso3"], iso3["name"])}

# Bilateral trade records, restricted to rows with year >= 2020.
# Source: https://atlas.hks.harvard.edu/data-downloads - "Country Trade by Partner"
df = (
    pl.read_csv("./dat/sitc_country_country_year.csv")
    .filter(pl.col("year") >= 2020)
)

In [3]:
# Collapse the records into one weighted edge per (exporter, partner) pair and
# keep only pairs whose summed export value is strictly positive.
elist = (
    df.group_by(["country_iso3_code", "partner_iso3_code"])
    .agg(pl.col("export_value").sum())
    .filter(pl.col("export_value") > 0)
)

# Network size: distinct codes appearing on either side, and number of edges.
N_T = len(set(elist["country_iso3_code"]).union(elist["partner_iso3_code"]))
E_T = elist.height
print(N_T, E_T)

235 32296


In [4]:
# Fix the RNG state before the significance computation so repeated runs give
# the same p-values; the cell output reports "Generating pre-sampled values
# from beta distributions", i.e. the computation is sampling-based.
# NOTE(review): this seeds NumPy's legacy global generator and only has an
# effect if dispindiffs draws its samples from it — confirm against the library.
np.random.seed(0)

# Build the analysis object from the aggregated edge list, naming which
# columns hold the edge source, the edge target and the edge weight.
Trade = dispindiffs.DisparityInDifferences(
    elist,
    source="country_iso3_code",
    target="partner_iso3_code",
    weight="export_value",
)
Trade.calc_disp()
Trade.calc_disp_in_diffs()

Merging bilateral relations
Generating pre-sampled values from beta distributions
Calculating statistical significance
Done


### Disparity Filter

In [5]:
# Sweep the threshold over 10^-20 ... 10^0 in quarter-decade steps and record
# the (threshold, node count, edge count) returned for each disparity backbone.
out_path = Path("./outputs/trade_disp_info_by_th.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing directories

n_nodes_edges_by_th = []
for th_in in [10**(-k) for k in np.arange(20, -0.1, -0.25)]:
    # The call also returns a threshold; a distinct loop-variable name keeps
    # that returned value from overwriting the loop variable mid-iteration.
    bb, th, N, E = Trade.extr_disp_backbone(th=th_in)
    n_nodes_edges_by_th.append((th, N, E))
pd.DataFrame(n_nodes_edges_by_th, columns=["th", "n_nodes", "n_edges"]).to_csv(out_path, index=False)

In [6]:
# Disparity backbone at th=0.01; only the edge table is used below.
# Named throwaways instead of `_`: in IPython `_` holds the last cell output,
# and rebinding it here would shadow that.
disp_backbone, _th, _n_nodes, _n_edges = Trade.extr_disp_backbone(th=0.01)

### Disparity-in-Differences

In [7]:
# Sweep the threshold over 10^-20 ... 10^0 in quarter-decade steps and record
# the (threshold, node count, edge count) returned for each
# disparity-in-differences backbone.
out_path = Path("./outputs/trade_disp_in_diffs_info_by_th.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing directories

n_nodes_edges_by_th = []
for th_in in [10**(-k) for k in np.arange(20, -0.1, -0.25)]:
    # The call also returns a threshold; a distinct loop-variable name keeps
    # that returned value from overwriting the loop variable mid-iteration.
    bb, th, N, E = Trade.extr_disp_in_diffs_backbone(th=th_in)
    n_nodes_edges_by_th.append((th, N, E))
pd.DataFrame(n_nodes_edges_by_th, columns=["th", "n_nodes", "n_edges"]).to_csv(out_path, index=False)

In [8]:
# Disparity-in-differences backbone at th=0.01; only the edge table is used below.
# Named throwaways instead of `_`: in IPython `_` holds the last cell output,
# and rebinding it here would shadow that.
disp_in_diffs_backbone, _th, _n_nodes, _n_edges = Trade.extr_disp_in_diffs_backbone(th=0.01)

### Comparisons

#### Top countries with respect to the number of incoming edges

In [9]:
# Number of incoming backbone edges per target country, highest first.
# group_by returns groups in no guaranteed order, so ties on the count are
# broken by the target code to keep the ranking reproducible across runs.
top_disp = (
    disp_backbone.group_by("target")
    .agg(pl.len())
    .sort(["len", "target"], descending=[True, False])
)
# Codes missing from the ISO-3 lookup get an empty name (default="").
top_disp = top_disp.with_columns(
    pl.col("target").replace_strict(iso3_to_name, default="").alias("name")
)
print(list(top_disp["name"]))

['United States of America', 'China', 'Germany', 'Netherlands', 'United Kingdom', 'India', 'Italy', 'France', 'United Arab Emirates', 'Spain', 'Republic of Korea', 'Japan', 'Poland', 'Singapore', 'Switzerland', 'Türkiye', 'Belgium', 'China, Hong Kong SAR', 'Russian Federation', 'Thailand', 'China, Taiwan Province of', 'Viet Nam', 'South Africa', 'Canada', 'Australia', 'Austria', 'Czechia', 'Brazil', 'Saudi Arabia', 'Hungary', 'Denmark', 'Indonesia', 'Malaysia', 'Romania', 'Sweden', 'Mexico', 'Greece', 'Serbia', 'Kazakhstan', 'Chile', 'Pakistan', 'Egypt', 'Iraq', 'Norway', 'Ukraine', 'Democratic Republic of the Congo', 'Mali', 'Guyana', 'Kenya', 'Zimbabwe', 'Bulgaria', 'Ireland', 'Jordan', 'Philippines', 'Guatemala', 'Trinidad and Tobago', 'New Zealand', 'Nicaragua', 'Portugal', 'Costa Rica', 'Uzbekistan', 'Peru', 'Oman', 'Burkina Faso', 'Finland', 'Honduras', 'Libya', 'Lebanon', 'Uganda', 'Slovenia', 'Panama', "Côte d'Ivoire", 'Argentina', 'Afghanistan', 'Croatia', 'Israel', 'Nigeria',

In [10]:
# Number of incoming backbone edges per target country, highest first.
# group_by returns groups in no guaranteed order, so ties on the count are
# broken by the target code to keep the ranking reproducible across runs.
top_disp_in_diffs = (
    disp_in_diffs_backbone.group_by("target")
    .agg(pl.len())
    .sort(["len", "target"], descending=[True, False])
)
# Codes missing from the ISO-3 lookup get an empty name (default="").
top_disp_in_diffs = top_disp_in_diffs.with_columns(
    pl.col("target").replace_strict(iso3_to_name, default="").alias("name")
)
print(list(top_disp_in_diffs["name"]))

['United States of America', 'China', 'Germany', 'India', 'Netherlands', 'United Kingdom', 'Italy', 'France', 'United Arab Emirates', 'Japan', 'Spain', 'Republic of Korea', 'Switzerland', 'Poland', 'Singapore', 'Russian Federation', 'Türkiye', 'China, Hong Kong SAR', 'Canada', 'South Africa', 'Thailand', 'China, Taiwan Province of', 'Belgium', 'Saudi Arabia', 'Australia', 'Viet Nam', 'Brazil', "Côte d'Ivoire", 'Kazakhstan', 'Iraq', 'Denmark', 'Panama', 'Sweden', 'Greece', 'Romania', 'Trinidad and Tobago', 'Jordan', 'Egypt', 'Uganda', 'Austria', 'Gabon', 'Malaysia', 'New Zealand', 'Democratic Republic of the Congo', 'Chile', 'Pakistan', 'Mexico', 'Czechia', 'Guatemala', 'Mali', 'Ireland', 'Bulgaria', 'Mozambique', 'Oman', 'Qatar', 'Libya', 'Serbia', 'Indonesia', 'Norway', 'United Republic of Tanzania', 'Senegal', 'Kenya', 'Argentina', 'Peru', 'Suriname', 'Hungary', 'Guyana', 'Ukraine', 'Armenia', 'Portugal', 'Congo', 'Israel', 'Sierra Leone', 'Bahamas', 'Lebanon', 'Madagascar', 'Colombi

In [11]:
# Side-by-side ranking: the first 30 country names from each backbone's
# in-degree table, one column per method.
top_n = 30
ranked_names = {
    "disp": top_disp["name"],
    "disp_in_diffs": top_disp_in_diffs["name"],
}
top_comparison = pd.DataFrame(
    {method: names.head(top_n).to_list() for method, names in ranked_names.items()}
)
top_comparison.head(10)

Unnamed: 0,disp,disp_in_diffs
0,United States of America,United States of America
1,China,China
2,Germany,Germany
3,Netherlands,India
4,United Kingdom,Netherlands
5,India,United Kingdom
6,Italy,Italy
7,France,France
8,United Arab Emirates,United Arab Emirates
9,Spain,Japan


#### Mexico case

In [12]:
# Disparity-backbone edges that have Mexico on either end, ordered by source code.
involves_mexico = (pl.col("source") == "MEX") | (pl.col("target") == "MEX")
disp_backbone.filter(involves_mexico).to_pandas().sort_values(by="source")

Unnamed: 0,source,target,weight,sum_w_ij,p_ij,k_i_out,disp_alpha
1,BRA,MEX,25701566153,1128768646637,0.02277,233,0.004778601
7,CHN,MEX,275781775753,12305027286532,0.022412,212,0.00837289
0,COL,MEX,5903510232,173734098745,0.03398,198,0.001102179
6,GTM,MEX,2436617412,53411621728,0.04562,179,0.000245713
4,HKG,MEX,16298635685,713205934380,0.022853,204,0.009159951
9,HND,MEX,1444977527,33820070687,0.042725,145,0.00185891
8,MEX,CAN,88035619684,2095978504153,0.042002,181,0.0004421968
10,MEX,USA,1656354175473,2095978504153,0.790253,181,8.036758e-123
3,NIC,MEX,3094907751,27436759223,0.112802,151,1.596318e-08
5,TTO,MEX,1048686921,36189281870,0.028978,167,0.00758644


In [13]:
# Disparity-in-differences backbone edges that have Mexico on either end,
# ordered by source code.
involves_mexico = (pl.col("source") == "MEX") | (pl.col("target") == "MEX")
disp_in_diffs_backbone.filter(involves_mexico).to_pandas().sort_values(by="source")

Unnamed: 0,source,target,disp_in_diffs,disp_in_diffs_alpha
1,COL,MEX,0.028374,0.0016
2,GTM,MEX,0.04058,0.0
3,HND,MEX,0.040785,0.0011
0,MEX,CAN,-0.029086,0.997
5,MEX,USA,0.628676,0.0
4,NIC,MEX,-0.110992,1.0
