## Migration network completion
There are evidences that migration and trade networks are linked [1]. As we are missing many of the migration links, in this notebook we explore the possibility of completing the migration network using trande network info

[1] Sgrignoli, P., Metulini, R., Schiavo, S., Riccaboni, M., 2015. The relation between global migration and trade networks. Physica A: Statistical Mechanics and its Applications 417, 245–260. https://doi.org/10.1016/j.physa.2014.09.037



In [1]:
try:
    output_filepath
except:
    output_filepath = 's3://workspaces-clarity-mgmt-pro/jaime.oliver/misc/social_capital/data/processed/'
    input_filepath = 's3://workspaces-clarity-mgmt-pro/jaime.oliver/misc/social_capital/data/raw/'

In [2]:
%load_ext autoreload
%autoreload 2

In [209]:
import os
from pathlib import Path

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

from urllib.parse import urlparse
from functools import reduce

from src.utils.utils_s3 import read_s3_graphml

In [4]:
year='2015'

In [203]:
B = read_s3_graphml(os.path.join(output_filepath, year, 'B_country.graphml'))
A = read_s3_graphml(os.path.join(output_filepath, year, 'A_country.graphml'))
M = read_s3_graphml(os.path.join(output_filepath, year, 'migration_network.graphml'))

In [65]:
df_list = []

for network in A, B, M:
    link_list = [(c1, c2, w['weight']) for c1, c2, w in network.edges(data=True)]
    df_ = pd.DataFrame(link_list, columns = ['country_from', 'country_to', 'weight'])
    df_.set_index(['country_from', 'country_to'], inplace=True)
    
    df_list.append(df_)

In [78]:
df = reduce(lambda df1,df2: pd.merge(df1,df2,on=['country_from', 'country_to'], how='outer'), df_list)
df.columns = ['a_link', 'b_link', 'm_link']

In [89]:
df.describe()

Unnamed: 0,a_link,b_link,m_link
count,36099.0,36099.0,4996.0
mean,0.002662586,0.002585357,0.0002490384
std,0.0295192,0.02941462,0.001834524
min,3.494289e-11,2.453964e-10,0.0
25%,5.660713e-06,6.929959e-06,8.747368e-07
50%,2.848555e-05,2.78135e-05,8.948309e-06
75%,0.0001654927,0.0001200384,5.760262e-05
max,0.6129783,0.8945897,0.05859059


## Simple regression model

In [91]:
df_model = df[df.b_link>1.e-6]
df_model = df_model.dropna()

In [108]:
import statsmodels.api as sm
from statsmodels.regression.quantile_regression import QuantReg
    
y = df_model['m_link']
X = df_model[['a_link', 'b_link']]
X = sm.add_constant(X)

model = sm.OLS(y,X)
model = QuantReg(y,X)

results = model.fit(q=.99)

results.summary()

0,1,2,3
Dep. Variable:,m_link,Pseudo R-squared:,0.3913
Model:,QuantReg,Bandwidth:,0.0004751
Method:,Least Squares,Sparsity:,0.002838
Date:,"Tue, 03 Aug 2021",No. Observations:,4586.0
Time:,09:18:04,Df Residuals:,4583.0
,,Df Model:,2.0

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0002,4.39e-06,46.442,0.000,0.000,0.000
a_link,1.3412,0.001,1363.575,0.000,1.339,1.343
b_link,-0.0225,0.001,-20.269,0.000,-0.025,-0.020


## Dummy model
DIOC-E (release 3.0) (year 2000) data found in https://www.un.org/en/development/desa/population/migration/data/estimates2/estimates19.asp

In [206]:
class EstimatedMigrationNetwork:
    
    def __init__(self, input_filepath, output_filepath):
        self.input_filepath = input_filepath
        self.output_filepath = output_filepath
        
    def load_emigration_rates(self):

        df_emigration_rate = pd.read_csv(os.path.join(self.input_filepath,'File4_DIOC-E_3_Emigration Rates.csv'), encoding='latin-1')
        columns = ['coub', 'ERT1']
        df_emigration_rate = df_emigration_rate.loc[df_emigration_rate.sex == 'Total', columns]
        df_emigration_rate.columns = ['country', 'emigration_rate']
        df_emigration_rate['emigration_rate'] = df_emigration_rate['emigration_rate']/100
        df_emigration_rate.dropna(inplace=True)
        
        self.emigration_rate = dict(zip(df_emigration_rate.country, df_emigration_rate.emigration_rate))
        
    def load_trade_network(self, year):
        
        self.estimated_M = read_s3_graphml(os.path.join(self.output_filepath, year, 'B_country.graphml'))
        
    def estimate_emigration_rate(self):
        
        for u,v,d in self.estimated_M.edges(data=True):
            d['weight']*=self.emigration_rate.get(u, np.nan)
            if u==v:d['weight'] = 0
                
        
        
e = EstimatedMigrationNetwork(input_filepath, output_filepath)
e.load_emigration_rates()
e.load_trade_network(year = '2015')
e.estimate_emigration_rate()

In [232]:
np.sum([w['weight'] for c,w in e.estimated_M['ABW'].items()])

0.3660222825990804

In [234]:
e.estimated_M['KOR'].items()

ItemsView(AtlasView({'KOR': {'weight': 0.5674253619734483}, 'ABW': {'weight': 0.00015857971799605774}, 'AFG': {'weight': 2.4716840266398638e-05}, 'AGO': {'weight': 0.007509106418887963}, 'ALB': {'weight': 5.2451260331528324e-05}, 'AND': {'weight': 0.0007800957796924829}, 'ANT': {'weight': 0.00015428840061372143}, 'ARE': {'weight': 0.0109198596228619}, 'ARG': {'weight': 0.0003506522194906659}, 'ARM': {'weight': 7.482054036623825e-05}, 'ATG': {'weight': 0.003440652565985963}, 'AUS': {'weight': 0.005865379911781617}, 'AUT': {'weight': 0.0019314834473777066}, 'AZE': {'weight': 6.312204675588714e-05}, 'BDI': {'weight': 0.0002137868744309302}, 'BEL': {'weight': 0.002243845883918512}, 'BEN': {'weight': 5.5572941067092175e-05}, 'BFA': {'weight': 4.880415062456567e-05}, 'BGD': {'weight': 0.0008958648413963196}, 'BGR': {'weight': 0.00032984010089605505}, 'BHR': {'weight': 0.009985731741118933}, 'BHS': {'weight': 7.932481976134514e-05}, 'BIH': {'weight': 3.313264382562785e-05}, 'BLR': {'weight': 

In [221]:
e.load_trade_network(year='2015')
e.estimated_M.out_degree(weight='weight')

OutDegreeView({'ABW': 0.36602228259908043, 'AFG': 0.4796901610058105, 'AGO': 0.5050181661327501, 'ALB': 0.3455595069600028, 'AND': 0.42943926889152934, 'ANT': 0.35846421264749, 'ARE': 0.47361002117418444, 'ARG': 0.38377162234586465, 'ARM': 0.3239190291583719, 'ATG': 0.39771139037616315, 'AUS': 0.7088683424240623, 'AUT': 0.45060266541735355, 'AZE': 0.3904583414599202, 'BDI': 0.3766546023012995, 'BEL': 0.6956308433610819, 'BEN': 0.4315322726056965, 'BFA': 0.3836078905747025, 'BGD': 0.39800359855616596, 'BGR': 0.5275964412951141, 'BHR': 0.516185436517193, 'BHS': 0.33516240490557125, 'BIH': 0.3886751498062579, 'BLR': 0.2214360224144457, 'BLZ': 0.2877955286556278, 'BMU': 0.34019659144125647, 'BOL': 0.37815949355981227, 'BRA': 0.7747841839048849, 'BRB': 0.38165997060565876, 'BRN': 0.48903577769585627, 'BTN': 0.3858345471420485, 'BWA': 0.35790625563243633, 'CAF': 0.4297571751924953, 'CAN': 0.5529983530478623, 'CHE': 0.42477499383859885, 'CHL': 0.4403670386522116, 'CHN': 1.8629324256564261, 'C

In [210]:
df_list = []

for network in A, B, M, e.estimated_M:
    link_list = [(c1, c2, w['weight']) for c1, c2, w in network.edges(data=True)]
    df_ = pd.DataFrame(link_list, columns = ['country_from', 'country_to', 'weight'])
    df_.set_index(['country_from', 'country_to'], inplace=True)
    
    df_list.append(df_)
    
df = reduce(lambda df1,df2: pd.merge(df1,df2,on=['country_from', 'country_to'], how='outer'), df_list)
df.columns = ['a_link', 'b_link', 'm_link', 'm_estimated_link']

In [213]:
df.dropna().sort_values(by='m_link')

Unnamed: 0_level_0,Unnamed: 1_level_0,a_link,b_link,m_link,m_estimated_link
country_from,country_to,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
STP,EST,0.001310,0.000021,0.000000,2.672545e-06
MDV,LVA,0.000196,0.000018,0.000000,1.417323e-07
MDV,LUX,0.000133,0.000004,0.000000,2.974317e-08
CHL,CHL,0.373862,0.365353,0.000000,0.000000e+00
MDV,HUN,0.000162,0.000003,0.000000,2.116014e-08
...,...,...,...,...,...
ROU,DEU,0.017973,0.000799,0.024291,4.791110e-05
BGR,DEU,0.015497,0.000242,0.025879,2.157991e-05
WSM,NZL,0.013978,0.000056,0.031667,2.272690e-05
ALB,DEU,0.005038,0.000014,0.053787,2.799719e-06


In [214]:
df.dropna().describe(include='all')

Unnamed: 0,a_link,b_link,m_link,m_estimated_link
count,4249.0,4249.0,4249.0,4249.0
mean,0.002667199,0.001674946,0.0002029331,4.018081e-05
std,0.01717074,0.01626235,0.001629217,0.0001939442
min,7.177867e-07,2.904838e-08,0.0,0.0
25%,9.491331e-05,4.084236e-06,6.495946e-07,1.566401e-07
50%,0.000361011,2.467162e-05,7.189137e-06,1.128922e-06
75%,0.00144965,0.0002525392,4.690202e-05,8.672826e-06
max,0.4265551,0.4188109,0.05859059,0.00446572
