# International migrants and refugees

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = os.getcwd()                                         # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
# The CSV is looked up in a 'datasets' folder relative to the working directory.
filepath = os.path.join(
    'datasets',
    'International Migrants and Refugees.csv',
)
df = pd.read_csv(filepath_or_buffer=filepath)

# Head and Tail

In [4]:
# Preview the raw frame as loaded (pandas shows only the first and last rows).
df

Unnamed: 0,Region/Country/Area,Unnamed: 1,Year,Series,Value,Footnotes,Source,Unnamed: 7,Unnamed: 8
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0,,"United Nations Population Division, New York, ...",,
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9,,"United Nations Population Division, New York, ...",,
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0,,"United Nations Population Division, New York, ...",,
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9,,"United Nations Population Division, New York, ...",,
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0,,"United Nations Population Division, New York, ...",,
...,...,...,...,...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3,Including refugees.,"United Nations Population Division, New York, ...",,
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0,,United Nations High Commissioner for Refugees ...,,
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0,,United Nations High Commissioner for Refugees ...,,
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0,A study is being pursued to provide a revised ...,United Nations High Commissioner for Refugees ...,,


In [5]:
# (rows, columns) of the raw frame.
df.shape

(6592, 9)

In [6]:
# Per-column dtypes and non-null counts, to spot sparsely populated columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6592 entries, 0 to 6591
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Region/Country/Area  6592 non-null   int64  
 1   Unnamed: 1           6592 non-null   object 
 2   Year                 6592 non-null   int64  
 3   Series               6592 non-null   object 
 4   Value                6592 non-null   float64
 5   Footnotes            3856 non-null   object 
 6   Source               6592 non-null   object 
 7   Unnamed: 7           629 non-null    object 
 8   Unnamed: 8           16 non-null     object 
dtypes: float64(1), int64(2), object(6)
memory usage: 463.6+ KB


In [7]:
# List the column labels as parsed from the CSV header.
df.columns

Index(['Region/Country/Area', 'Unnamed: 1', 'Year', 'Series', 'Value',
       'Footnotes', 'Source', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [8]:
# Give the first two columns descriptive names: the header labels the numeric
# code column 'Region/Country/Area' and leaves the name column unnamed.
# The result of rename() is assigned back to `df` instead of using
# inplace=True, so the frame object is not mutated in place.
df = df.rename(columns={
    'Region/Country/Area': 'Code',
    'Unnamed: 1': 'RegionCountryArea',
})

df

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value,Footnotes,Source,Unnamed: 7,Unnamed: 8
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0,,"United Nations Population Division, New York, ...",,
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9,,"United Nations Population Division, New York, ...",,
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0,,"United Nations Population Division, New York, ...",,
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9,,"United Nations Population Division, New York, ...",,
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0,,"United Nations Population Division, New York, ...",,
...,...,...,...,...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3,Including refugees.,"United Nations Population Division, New York, ...",,
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0,,United Nations High Commissioner for Refugees ...,,
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0,,United Nations High Commissioner for Refugees ...,,
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0,A study is being pursued to provide a revised ...,United Nations High Commissioner for Refugees ...,,


In [9]:
# Keep only the five analysis columns; footnotes, source and the unnamed
# trailing columns stay behind in `df`.
df1 = df.loc[:, ['Code', 'RegionCountryArea', 'Year', 'Series', 'Value']]
df1

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0
...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0


In [10]:
# Confirm dtypes and non-null counts of the trimmed frame.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6592 entries, 0 to 6591
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Code               6592 non-null   int64  
 1   RegionCountryArea  6592 non-null   object 
 2   Year               6592 non-null   int64  
 3   Series             6592 non-null   object 
 4   Value              6592 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 257.6+ KB


In [11]:
# (rows, columns) after the column selection.
df1.shape

(6592, 5)

In [12]:
# Summary of the text columns: count, number of unique values, most frequent
# value and how often it occurs.
df1.describe(include='object')

Unnamed: 0,RegionCountryArea,Series
count,6592,6592
unique,263,8
top,"China, Hong Kong SAR",International migrant stock: Both sexes (number)
freq,36,1045


In [13]:
# Number of rows per region/country/area name.
df1['RegionCountryArea'].value_counts()

China, Hong Kong SAR    36
Eritrea                 35
Egypt                   35
Switzerland             35
Ghana                   34
                        ..
San Marino               8
Isle of Man              8
Tokelau                  8
Cook Islands             8
Latin America            4
Name: RegionCountryArea, Length: 263, dtype: int64

In [14]:
# Number of rows per indicator (Series label).
df1['Series'].value_counts()

International migrant stock: Both sexes (number)                 1045
International migrant stock: Both sexes (% total population)     1045
International migrant stock: Male (% total Population)            913
International migrant stock: Female (% total Population)          913
Total population of concern to UNHCR (number)                     752
Total refugees and people in refugee-like situations (number)     702
Asylum seekers, including pending cases (number)                  685
Other of concern to UNHCR (number)                                537
Name: Series, dtype: int64

In [15]:
# Number of rows per reporting year.
df1['Year'].value_counts()

2020    1666
2015    1640
2010    1609
2005    1535
2019      61
2017      28
2018      27
2016      26
Name: Year, dtype: int64

In [16]:
# Summary statistics of Value across all series at once; the series mix
# absolute numbers and percentages, so these figures are only a rough check.
df1['Value'].describe()

count    6.592000e+03
mean     7.122918e+05
std      7.074442e+06
min      0.000000e+00
25%      4.200000e+00
50%      6.260000e+01
75%      2.429975e+04
max      2.805981e+08
Name: Value, dtype: float64

# Missing Values Exploration

In [17]:
# Count of missing values in each column.
df1.isna().sum()

Code                 0
RegionCountryArea    0
Year                 0
Series               0
Value                0
dtype: int64

# Grouping and Aggregation

Filter By Region

In [18]:
# Names of the aggregate regions and sub-regions to keep; rows whose
# 'RegionCountryArea' is not in this list are dropped by the filter below.
# NOTE(review): the Oceania entries stop at 'Melanesia' — confirm whether any
# further Oceania sub-regions present in the data should be listed as well.
regions = [
    'Africa', 'Northern Africa', 'Sub-Saharan Africa', 'Eastern Africa',
    'Middle Africa', 'Southern Africa', 'Western Africa',
    'Northern America', 'Latin America & the Caribbean', 'Caribbean',
    'Latin America', 'Central America', 'South America',
    'Asia', 'Central Asia', 'Eastern Asia', 'South-central Asia',
    'South-eastern Asia', 'Southern Asia', 'Western Asia',
    'Europe', 'Eastern Europe', 'Northern Europe', 'Southern Europe',
    'Western Europe',
    'Oceania', 'Australia and New Zealand', 'Melanesia',
]

# .copy() makes the filtered rows an independent frame, so later column
# renames or assignments on df_reg_filt do not act on a slice tied to df1
# (which pandas flags with SettingWithCopyWarning).
df_reg_filt = df1[df1['RegionCountryArea'].isin(regions)].copy()
df_reg_filt

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
...,...,...,...,...,...
496,54,Melanesia,2015,International migrant stock: Female (% total P...,1.1
497,54,Melanesia,2020,International migrant stock: Both sexes (number),124073.0
498,54,Melanesia,2020,International migrant stock: Both sexes (% tot...,1.1
499,54,Melanesia,2020,International migrant stock: Male (% total Pop...,1.2


In [19]:
# Rename the name column to 'Region' for the region-only frame.
# The result of rename() is assigned back (rather than using inplace=True) so
# a frame obtained by filtering df1 is not modified in place, which pandas can
# report as a SettingWithCopyWarning.
df_reg_filt = df_reg_filt.rename(columns={'RegionCountryArea': 'Region'})

df_reg_filt

Unnamed: 0,Code,Region,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
...,...,...,...,...,...
496,54,Melanesia,2015,International migrant stock: Female (% total P...,1.1
497,54,Melanesia,2020,International migrant stock: Both sexes (number),124073.0
498,54,Melanesia,2020,International migrant stock: Both sexes (% tot...,1.1
499,54,Melanesia,2020,International migrant stock: Male (% total Pop...,1.2


Filter International migrant stock: Both sexes (number)

In [20]:
# Series label for the absolute migrant stock (both sexes).
number = ['International migrant stock: Both sexes (number)']

# Keep the regional rows whose Series label is in `number`.
df_reg_filt_ims = df_reg_filt.loc[lambda frame: frame['Series'].isin(number)]
df_reg_filt_ims

Unnamed: 0,Code,Region,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
32,2,Africa,2015,International migrant stock: Both sexes (number),22860792.0
40,2,Africa,2020,International migrant stock: Both sexes (number),25389464.0
48,15,Northern Africa,2005,International migrant stock: Both sexes (number),1749718.0
...,...,...,...,...,...
481,53,Australia and New Zealand,2020,International migrant stock: Both sexes (number),9067584.0
485,54,Melanesia,2005,International migrant stock: Both sexes (number),103881.0
489,54,Melanesia,2010,International migrant stock: Both sexes (number),110691.0
493,54,Melanesia,2015,International migrant stock: Both sexes (number),119343.0


Filter International migrant stock in Percentage

In [21]:
# Series labels for migrant stock expressed as a share of total population
# (both sexes, male, female).
ims_pct = [
    'International migrant stock: Both sexes (% total population)',
    'International migrant stock: Male (% total Population)',
    'International migrant stock: Female (% total Population)',
]

# Keep the regional rows whose Series label is one of the percentage series.
df_reg_filt_imspct = df_reg_filt.loc[lambda frame: frame['Series'].isin(ims_pct)]
df_reg_filt_imspct

Unnamed: 0,Code,Region,Year,Series,Value
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
29,2,Africa,2010,International migrant stock: Both sexes (% tot...,1.7
30,2,Africa,2010,International migrant stock: Male (% total Pop...,1.8
...,...,...,...,...,...
495,54,Melanesia,2015,International migrant stock: Male (% total Pop...,1.3
496,54,Melanesia,2015,International migrant stock: Female (% total P...,1.1
498,54,Melanesia,2020,International migrant stock: Both sexes (% tot...,1.1
499,54,Melanesia,2020,International migrant stock: Male (% total Pop...,1.2


Filter Total refugees and people in refugee-like situations (number)

In [22]:
# Series label for refugees and people in refugee-like situations.
tot_refug = ['Total refugees and people in refugee-like situations (number)']

# Keep the regional rows whose Series label is in `tot_refug`.
df_reg_filt_totref = df_reg_filt.loc[lambda frame: frame['Series'].isin(tot_refug)]
df_reg_filt_totref

Unnamed: 0,Code,Region,Year,Series,Value
36,2,Africa,2015,Total refugees and people in refugee-like situ...,4493139.0
44,2,Africa,2020,Total refugees and people in refugee-like situ...,6999300.0
148,21,Northern America,2015,Total refugees and people in refugee-like situ...,416385.0
151,21,Northern America,2019,Total refugees and people in refugee-like situ...,420131.0
171,419,Latin America & the Caribbean,2015,Total refugees and people in refugee-like situ...,336552.0
175,419,Latin America & the Caribbean,2019,Total refugees and people in refugee-like situ...,220962.0
199,420,Latin America,2020,Total refugees and people in refugee-like situ...,283211.0
247,142,Asia,2015,Total refugees and people in refugee-like situ...,8178380.0
255,142,Asia,2020,Total refugees and people in refugee-like situ...,9904753.0
367,150,Europe,2015,Total refugees and people in refugee-like situ...,1626214.0


Filter Asylum seekers, including pending cases (number)

In [23]:
# Series label for asylum seekers, including pending cases.
asy_seek = ['Asylum seekers, including pending cases (number)']

# Keep the regional rows whose Series label is in `asy_seek`.
df_reg_filt_asyseek = df_reg_filt.loc[lambda frame: frame['Series'].isin(asy_seek)]
df_reg_filt_asyseek

Unnamed: 0,Code,Region,Year,Series,Value
37,2,Africa,2015,"Asylum seekers, including pending cases (number)",1044031.0
45,2,Africa,2020,"Asylum seekers, including pending cases (number)",617584.0
149,21,Northern America,2015,"Asylum seekers, including pending cases (number)",238989.0
152,21,Northern America,2019,"Asylum seekers, including pending cases (number)",809134.0
172,419,Latin America & the Caribbean,2015,"Asylum seekers, including pending cases (number)",37378.0
176,419,Latin America & the Caribbean,2019,"Asylum seekers, including pending cases (number)",759306.0
200,420,Latin America,2020,"Asylum seekers, including pending cases (number)",972780.0
248,142,Asia,2015,"Asylum seekers, including pending cases (number)",320437.0
256,142,Asia,2020,"Asylum seekers, including pending cases (number)",624623.0
368,150,Europe,2015,"Asylum seekers, including pending cases (number)",678737.0


Filter Other of concern to UNHCR (number)

In [24]:
# Series label for other persons of concern to UNHCR.
others = ['Other of concern to UNHCR (number)']

# Keep the regional rows whose Series label is in `others`.
df_reg_filt_others = df_reg_filt.loc[lambda frame: frame['Series'].isin(others)]
df_reg_filt_others

Unnamed: 0,Code,Region,Year,Series,Value
38,2,Africa,2015,Other of concern to UNHCR (number),11530138.0
46,2,Africa,2020,Other of concern to UNHCR (number),24919267.0
153,21,Northern America,2019,Other of concern to UNHCR (number),3790.0
173,419,Latin America & the Caribbean,2015,Other of concern to UNHCR (number),6697290.0
177,419,Latin America & the Caribbean,2019,Other of concern to UNHCR (number),12450136.0
201,420,Latin America,2020,Other of concern to UNHCR (number),12526860.0
249,142,Asia,2015,Other of concern to UNHCR (number),19921907.0
257,142,Asia,2020,Other of concern to UNHCR (number),18096139.0
369,150,Europe,2015,Other of concern to UNHCR (number),2368815.0
377,150,Europe,2020,Other of concern to UNHCR (number),3334420.0


Filter Total population of concern to UNHCR (number)

In [25]:
# Series label for the total population of concern to UNHCR.
tot_pop = ['Total population of concern to UNHCR (number)']

# Keep the regional rows whose Series label is in `tot_pop`.
df_reg_filt_totpop = df_reg_filt.loc[lambda frame: frame['Series'].isin(tot_pop)]
df_reg_filt_totpop

Unnamed: 0,Code,Region,Year,Series,Value
39,2,Africa,2015,Total population of concern to UNHCR (number),17067308.0
47,2,Africa,2020,Total population of concern to UNHCR (number),32536151.0
150,21,Northern America,2015,Total population of concern to UNHCR (number),655374.0
154,21,Northern America,2019,Total population of concern to UNHCR (number),1233055.0
174,419,Latin America & the Caribbean,2015,Total population of concern to UNHCR (number),7071220.0
178,419,Latin America & the Caribbean,2019,Total population of concern to UNHCR (number),13430404.0
202,420,Latin America,2020,Total population of concern to UNHCR (number),13782851.0
250,142,Asia,2015,Total population of concern to UNHCR (number),28420724.0
258,142,Asia,2020,Total population of concern to UNHCR (number),28625515.0
370,150,Europe,2015,Total population of concern to UNHCR (number),4673766.0
