# International migrants and refugees

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = os.getcwd()                                         # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
filepath = os.path.join('datasets', 'International Migrants and Refugees.csv')
df = pd.read_csv(filepath)

# Head and Tail

In [4]:
df

Unnamed: 0,Region/Country/Area,Unnamed: 1,Year,Series,Value,Footnotes,Source,Unnamed: 7,Unnamed: 8
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0,,"United Nations Population Division, New York, ...",,
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9,,"United Nations Population Division, New York, ...",,
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0,,"United Nations Population Division, New York, ...",,
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9,,"United Nations Population Division, New York, ...",,
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0,,"United Nations Population Division, New York, ...",,
...,...,...,...,...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3,Including refugees.,"United Nations Population Division, New York, ...",,
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0,,United Nations High Commissioner for Refugees ...,,
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0,,United Nations High Commissioner for Refugees ...,,
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0,A study is being pursued to provide a revised ...,United Nations High Commissioner for Refugees ...,,


In [5]:
df.shape

(6592, 9)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6592 entries, 0 to 6591
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Region/Country/Area  6592 non-null   int64  
 1   Unnamed: 1           6592 non-null   object 
 2   Year                 6592 non-null   int64  
 3   Series               6592 non-null   object 
 4   Value                6592 non-null   float64
 5   Footnotes            3856 non-null   object 
 6   Source               6592 non-null   object 
 7   Unnamed: 7           629 non-null    object 
 8   Unnamed: 8           16 non-null     object 
dtypes: float64(1), int64(2), object(6)
memory usage: 463.6+ KB


In [7]:
df.columns

Index(['Region/Country/Area', 'Unnamed: 1', 'Year', 'Series', 'Value',
       'Footnotes', 'Source', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [8]:
df.rename(columns={'Region/Country/Area': 'Code', 
                   'Unnamed: 1': 'RegionCountryArea',
                   }, inplace=True)

df

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value,Footnotes,Source,Unnamed: 7,Unnamed: 8
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0,,"United Nations Population Division, New York, ...",,
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9,,"United Nations Population Division, New York, ...",,
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0,,"United Nations Population Division, New York, ...",,
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9,,"United Nations Population Division, New York, ...",,
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0,,"United Nations Population Division, New York, ...",,
...,...,...,...,...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3,Including refugees.,"United Nations Population Division, New York, ...",,
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0,,United Nations High Commissioner for Refugees ...,,
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0,,United Nations High Commissioner for Refugees ...,,
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0,A study is being pursued to provide a revised ...,United Nations High Commissioner for Refugees ...,,


In [9]:
df1 = df[['Code', 'RegionCountryArea', 'Year', 'Series', 'Value']]
df1

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0
...,...,...,...,...,...
6587,716,Zimbabwe,2020,International migrant stock: Female (% total P...,2.3
6588,716,Zimbabwe,2020,Total refugees and people in refugee-like situ...,9118.0
6589,716,Zimbabwe,2020,"Asylum seekers, including pending cases (number)",11766.0
6590,716,Zimbabwe,2020,Other of concern to UNHCR (number),271263.0


In [10]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6592 entries, 0 to 6591
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Code               6592 non-null   int64  
 1   RegionCountryArea  6592 non-null   object 
 2   Year               6592 non-null   int64  
 3   Series             6592 non-null   object 
 4   Value              6592 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 257.6+ KB


In [11]:
df1.shape

(6592, 5)

In [12]:
df1.describe(include='object')

Unnamed: 0,RegionCountryArea,Series
count,6592,6592
unique,263,8
top,"China, Hong Kong SAR",International migrant stock: Both sexes (number)
freq,36,1045


In [13]:
df1['RegionCountryArea'].value_counts()

China, Hong Kong SAR    36
Eritrea                 35
Egypt                   35
Switzerland             35
Ghana                   34
                        ..
San Marino               8
Isle of Man              8
Tokelau                  8
Cook Islands             8
Latin America            4
Name: RegionCountryArea, Length: 263, dtype: int64

In [14]:
df1['Series'].value_counts()

International migrant stock: Both sexes (number)                 1045
International migrant stock: Both sexes (% total population)     1045
International migrant stock: Male (% total Population)            913
International migrant stock: Female (% total Population)          913
Total population of concern to UNHCR (number)                     752
Total refugees and people in refugee-like situations (number)     702
Asylum seekers, including pending cases (number)                  685
Other of concern to UNHCR (number)                                537
Name: Series, dtype: int64

In [15]:
df1['Year'].value_counts()

2020    1666
2015    1640
2010    1609
2005    1535
2019      61
2017      28
2018      27
2016      26
Name: Year, dtype: int64

In [16]:
df1['Value'].describe()

count    6.592000e+03
mean     7.122918e+05
std      7.074442e+06
min      0.000000e+00
25%      4.200000e+00
50%      6.260000e+01
75%      2.429975e+04
max      2.805981e+08
Name: Value, dtype: float64

# Missing Values Exploration

In [17]:
df1.isna().sum()

Code                 0
RegionCountryArea    0
Year                 0
Series               0
Value                0
dtype: int64

# Grouping and Aggregation

In [18]:
reg_coun_area = df1.groupby('RegionCountryArea')
reg_coun_area.head()

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
0,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (number),191446828.0
1,1,"Total, all countries or areas",2005,International migrant stock: Both sexes (% tot...,2.9
2,1,"Total, all countries or areas",2005,International migrant stock: Male (% total Pop...,3.0
3,1,"Total, all countries or areas",2005,International migrant stock: Female (% total P...,2.9
4,1,"Total, all countries or areas",2010,International migrant stock: Both sexes (number),220983187.0
...,...,...,...,...,...
6561,716,Zimbabwe,2005,International migrant stock: Both sexes (number),402226.0
6562,716,Zimbabwe,2005,International migrant stock: Both sexes (% tot...,3.3
6563,716,Zimbabwe,2005,International migrant stock: Male (% total Pop...,4.0
6564,716,Zimbabwe,2005,International migrant stock: Female (% total P...,2.7


In [19]:
agg_value_stats = reg_coun_area.agg(
    agg_pct = pd.NamedAgg('Value', np.min),
    agg_mean = pd.NamedAgg('Value', np.mean),
    agg_max = pd.NamedAgg('Value', np.max),
    agg_sum = pd.NamedAgg('Value', np.sum)
)

agg_value_stats

Unnamed: 0_level_0,agg_pct,agg_mean,agg_max,agg_sum
RegionCountryArea,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,0.3,3.700264e+05,2802857.0,11840846.2
Africa,1.6,7.554332e+06,32536151.0,181303959.9
Albania,1.0,7.813232e+03,64739.0,242210.2
Algeria,0.5,6.004057e+04,250378.0,1681136.0
American Samoa,40.7,1.188458e+04,24233.0,95076.6
...,...,...,...,...
Western Europe,11.8,6.588340e+06,33231117.0,105413433.6
Western Sahara,0.8,1.187375e+03,5424.0,18998.0
Yemen,0.7,4.222225e+05,4010022.0,13511118.5
Zambia,0.8,4.661269e+04,252895.0,1491606.2


# Data Exploration By Region

# Africa

In [20]:
reg_coun_area.get_group('Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
29,2,Africa,2010,International migrant stock: Both sexes (% tot...,1.7
30,2,Africa,2010,International migrant stock: Male (% total Pop...,1.8
31,2,Africa,2010,International migrant stock: Female (% total P...,1.6
32,2,Africa,2015,International migrant stock: Both sexes (number),22860792.0
33,2,Africa,2015,International migrant stock: Both sexes (% tot...,1.9


Northern Africa

In [21]:
reg_coun_area.get_group('Northern Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
48,15,Northern Africa,2005,International migrant stock: Both sexes (number),1749718.0
49,15,Northern Africa,2005,International migrant stock: Both sexes (% tot...,0.9
50,15,Northern Africa,2010,International migrant stock: Both sexes (number),1952040.0
51,15,Northern Africa,2010,International migrant stock: Both sexes (% tot...,1.0
52,15,Northern Africa,2015,International migrant stock: Both sexes (number),2138918.0
53,15,Northern Africa,2015,International migrant stock: Both sexes (% tot...,1.0
54,15,Northern Africa,2020,International migrant stock: Both sexes (number),3167926.0
55,15,Northern Africa,2020,International migrant stock: Both sexes (% tot...,1.3


Sub-Saharan Africa

In [22]:
reg_coun_area.get_group('Sub-Saharan Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
56,202,Sub-Saharan Africa,2005,International migrant stock: Both sexes (number),14290369.0
57,202,Sub-Saharan Africa,2005,International migrant stock: Both sexes (% tot...,2.0
58,202,Sub-Saharan Africa,2005,International migrant stock: Male (% total Pop...,2.1
59,202,Sub-Saharan Africa,2005,International migrant stock: Female (% total P...,1.8
60,202,Sub-Saharan Africa,2010,International migrant stock: Both sexes (number),15854637.0
61,202,Sub-Saharan Africa,2010,International migrant stock: Both sexes (% tot...,1.9
62,202,Sub-Saharan Africa,2010,International migrant stock: Male (% total Pop...,2.0
63,202,Sub-Saharan Africa,2010,International migrant stock: Female (% total P...,1.8
64,202,Sub-Saharan Africa,2015,International migrant stock: Both sexes (number),20721874.0
65,202,Sub-Saharan Africa,2015,International migrant stock: Both sexes (% tot...,2.2


Eastern Africa

In [23]:
reg_coun_area.get_group('Middle Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
88,17,Middle Africa,2005,International migrant stock: Both sexes (number),1961951.0
89,17,Middle Africa,2005,International migrant stock: Both sexes (% tot...,1.8
90,17,Middle Africa,2005,International migrant stock: Male (% total Pop...,1.8
91,17,Middle Africa,2005,International migrant stock: Female (% total P...,1.7
92,17,Middle Africa,2010,International migrant stock: Both sexes (number),2436802.0
93,17,Middle Africa,2010,International migrant stock: Both sexes (% tot...,1.9
94,17,Middle Africa,2010,International migrant stock: Male (% total Pop...,1.9
95,17,Middle Africa,2010,International migrant stock: Female (% total P...,1.8
96,17,Middle Africa,2015,International migrant stock: Both sexes (number),3479911.0
97,17,Middle Africa,2015,International migrant stock: Both sexes (% tot...,2.3


Middle Africa

In [24]:
reg_coun_area.get_group('Middle Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
88,17,Middle Africa,2005,International migrant stock: Both sexes (number),1961951.0
89,17,Middle Africa,2005,International migrant stock: Both sexes (% tot...,1.8
90,17,Middle Africa,2005,International migrant stock: Male (% total Pop...,1.8
91,17,Middle Africa,2005,International migrant stock: Female (% total P...,1.7
92,17,Middle Africa,2010,International migrant stock: Both sexes (number),2436802.0
93,17,Middle Africa,2010,International migrant stock: Both sexes (% tot...,1.9
94,17,Middle Africa,2010,International migrant stock: Male (% total Pop...,1.9
95,17,Middle Africa,2010,International migrant stock: Female (% total P...,1.8
96,17,Middle Africa,2015,International migrant stock: Both sexes (number),3479911.0
97,17,Middle Africa,2015,International migrant stock: Both sexes (% tot...,2.3


Southern Africa

In [25]:
reg_coun_area.get_group('Southern Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
104,18,Southern Africa,2005,International migrant stock: Both sexes (number),1582721.0
105,18,Southern Africa,2005,International migrant stock: Both sexes (% tot...,2.9
106,18,Southern Africa,2005,International migrant stock: Male (% total Pop...,3.4
107,18,Southern Africa,2005,International migrant stock: Female (% total P...,2.4
108,18,Southern Africa,2010,International migrant stock: Both sexes (number),2352541.0
109,18,Southern Africa,2010,International migrant stock: Both sexes (% tot...,4.0
110,18,Southern Africa,2010,International migrant stock: Male (% total Pop...,4.7
111,18,Southern Africa,2010,International migrant stock: Female (% total P...,3.4
112,18,Southern Africa,2015,International migrant stock: Both sexes (number),3477903.0
113,18,Southern Africa,2015,International migrant stock: Both sexes (% tot...,5.5


Western Africa

In [26]:
reg_coun_area.get_group('Western Africa').head(10)

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
120,11,Western Africa,2005,International migrant stock: Both sexes (number),5957048.0
121,11,Western Africa,2005,International migrant stock: Both sexes (% tot...,2.2
122,11,Western Africa,2005,International migrant stock: Male (% total Pop...,2.4
123,11,Western Africa,2005,International migrant stock: Female (% total P...,2.1
124,11,Western Africa,2010,International migrant stock: Both sexes (number),6321637.0
125,11,Western Africa,2010,International migrant stock: Both sexes (% tot...,2.1
126,11,Western Africa,2010,International migrant stock: Male (% total Pop...,2.2
127,11,Western Africa,2010,International migrant stock: Female (% total P...,1.9
128,11,Western Africa,2015,International migrant stock: Both sexes (number),7067138.0
129,11,Western Africa,2015,International migrant stock: Both sexes (% tot...,2.0


# Alternative Indexing

In [27]:
cols = ['Africa', 'Northern Africa', 'Sub-Saharan Africa', 'Eastern Africa', 
        'Middle Africa', 'Southern Africa', 'Western Africa', 'Northern America']

df2 = df1[df['RegionCountryArea'].isin(cols)]
df2

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
...,...,...,...,...,...
154,21,Northern America,2019,Total population of concern to UNHCR (number),1233055.0
155,21,Northern America,2020,International migrant stock: Both sexes (number),58708795.0
156,21,Northern America,2020,International migrant stock: Both sexes (% tot...,15.9
157,21,Northern America,2020,International migrant stock: Male (% total Pop...,15.5


# Grouping By Region - Africa

In [28]:
df2.groupby('RegionCountryArea').head()

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
24,2,Africa,2005,International migrant stock: Both sexes (number),16040087.0
25,2,Africa,2005,International migrant stock: Both sexes (% tot...,1.8
26,2,Africa,2005,International migrant stock: Male (% total Pop...,1.9
27,2,Africa,2005,International migrant stock: Female (% total P...,1.6
28,2,Africa,2010,International migrant stock: Both sexes (number),17806677.0
48,15,Northern Africa,2005,International migrant stock: Both sexes (number),1749718.0
49,15,Northern Africa,2005,International migrant stock: Both sexes (% tot...,0.9
50,15,Northern Africa,2010,International migrant stock: Both sexes (number),1952040.0
51,15,Northern Africa,2010,International migrant stock: Both sexes (% tot...,1.0
52,15,Northern Africa,2015,International migrant stock: Both sexes (number),2138918.0
