In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pycountry_convert as pc
from pyecharts.charts import  Sankey
from pyecharts import options as opts
import warnings
warnings.filterwarnings("ignore")

In [19]:
# Load the raw population table (the path is relative to the notebook's working directory).
popu=pd.read_csv("../data/population.csv")

# Keep only the year, the origin/asylum ISO codes and the refugee count,
# then give those four columns shorter names.
popu1=popu.loc[:,["Year","Country of origin (ISO)","Country of asylum (ISO)","Refugees under UNHCR's mandate"]]

popu1.columns=["year","Origin_ISO","Asylum_ISO","Number"]

# only keep observations from 2002 onwards (2002 itself is included)
popu1=popu1[popu1.year>=2002]

# delete rows with missing values
# `~` is the logical NOT for a boolean mask; the previous unary minus only
# worked because pandas translates it to an inversion for boolean data.
popu1=popu1[~pd.isna(popu1.Origin_ISO)]
popu1=popu1[~pd.isna(popu1.Asylum_ISO)]

# delete rows of domestic refugee movement
popu1=popu1[popu1.Origin_ISO!=popu1.Asylum_ISO]

# delete rows with meaningless value (a count of exactly zero)
popu1=popu1[popu1.Number!=0]

In [20]:
# Renumber the rows 0..n-1 after the filtering above. drop=True discards the
# old index outright, so it never becomes a column that has to be sliced away
# by position afterwards.
popu1=popu1.reset_index(drop=True)
popu1.head()

Unnamed: 0,year,Origin_ISO,Asylum_ISO,Number
0,2002,IRQ,ALB,5
1,2002,SRB,ALB,6
2,2002,TUR,ALB,7
3,2002,CMR,DZA,6
4,2002,PSE,DZA,4005


In [21]:
# define a function to get continent name for each country

def country_to_continent(country_alpha3):
    """Return the continent name for an alpha-3 country code.

    The code is converted in three pycountry_convert steps:
    alpha-3 -> alpha-2 -> continent code -> continent name.
    Nothing is caught here, so any exception raised by one of those
    conversions propagates to the caller.
    """
    return pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(
            pc.country_alpha3_to_country_alpha2(country_alpha3)
        )
    )

In [22]:
def _continents_for(iso_codes):
    """Return one continent name per code in `iso_codes`, in the same order.

    A code that `country_to_continent` cannot convert yields '' so the row
    can be filtered out later. Each distinct code is converted only once and
    then reused for every further occurrence.
    """
    resolved={}
    names=[]
    for code in iso_codes:
        if code not in resolved:
            try:
                resolved[code]=country_to_continent(code)
            except Exception:
                # Deliberate best-effort: an unconvertible code becomes ''.
                # `Exception` (not a bare except) leaves KeyboardInterrupt
                # and SystemExit alone.
                resolved[code]=''
        names.append(resolved[code])
    return names


# Iterate the column values directly instead of looking rows up by label
# (`popu1.Origin_ISO[i]`): a label lookup fails when the index is not exactly
# 0..n-1, and that failure used to be swallowed and recorded as ''.
origin_continent=_continents_for(popu1.Origin_ISO)
asylum_continent=_continents_for(popu1.Asylum_ISO)
    

In [25]:
# add columns that display continents of origin and asylum country
# (origin first, then asylum, so the resulting column order stays the same)
for _column,_continents in (("Origin_continent",origin_continent),
                            ("Asylum_continent",asylum_continent)):
    popu1[_column]=_continents

In [27]:
# Keep a row only when both its origin and its asylum continent are known;
# an empty string marks a country code that could not be converted.
has_origin=popu1.Origin_continent!=''
has_asylum=popu1.Asylum_continent!=''
popu1=popu1[has_origin & has_asylum]
popu1.head()

Unnamed: 0,year,Origin_ISO,Asylum_ISO,Number,Origin_continent,Asylum_continent
0,2002,IRQ,ALB,5,Asia,Europe
1,2002,SRB,ALB,6,Europe,Europe
2,2002,TUR,ALB,7,Asia,Europe
3,2002,CMR,DZA,6,Africa,Africa
4,2002,PSE,DZA,4005,Asia,Africa


In [28]:
# group the data by origin and asylum continent and total the refugee counts
# "Number" is selected by name before summing. Summing every column and then
# taking `.iloc[:,1]` picks whatever happens to be second, which is not
# "Number" when the string ISO-code columns are kept in the aggregate.
df_grouped=popu1.groupby(['Origin_continent','Asylum_continent'])['Number'].sum()
print(df_grouped)


Origin_continent  Asylum_continent
Africa            Africa               84061708
                  Asia                  4097435
                  Europe                8192838
                  North America         2351434
                  Oceania                192007
                  South America           79495
Asia              Africa                3855792
                  Asia                139136469
                  Europe               22189323
                  North America         3912121
                  Oceania                877000
                  South America           81297
Europe            Africa                   1704
                  Asia                   165855
                  Europe               14604476
                  North America         1363642
                  Oceania                 57619
                  South America           11267
North America     Africa                     59
                  Asia                       62
     

In [29]:
# create nodes for the sankey plot
# Every continent appears twice: once as a "(from)" node and once as a "(to)"
# node. All "(from)" nodes are listed first, in the continent order below.
continent_names=['Europe','Africa','Asia','North America','South America','Oceania']
nodes=[{'name':continent+side}
       for side in ('(from)','(to)')
       for continent in continent_names]

In [30]:
# create linkes for the sankey plot of global refugee movement
# Walk the grouped Series itself: each item is ((origin, asylum), total).
# This replaces the hard-coded range(35) and the integer-position lookup
# df_grouped[i], so the cell keeps working if the number of pairs changes.
linkes_all=[
    {'source':origin+"(from)",'target':asylum+"(to)",'value':total}
    for (origin,asylum),total in df_grouped.items()
]

# Oceania -> South America gets a zero-value link when the data has no such
# pair (presumably so that all 36 from/to combinations exist - TODO confirm).
# The check avoids adding a duplicate link if the pair is ever present.
if ('Oceania','South America') not in df_grouped.index:
    linkes_all.append({'source':'Oceania(from)','target':'South America(to)','value':0})
len(linkes_all)

36

In [36]:
# Read the txt files which contain the linkes for the sankey plot of refugee flow

from pathlib import Path


def _read_without_newlines(path):
    """Return the text content of `path` with every newline character removed."""
    return Path(path).read_text().replace("\n","")


# NOTE(review): eval() executes whatever Python expression the file contains.
# That is only acceptable while these files are trusted, project-owned data.
# If they hold plain list/dict literals, ast.literal_eval would be the safer
# parser - confirm the file format before switching.
# NOTE(review): this rebinds `linkes_all`, replacing any value an earlier cell
# computed under the same name.
txt1=_read_without_newlines('./Sankey_plot_linkes/linkes_all.txt')
linkes_all=eval(txt1)

txt2=_read_without_newlines('./Sankey_plot_linkes/linkes_eu.txt')
linkes_eu=eval(txt2)

In [37]:
# | label: fig-map1
# | fig-cap: Sankey plot of global refugee flow within past two decades. The nodes on the left side represent for the original continents of refugees, while nodes on the right side represent for the continents of asylum countries.
# Link appearance: opacity, curvature and gradient colouring
all_flow_style=opts.LineStyleOpts(opacity=0.3,curve=0.5,color='gradient')
# Node labels are placed to the right of each node
all_node_labels=opts.LabelOpts(position='right')
all_title=opts.TitleOpts(title='Sankey Plot of Global Refugee Movement',pos_left = '20%')

# Build the chart in two explicit steps: add the series, then apply the title.
pic_all=Sankey().add(
    '',
    nodes,
    linkes_all,
    linestyle_opt=all_flow_style,
    label_opts=all_node_labels,
    # gap between nodes
    node_gap=10,
)
pic_all=pic_all.set_global_opts(title_opts=all_title)
pic_all.render_notebook()


According to this Sankey plot of global refugee movement, Africa, Asia and Europe clearly have the largest refugee inflows and outflows over the past 20 years. The graph also shows that the vast majority of refugees originating in Asia and Africa move only within their own continent. By contrast, more than half of the refugees who move to Europe come from other continents. Considering both this high proportion of refugees arriving from other continents and the recent local conflict in Europe, which may cause significant fluctuations in refugee movement, we will mainly focus on refugees from and to Europe in the subsequent analysis.

In [35]:
# | label: fig-map2
# | fig-cap: Sankey plot of refugees from and towards Europe within past two decades. The nodes on the left side represent for the original continents of refugees, while nodes on the right side represent for the continents of asylum countries.
# Link appearance: opacity, curvature and gradient colouring
eu_flow_style=opts.LineStyleOpts(opacity=0.5,curve=0.5,color='gradient')
# Node labels are placed to the right of each node
eu_node_labels=opts.LabelOpts(position='right')
eu_title=opts.TitleOpts(title='Refugees from and to Europe',pos_left = '25%')

# Build the chart in two explicit steps: add the series, then apply the title.
pic_eu=Sankey().add(
    '',
    nodes,
    linkes_eu,
    linestyle_opt=eu_flow_style,
    label_opts=eu_node_labels,
    # gap between nodes
    node_gap=12,
)
pic_eu=pic_eu.set_global_opts(title_opts=eu_title)
pic_eu.render_notebook()


Unlike the previous plot of global refugee flows, this graph only displays refugees moving from or to Europe since 2002. From this plot, we notice that most refugees who originate from Europe move to other European countries, while a considerable number move to North America. By contrast, among the refugees who move to Europe, almost half are from Asia, followed by Europe and Africa.