In [11]:
import pandas as pd
import plotly.io as pio
pio.renderers.default='plotly_mimetype+notebook_connected'
import altair as alt
from countrygroups import EUROPEAN_UNION

import warnings
warnings.filterwarnings('ignore')

In [12]:
# Read the raw population table from disk and preview its first rows.
# The file's unnamed first column is loaded as a regular "Unnamed: 0" column.
population_csv_path = "../data/population.csv"
df = pd.read_csv(population_csv_path)
df.head()

Unnamed: 0,Year,Country of origin,Country of origin (ISO),Country of asylum,Country of asylum (ISO),Refugees under UNHCR's mandate,Asylum-seekers,IDPs of concern to UNHCR,Other people in need of international protection,Stateless persons,Host Community,Others of concern
0,1951,Unknown,,Australia,AUS,180000,0,0,-,0,-,0
1,1951,Unknown,,Austria,AUT,282000,0,0,-,0,-,0
2,1951,Unknown,,Belgium,BEL,55000,0,0,-,0,-,0
3,1951,Unknown,,Canada,CAN,168511,0,0,-,0,-,0
4,1951,Unknown,,Denmark,DNK,2000,0,0,-,0,-,0


In [13]:
# Replace the verbose source headers with the short names used by the rest
# of the notebook; columns not listed here keep their original names.
short_column_names = {
    "Country of origin": "Origin",
    "Country of origin (ISO)": "Origin_ISO",
    "Country of asylum": "Asylum",
    "Country of asylum (ISO)": "Asylum_ISO",
    "Refugees under UNHCR's mandate": "Refugees",
    "Asylum-seekers": "asylum_seekers",
    "IDPs of concern to UNHCR": "idps_of_concern",
    "Other people in need of international protection": "need_protect",
    "Stateless persons": "Stateless",
    "Host Community": "Host",
    "Others of concern": "other_of_concern",
}
df = df.rename(columns=short_column_names)

In [14]:
# Keep only records with a known origin, from the year 2000 onwards.
# The origin label is compared after stripping surrounding whitespace so the
# filter excludes both "Unknown" and "Unknown " (trailing space); matching
# the padded literal alone silently keeps the rows if the padding is absent.
has_known_origin = df["Origin"].str.strip() != "Unknown"
from_2000_onwards = df["Year"] >= 2000
df = df[has_known_origin & from_2000_onwards].reset_index(drop=True)

In [15]:
# Keep the rows whose asylum country code is a member of EUROPEAN_UNION,
# then shorten two long origin labels so they read well on chart axes.
origin_display_names = {
    "Serbia and Kosovo: S/RES/1244 (1999)": "Serbia",
    "Russian Federation": "Russia",
}
asylum_in_eu = df["Asylum_ISO"].isin(EUROPEAN_UNION)
df_europe = df[asylum_in_eu].reset_index(drop=True)
for long_label, short_label in origin_display_names.items():
    df_europe.loc[df_europe["Origin"] == long_label, "Origin"] = short_label

In [16]:
# Per-origin averages. Each value is the mean over the individual rows of the
# group, not a yearly total across asylum countries.
# numeric_only=True is required because df_europe still carries text columns
# (country names, ISO codes, "-" placeholders); without it pandas >= 2.0
# raises TypeError instead of silently dropping those columns.
df_europe_ori_mean = df_europe.groupby(["Origin"], as_index=False).mean(numeric_only=True)

# The ten origins with the highest mean refugee count.
top_ori = df_europe_ori_mean.sort_values(by="Refugees", ascending=False).head(10)

# Per-origin, per-year averages, restricted to those top origins.
df_europe_ori_year = df_europe.groupby(["Origin", "Year"], as_index=False).mean(numeric_only=True)
df_europe_ori_year = df_europe_ori_year[df_europe_ori_year["Origin"].isin(top_ori["Origin"])]

In [17]:
# Per-asylum-country averages. Each value is the mean over the individual rows
# of the group, not a yearly total across origin countries.
# numeric_only=True is required because df_europe still carries text columns
# (country names, ISO codes, "-" placeholders); without it pandas >= 2.0
# raises TypeError instead of silently dropping those columns.
df_europe_asy_mean = df_europe.groupby(["Asylum"], as_index=False).mean(numeric_only=True)

# The ten asylum countries with the highest mean refugee count.
top_asy = df_europe_asy_mean.sort_values(by="Refugees", ascending=False).head(10)

# Per-asylum-country, per-year averages, restricted to those top countries.
df_europe_asy_year = df_europe.groupby(["Asylum", "Year"], as_index=False).mean(numeric_only=True)
df_europe_asy_year = df_europe_asy_year[df_europe_asy_year["Asylum"].isin(top_asy["Asylum"])]

Method: Linked bar chart and line chart

The linked charts are created with Altair to identify the countries of origin with the most refugees who fled to Europe, and the European countries that offer asylum to the most refugees. A selection feature is added to the charts, so clicking a bar shows how the number of refugees for that country changes over time.

In [18]:
# Linked view for origin countries: clicking a mark selects its Origin, and
# the same selection drives both the bar chart and the line chart.
selection = alt.selection_single(fields=["Origin"], name="random")

# Selected bars are coloured by their Refugees value; the rest are greyed out.
color = alt.condition(selection, "Refugees", alt.value("lightgrey"))
bar1 = (
    alt.Chart(df_europe_ori_mean)
    .mark_bar()
    .encode(
        x=alt.X(
            "Origin:N",
            sort=alt.EncodingSortField(field="Refugees", op="mean", order="descending"),
            title="Origin Country of Refugees",
        ),
        y=alt.Y("mean(Refugees):Q", title="Mean Number of Refugees, by Year"),
        color=color,
    )
    .transform_window(
        rank="rank(Refugees)",
        sort=[alt.SortField("Refugees", order="descending")],
    )
    # rank starts at 1, so "<= 10" keeps the top ten origins; "< 10" showed
    # only nine bars while the line chart and caption cover ten countries.
    .transform_filter(alt.datum.rank <= 10)
    .add_selection(selection)
    .properties(title="Countries with Most Refugees Escaped to Europe")
)

# Lines for unselected origins are made fully transparent, so only the
# selected origin's yearly series is visible.
color1 = alt.condition(selection, alt.Color("Origin:N"), alt.value("white"))
opacity1 = alt.condition(selection, alt.value(1.0), alt.value(0.0))
line1 = (
    alt.Chart(df_europe_ori_year)
    .mark_line()
    .encode(
        x=alt.X("Year:O", title="Time (Year)"),
        y=alt.Y("Refugees:Q", title="Number of Refugees"),
        color=color1,
        opacity=opacity1,
    )
    .add_selection(selection)
    .properties(title="Numbers of Refugees Escaped to Europe Each Year")
)

# Place both charts side by side and apply shared font sizes.
alt.hconcat(bar1, line1).configure_axis(
    labelFontSize=12,
    titleFontSize=16
).configure_title(
    fontSize=18
).configure_legend(
    titleFontSize=14,
    labelFontSize=12
)

Caption of Figure: This is a linked bar chart and line chart showing the top ten countries of origin with the most refugees who fled to Europe, and how the number of refugees from each country changes over time in the 21st century.

In [19]:
# Linked view for asylum countries: clicking a mark selects its Asylum
# country, and the same selection drives both the bar and the line chart.
selection2 = alt.selection_single(fields=["Asylum"], name="random")

# Selected bars are coloured by their Refugees value; the rest are greyed out.
color2 = alt.condition(selection2, "Refugees", alt.value("lightgrey"))
bar2 = (
    alt.Chart(df_europe_asy_mean)
    .mark_bar()
    .encode(
        x=alt.X(
            "Asylum:N",
            sort=alt.EncodingSortField(field="Refugees", op="mean", order="descending"),
            title="Country of Asylum",
        ),
        y=alt.Y("mean(Refugees):Q", title="Mean Number of Refugees, by Year"),
        color=color2,
    )
    .transform_window(
        rank="rank(Refugees)",
        sort=[alt.SortField("Refugees", order="descending")],
    )
    # rank starts at 1, so "<= 10" keeps the top ten countries; "< 10" showed
    # only nine bars while the line chart and caption cover ten countries.
    .transform_filter(alt.datum.rank <= 10)
    .add_selection(selection2)
    .properties(title="European Countries with Most Refugees Accepted")
)

# Lines for unselected countries are made fully transparent, so only the
# selected country's yearly series is visible.
color3 = alt.condition(selection2, alt.Color("Asylum:N"), alt.value("lightgrey"))
opacity2 = alt.condition(selection2, alt.value(1.0), alt.value(0.0))
line2 = (
    alt.Chart(df_europe_asy_year)
    .mark_line()
    .encode(
        x=alt.X("Year:O", title="Time (Year)"),
        y=alt.Y("Refugees:Q", title="Number of Refugees"),
        color=color3,
        opacity=opacity2,
    )
    .add_selection(selection2)
    .properties(title="Numbers of Refugees Accepted by European Countries Each Year")
)

# Place both charts side by side and apply shared font sizes.
alt.hconcat(bar2, line2).configure_axis(
    labelFontSize=12,
    titleFontSize=16
).configure_title(
    fontSize=18
).configure_legend(
    titleFontSize=14,
    labelFontSize=12
)

Caption of Figure: This is a linked bar chart and line chart showing the top ten European countries offering asylum to the most refugees, and how the number of refugees accepted by each country changes over time in the 21st century.

Conclusion: The countries with the most refugees who fled to Europe are Middle East countries like Syria, Iraq and Afghanistan, as well as Ukraine. Refugees from Syria and other Middle East countries have gradually increased since 2015 and reached 40,000 per year, due to war and instability in the region. Refugees from Ukraine saw a major increase only in 2022, from almost zero to nearly 140,000, evidently caused by the Russia-Ukraine war that broke out in 2022.

Of the European countries that offer asylum to refugees from all over the world, Germany has accepted the most refugees, and its number is higher than that of other countries in almost every year. This suggests that Germany is not only wealthy and stable enough, but also has consistently generous refugee policies. It is followed by France and Poland; the number of refugees who fled to Poland grew explosively in 2022 alone. Since this mirrors the pattern in the previous chart, and the two countries are neighbors, it is evident that Poland offered asylum to a large share of Ukrainian refugees in 2022.