# In [1]:
import wbdata
import datetime
import pandas as pd
import pycountry
from pycountry_convert import country_alpha2_to_continent_code, convert_continent_code_to_continent_name

# Load the raw population extract and discard every row that lacks an ISO
# code for either the origin or the asylum country.
df = (
    pd.read_csv("../data/population1.csv")
    .dropna(subset=['Country of origin (ISO)', 'Country of asylum (ISO)'])
)

# Define a function to determine the continent of each country
def iso_to_continent(country_iso):
    """Return the continent name for an ISO alpha-3 country code.

    The code is looked up with ``pycountry`` to obtain its alpha-2 form,
    which is then mapped to a continent code and finally to a continent
    name via ``pycountry_convert``.

    Args:
        country_iso: ISO 3166-1 alpha-3 country code.

    Returns:
        The continent name, or ``None`` when any step of the lookup raises
        ``KeyError`` or ``AttributeError`` (for example when the lookup
        yields an object without an ``alpha_2`` attribute).
    """
    try:
        alpha2 = pycountry.countries.get(alpha_3=country_iso).alpha_2
        return convert_continent_code_to_continent_name(
            country_alpha2_to_continent_code(alpha2)
        )
    except (AttributeError, KeyError):
        return None
    
# Clean dataset
# Tag every row with the continent of its asylum country; codes that cannot
# be resolved come back as None.
df['asy_continent'] = df['Country of asylum (ISO)'].apply(iso_to_continent)

# Drop rows with a missing value in ANY column, which also removes the rows
# whose continent could not be resolved above.
df = df.dropna()

# Keep European asylum countries for the years 2000-2021 (both inclusive).
df2 = df[df["asy_continent"] == "Europe"]
df2 = df2[df2["Year"].between(2000, 2021)]

# Sum the numeric columns per asylum country and year. The first positional
# parameter of GroupBy.sum is `numeric_only`, not a column name, so it is
# passed explicitly here instead of relying on a truthy string.
df3 = df2.groupby(
    ["Country of asylum", "Country of asylum (ISO)", "Year"]
).sum(numeric_only=True)

# Discard country/year groups with no refugees and restore the group keys
# as ordinary columns.
df4 = df3[df3["Refugees under UNHCR's mandate"] != 0].reset_index()

# Write the aggregated table to disk.
df4.to_csv('gdp.csv', index=False)


# Add the World Bank GDP, total population and an income-level label to every
# (asylum country, year) row of df4, then write the result to gdp.csv.

# GNI per capita (current US$) above which a country is labelled "High Income".
# NOTE(review): presumably a World Bank high-income cutoff - confirm the
# value and the year it applies to.
_HIGH_INCOME_GNI_THRESHOLD = 12475

# Countries labelled "High Income" without consulting their GNI figure.
# NOTE(review): presumably hard-coded because their GNI data is unusable -
# confirm.
_FORCED_HIGH_INCOME = {"GRC", "LIE", "MCO"}


def _first_value(records):
    """Return the ``"value"`` of the first record, or None if there is none."""
    if not records:
        return None
    return records[0]["value"]


# Create the output columns up front so rows with missing data simply stay
# empty instead of depending on column creation during the first write.
df4["GDP"] = float("nan")
df4["Population"] = float("nan")
df4["Income level"] = None

for index, iso_code, year in df4[["Country of asylum (ISO)", "Year"]].itertuples(name=None):
    year = int(year)
    data_date = datetime.datetime(year, 1, 1)

    gdp_data = wbdata.get_data("NY.GDP.MKTP.CD", country=iso_code, data_date=data_date)
    population_data = wbdata.get_data("SP.POP.TOTL", country=iso_code, data_date=data_date)

    df4.at[index, "GDP"] = _first_value(gdp_data)
    df4.at[index, "Population"] = _first_value(population_data)

    if iso_code in _FORCED_HIGH_INCOME:
        # The GNI figure is never read for these countries, so skip the request.
        df4.at[index, "Income level"] = "High Income"
        continue

    # Rows for 2021 are classified with the 2020 GNI figure.
    gni_date = datetime.datetime(2020, 1, 1) if year == 2021 else data_date
    gni_data = wbdata.get_data("NY.GNP.MKTP.PC.CD", country=iso_code, data_date=gni_date)
    gni_per_capita = _first_value(gni_data)

    if gni_per_capita is None:
        # No GNI figure available: leave the label empty rather than failing
        # on a comparison with None.
        continue
    if gni_per_capita > _HIGH_INCOME_GNI_THRESHOLD:
        df4.at[index, "Income level"] = "High Income"
    else:
        # Includes a value exactly equal to the threshold, which previously
        # matched neither branch and was left unlabelled.
        df4.at[index, "Income level"] = "Middle Income"

df4.to_csv('gdp.csv', index=False)

# Plot with plotly
import plotly.express as px

# NOTE(review): this rebinds `df` to plotly's bundled gapminder sample, and the
# figure below does not use it - confirm whether the line can be removed.
df = px.data.gapminder()

# Animated bubble chart of df4: one frame per year, one bubble per asylum
# country, sized by refugee count and coloured by income level, on log-log
# GDP/population axes.
fig = px.scatter(
    df4,
    x="GDP",
    y="Population",
    animation_frame="Year",
    animation_group="Country of asylum",
    size="Refugees under UNHCR's mandate",
    color="Income level",
    hover_name="Country of asylum",
    category_orders={"Income level": ["High Income", "Middle Income"]},
    range_x=[3000000000, 8000000000000],
    range_y=[1000000, 1000000000],
    log_x=True,
    log_y=True,
    size_max=60,
)

fig.update_xaxes(title_text="GDP (USD)")

# Only the fonts of the axis titles are set here; their text is left as is.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "Refugee Influx of European Countries by GDP and Population",
        "font": {"family": "Arial", "size": 20},
    },
    yaxis_title={"font": {"family": "Arial", "size": 18}},
    xaxis_title={"font": {"family": "Arial", "size": 18}},
)
fig.show()