In [23]:
import requests
import json
import pandas as pd

from collections import Counter
import numpy as np
import plotly.express as px
import plotly as p
from sklearn.preprocessing import OneHotEncoder
import math
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

In [24]:
def api_func(year):
    """Fetch the Eurostat ``migr_asydcfsta`` dataset for one year as a tidy DataFrame.

    The JSON-stat response is decoded into one row per reported cell, with each
    dimension translated to its human-readable label when one is provided.

    Parameters
    ----------
    year : str or int
        Value substituted into the ``time`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Columns are the response dimensions in reverse declaration order
        (without ``unit`` and ``freq``) followed by ``value``. Rows with a
        zero value, any "Total" age/sex/decision label, or the
        "European Union - 27 countries (from 2020)" aggregate are removed.
        The index holds the position of each cell within the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    # Adjacent literals are concatenated at compile time, so the query string
    # stays byte-identical while remaining readable (and never wraps mid-token).
    api_URL = (
        "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/migr_asydcfsta"
        f"?format=JSON&time={year}&unit=PER"
        "&citizen=LI&citizen=UK_OCT&citizen=BA&citizen=ME&citizen=MD&citizen=MK"
        "&citizen=GE&citizen=AL&citizen=RS&citizen=TR&citizen=UA&citizen=XK"
        "&citizen=AD&citizen=BY&citizen=VA&citizen=MC&citizen=RU&citizen=SM"
        "&citizen=AO&citizen=CM&citizen=CF&citizen=TD&citizen=CG&citizen=CD"
        "&citizen=GQ&citizen=GA&citizen=ST&citizen=BI&citizen=KM&citizen=DJ"
        "&citizen=ER&citizen=ET&citizen=KE&citizen=MG&citizen=MW&citizen=MU"
        "&citizen=MZ&citizen=RW&citizen=SC&citizen=SO&citizen=UG&citizen=TZ"
        "&citizen=ZM&citizen=ZW&citizen=DZ&citizen=EG&citizen=LY&citizen=MA"
        "&citizen=SS&citizen=SD&citizen=TN&citizen=EH&citizen=BW&citizen=LS"
        "&citizen=NA&citizen=ZA&citizen=SZ&citizen=BJ&citizen=BF&citizen=CV"
        "&citizen=CI&citizen=GM&citizen=GH&citizen=GN&citizen=GW&citizen=LR"
        "&citizen=ML&citizen=MR&citizen=NE&citizen=NG&citizen=SN&citizen=SL"
        "&citizen=TG&citizen=AG&citizen=BS&citizen=BB&citizen=CU&citizen=DM"
        "&citizen=DO&citizen=GD&citizen=HT&citizen=JM&citizen=KN&citizen=LC"
        "&citizen=VC&citizen=TT&citizen=BZ&citizen=CR&citizen=SV&citizen=GT"
        "&citizen=HN&citizen=MX&citizen=NI&citizen=PA&citizen=AR&citizen=BO"
        "&citizen=BR&citizen=CL&citizen=CO&citizen=EC&citizen=GY&citizen=PY"
        "&citizen=PE&citizen=SR&citizen=UY&citizen=VE&citizen=KZ&citizen=KG"
        "&citizen=TJ&citizen=TM&citizen=UZ&citizen=CN&citizen=JP&citizen=MN"
        "&citizen=KP&citizen=KR&citizen=TW&citizen=AF&citizen=BD&citizen=BT"
        "&citizen=IN&citizen=IR&citizen=MV&citizen=NP&citizen=PK&citizen=LK"
        "&citizen=BN&citizen=KH&citizen=ID&citizen=LA&citizen=MY&citizen=MM"
        "&citizen=PH&citizen=SG&citizen=TH&citizen=TL&citizen=VN&citizen=AM"
        "&citizen=AZ&citizen=BH&citizen=IQ&citizen=IL&citizen=JO&citizen=KW"
        "&citizen=LB&citizen=PS&citizen=OM&citizen=QA&citizen=SA&citizen=SY"
        "&citizen=AE&citizen=YE&citizen=AU&citizen=NZ&citizen=FJ&citizen=PG"
        "&citizen=SB&citizen=VU&citizen=KI&citizen=MH&citizen=FM&citizen=NR"
        "&citizen=PW&citizen=CK&citizen=WS&citizen=TO&citizen=TV&citizen=RNC"
        "&citizen=STLS&citizen=TOTAL&citizen=UNK"
        "&sex=T&sex=M&sex=F&sex=UNK"
        "&age=TOTAL&age=Y_LT14&age=Y14-17&age=Y_LT18&age=Y18-34&age=Y35-64&age=Y_GE65&age=UNK"
        "&decision=TOTAL&decision=TOTAL_POS&decision=REJECTED&lang=en"
    )

    # Bound the wait and fail loudly on HTTP errors instead of surfacing an
    # opaque KeyError when an error payload lacks 'dimension'/'value'.
    response = requests.get(api_URL, timeout=120)
    response.raise_for_status()
    data = response.json()

    dimensions = data['dimension']
    values = data['value']

    # JSON-stat fixes the cell ordering through the top-level "id" array; the
    # key order of the "dimension" object is only used as a fallback.
    dim_names = list(data.get('id') or dimensions.keys())

    # Resolve every dimension's ordered label list once, not once per cell.
    dim_labels = {}
    for dim_name in dim_names:
        category = dimensions[dim_name]['category']
        index = category['index']
        if isinstance(index, dict):
            # Order category keys by their declared position, not dict order.
            keys = sorted(index, key=index.get)
        else:
            keys = list(index)
        labels = category.get('label', {})
        dim_labels[dim_name] = [labels.get(key, key) for key in keys]

    # "value" is either a sparse {position: value} object or a dense array in
    # which missing cells are null.
    if isinstance(values, dict):
        cells = ((int(position), value) for position, value in values.items())
    else:
        cells = (
            (position, value)
            for position, value in enumerate(values)
            if value is not None
        )

    # Cells are stored row-major, so the last dimension varies fastest: peel
    # coordinates off the flat position from the last dimension to the first.
    reversed_dims = [(name, dim_labels[name]) for name in reversed(dim_names)]
    rows = []
    for position, value in cells:
        row = {}
        remainder = position
        for dim_name, labels in reversed_dims:
            remainder, dim_idx = divmod(remainder, len(labels))
            row[dim_name] = labels[dim_idx]
        row['value'] = value
        rows.append(row)

    # Explicit columns keep the frame well-formed even when no cell is returned.
    columns = [name for name, _ in reversed_dims] + ['value']
    df = pd.DataFrame(rows, columns=columns)

    asylum_drop = df.drop(["unit", "freq"], axis=1)
    # Keep only non-zero, fully disaggregated rows for individual countries.
    keep = (
        (asylum_drop["value"] != 0)
        & (asylum_drop["age"] != "Total")
        & (asylum_drop["sex"] != "Total")
        & (asylum_drop["decision"] != "Total")
        & (asylum_drop["geo"] != "European Union - 27 countries (from 2020)")
    )
    return asylum_drop[keep]

In [25]:

# Download the filtered 2023 table and preview its first rows.
y2023_df = api_func(year="2023")
y2023_df.head(n=5)

Unnamed: 0,time,geo,decision,age,sex,citizen,value
6540,2023,Germany,Rejected,From 14 to 17 years,Females,Afghanistan,50
6543,2023,Greece,Rejected,From 14 to 17 years,Females,Afghanistan,15
6547,2023,France,Rejected,From 14 to 17 years,Females,Afghanistan,10
6560,2023,Norway,Rejected,From 14 to 17 years,Females,Afghanistan,5
6564,2023,Sweden,Rejected,From 14 to 17 years,Females,Afghanistan,5


In [26]:
# Discard the breakdown columns so only geo, decision and value remain.
df_drop = y2023_df.drop(columns=["time", "age", "sex", "citizen"])
display(df_drop)

Unnamed: 0,geo,decision,value
6540,Germany,Rejected,50
6543,Greece,Rejected,15
6547,France,Rejected,10
6560,Norway,Rejected,5
6564,Sweden,Rejected,5
...,...,...,...
540018,Germany,Total positive decisions,5
540028,Ireland,Total positive decisions,20
540051,Germany,Rejected,10
540117,Germany,Total positive decisions,5


In [27]:
# Split the table by decision type; each half keeps `geo` plus a count column
# named after the outcome it represents.
accepted = (
    df_drop.loc[df_drop["decision"] == "Total positive decisions"]
    .rename(columns={"value": "accepted"})
    .drop(columns=["decision"])
)
rejected = (
    df_drop.loc[df_drop["decision"] == "Rejected"]
    .rename(columns={"value": "rejected"})
    .drop(columns=["decision"])
)
display(accepted)
display(rejected)

Unnamed: 0,geo,accepted
6600,Austria,80
6601,Belgium,30
6603,Switzerland,105
6605,Czechia,5
6606,Germany,775
...,...,...
539830,Ireland,25
540018,Germany,5
540028,Ireland,20
540117,Germany,5


Unnamed: 0,geo,rejected
6540,Germany,50
6543,Greece,15
6547,France,10
6560,Norway,5
6564,Sweden,5
...,...,...
539655,Germany,5
539656,Denmark,5
539754,Germany,5
539952,Germany,5


In [28]:
# Total the counts per country for each outcome.
accepted_grouped = accepted.groupby("geo", as_index=False).sum()
rejected_grouped = rejected.groupby("geo", as_index=False).sum()

# Outer join keeps countries that appear in only one of the two tables; the
# missing side is filled with 0.
merged = accepted_grouped.merge(rejected_grouped, on="geo", how="outer").fillna(0)

# Share of positive decisions among all accepted + rejected decisions.
merged["acceptance_rate"] = merged["accepted"].div(
    merged["accepted"].add(merged["rejected"])
)
display(merged)

Unnamed: 0,geo,accepted,rejected,acceptance_rate
0,Austria,67090,29405,0.695269
1,Belgium,31940,36990,0.463369
2,Bulgaria,15300,5940,0.720339
3,Croatia,85,160,0.346939
4,Cyprus,8055,15425,0.343058
5,Czechia,790,1420,0.357466
6,Denmark,2795,1175,0.70403
7,Estonia,7960,200,0.97549
8,Finland,3380,2295,0.595595
9,France,114660,215465,0.347323


In [None]:
# Load the population table; the first four lines of the file are skipped
# before the header row is read.
pop_data = pd.read_csv("pop.csv", skiprows=4)

# Quick sanity summary: preview, object type and dimensions (one per line).
print(
    pop_data.head(),
    type(pop_data),
    f"Shape: {pop_data.shape}",
    sep="\n",
)

                  Country Name Country Code     Indicator Name Indicator Code  \
0                        Aruba          ABW  Population, total    SP.POP.TOTL   
1  Africa Eastern and Southern          AFE  Population, total    SP.POP.TOTL   
2                  Afghanistan          AFG  Population, total    SP.POP.TOTL   
3   Africa Western and Central          AFW  Population, total    SP.POP.TOTL   
4                       Angola          AGO  Population, total    SP.POP.TOTL   

          1960         1961         1962         1963         1964  \
0      54922.0      55578.0      56320.0      57002.0      57619.0   
1  130072080.0  133534923.0  137171659.0  140945536.0  144904094.0   
2    9035043.0    9214083.0    9404406.0    9604487.0    9814318.0   
3   97630925.0   99706674.0  101854756.0  104089175.0  106388440.0   
4    5231654.0    5301583.0    5354310.0    5408320.0    5464187.0   

          1965  ...         2016         2017         2018         2019  \
0      58190.0  .

In [None]:
# Remove the identifier/indicator columns, leaving the country name and the
# per-year columns.
cleaned = pop_data.drop(columns=["Indicator Name", "Indicator Code", "Country Code"])
cleaned.head(n=5)

Number of countries/regions: 266


In [35]:
# Keep just the country name and its 2023 population figure.
cleaned_2023 = cleaned.loc[:, ['Country Name', '2023']]
print(cleaned_2023.head(n=5))

                  Country Name         2023
0                        Aruba     107359.0
1  Africa Eastern and Southern  750503764.0
2                  Afghanistan   41454761.0
3   Africa Western and Central  509398589.0
4                       Angola   36749906.0


In [None]:
# Attach each country's 2023 population to its accepted/rejected totals.
final_data = pd.merge(
    merged,
    cleaned_2023,
    left_on='geo',
    right_on='Country Name',
    how='inner'
)

# The inner join matches on free-text country names, so any `geo` spelled
# differently in the population table would vanish without a trace. Report
# such names so the omission is visible; `final_data` itself is unchanged.
unmatched_geo = sorted(set(merged["geo"]) - set(cleaned_2023["Country Name"]))
if unmatched_geo:
    print(f"No population match (dropped from final_data): {unmatched_geo}")

# 'Country Name' duplicates 'geo' after the join, and the acceptance rate is
# not carried into the per-capita table.
final_data = final_data.drop(columns=["Country Name", "acceptance_rate"])
final_data.head()

Unnamed: 0,geo,accepted,rejected,2023
0,Austria,67090,29405,9131761.0
1,Belgium,31940,36990,11787423.0
2,Bulgaria,15300,5940,6446596.0
3,Croatia,85,160,3859686.0
4,Cyprus,8055,15425,1344976.0


In [43]:
# Express both outcome counts relative to the 2023 population column.
final_data["normalized acceptance"] = final_data["accepted"].div(final_data["2023"])
final_data["normalized rejection"] = final_data["rejected"].div(final_data["2023"])
final_data.head(n=5)

Unnamed: 0,geo,accepted,rejected,2023,normalized acceptance,normalized rejection
0,Austria,67090,29405,9131761.0,0.007347,0.00322
1,Belgium,31940,36990,11787423.0,0.00271,0.003138
2,Bulgaria,15300,5940,6446596.0,0.002373,0.000921
3,Croatia,85,160,3859686.0,2.2e-05,4.1e-05
4,Cyprus,8055,15425,1344976.0,0.005989,0.011469


In [44]:
# Keep only the country and its two population-normalized rates.
asylum_acpt_capita = final_data.drop(columns=["accepted", "rejected", "2023"])
asylum_acpt_capita.head(n=20)

Unnamed: 0,geo,normalized acceptance,normalized rejection
0,Austria,0.007347,0.00322
1,Belgium,0.00271,0.003138
2,Bulgaria,0.002373,0.000921
3,Croatia,2.2e-05,4.1e-05
4,Cyprus,0.005989,0.011469
5,Czechia,7.3e-05,0.000131
6,Denmark,0.00047,0.000198
7,Estonia,0.005809,0.000146
8,Finland,0.000605,0.000411
9,France,0.001679,0.003155


In [46]:
# Persist the per-capita table without writing the row index.
asylum_acpt_capita.to_csv(path_or_buf='final_asylum_data.csv', index=False)