In [1]:
'''
Given: Coo, Coa, dec_total
Return: dec_type
'''

'\nGiven: Coo, Coa, dec_total\nReturn: dec_type\n'

In [2]:
import requests
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
def get_data(url):
    
    category = str(url)
    response = requests.get(category)
    data = response.json()
    data_dict = data["items"]
    
    data = pd.DataFrame.from_records(data_dict)
    
    return data

In [4]:
asylum_decisions = get_data('https://api.unhcr.org/population/v1/asylum-decisions/?&yearFrom=2010&yearTo=2025&coo_all=TRUE&limit=10000000&coa_all=TRUE')

In [5]:
asylum_decisions.to_csv('asylum_decisions.csv', header=True, index=False)

In [6]:
asylum_decisions = pd.read_csv('asylum_decisions.csv')

In [7]:
asylum_decisions = asylum_decisions.dropna()
asylum_decisions[['year', 'dec_recognized', 'dec_other', 'dec_rejected', 'dec_closed', 'dec_total']] = asylum_decisions[['year', 'dec_recognized', 'dec_other', 'dec_rejected', 'dec_closed', 'dec_total']].astype(int)
asylum_decisions = asylum_decisions.drop(['dec_pc', 'coo_iso', 'coa', 'coa_iso', 'procedure_type', 'dec_level', 'dec_pc'], axis=1)
asylum_decisions = asylum_decisions[asylum_decisions.coo != 'UKN']

In [8]:
asylum_decisions["dec_rejected"] = asylum_decisions['dec_total'] - asylum_decisions['dec_recognized']

In [9]:
asylum_decisions = asylum_decisions.drop(['coo_id', 'coo', 'coa_id', 'dec_other', 'dec_closed'], axis = 1)
asylum_decisions

Unnamed: 0,year,coo_name,coa_name,dec_recognized,dec_rejected,dec_total
0,2010,Afghanistan,Australia,11,0,11
1,2010,Albania,Australia,0,11,11
2,2010,Egypt,Australia,22,30,52
3,2010,Bahrain,Australia,0,5,5
4,2010,Bangladesh,Australia,10,28,38
...,...,...,...,...,...,...
65811,2023,Iran (Islamic Rep. of),Indonesia,5,0,5
65812,2023,Sudan,Jordan,0,5,5
65813,2023,Yemen,Syrian Arab Rep.,5,0,5
65814,2023,Pakistan,Thailand,0,5,5


In [10]:
label_encoder = LabelEncoder()
asylum_decisions['coo_name'] = label_encoder.fit_transform(asylum_decisions['coo_name'])
asylum_decisions['coa_name'] = label_encoder.fit_transform(asylum_decisions['coa_name'])
asylum_decisions

Unnamed: 0,year,coo_name,coa_name,dec_recognized,dec_rejected,dec_total
0,2010,Afghanistan,Australia,11,0,11
1,2010,Albania,Australia,0,11,11
2,2010,Egypt,Australia,22,30,52
3,2010,Bahrain,Australia,0,5,5
4,2010,Bangladesh,Australia,10,28,38
...,...,...,...,...,...,...
65811,2023,Iran (Islamic Rep. of),Indonesia,5,0,5
65812,2023,Sudan,Jordan,0,5,5
65813,2023,Yemen,Syrian Arab Rep.,5,0,5
65814,2023,Pakistan,Thailand,0,5,5


In [11]:
asylum_decisions = asylum_decisions[asylum_decisions['dec_total'] > 0]

In [12]:
asylum_decisions['acceptance_rate'] = asylum_decisions['dec_recognized'] / asylum_decisions['dec_total'] 
asylum_decisions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  asylum_decisions['acceptance_rate'] = asylum_decisions['dec_recognized'] / asylum_decisions['dec_total']


Unnamed: 0,year,coo_name,coa_name,dec_recognized,dec_rejected,dec_total,acceptance_rate
0,2010,Afghanistan,Australia,11,0,11,1.000000
1,2010,Albania,Australia,0,11,11,0.000000
2,2010,Egypt,Australia,22,30,52,0.423077
3,2010,Bahrain,Australia,0,5,5,0.000000
4,2010,Bangladesh,Australia,10,28,38,0.263158
...,...,...,...,...,...,...,...
65811,2023,Iran (Islamic Rep. of),Indonesia,5,0,5,1.000000
65812,2023,Sudan,Jordan,0,5,5,0.000000
65813,2023,Yemen,Syrian Arab Rep.,5,0,5,1.000000
65814,2023,Pakistan,Thailand,0,5,5,0.000000


In [13]:
X = asylum_decisions[['year', 'coo_name', 'coa_name']]
y = asylum_decisions['acceptance_rate']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(mse)
print(r2)

ValueError: could not convert string to float: 'Guinea'

In [None]:
asylum_decisions.corr()


Unnamed: 0,year,coo_name,coa_name,dec_recognized,dec_rejected,dec_total,acceptance_rate
year,1.0,0.016919,-0.027269,0.018922,0.020052,0.024457,0.047026
coo_name,0.016919,1.0,-0.002724,-0.001217,-0.001754,-0.001979,0.034162
coa_name,-0.027269,-0.002724,1.0,0.009916,0.009093,0.01158,-0.015971
dec_recognized,0.018922,-0.001217,0.009916,1.0,0.159265,0.505588,0.122023
dec_rejected,0.020052,-0.001754,0.009093,0.159265,1.0,0.932285,-0.028081
dec_total,0.024457,-0.001979,0.01158,0.505588,0.932285,1.0,0.020169
acceptance_rate,0.047026,0.034162,-0.015971,0.122023,-0.028081,0.020169,1.0
