## Merge data

In [1]:
import pandas as pd 

file1 = pd.read_csv("refugee-data/Cleaned_people_of_concern.csv")
file2 = pd.read_csv("refugee-data/cleaned_asylum_seekers.csv")

In [2]:
results = pd.merge(file1, file2, on=['Year', 'Country / territory of asylum/residence','Origin'])

In [3]:
results

Unnamed: 0,Unnamed: 0_x,Year,Country / territory of asylum/residence,Origin,Refugees (incl. refugee-like situations),Asylum-seekers (pending cases),Returned refugees,Internally displaced persons (IDPs),Returned IDPs,Stateless persons,...,Tota pending start-year,of which UNHCR-assisted(start-year),Applied during year,decisions_recognized,decisions_other,Rejected,Otherwise closed,Total decisions,Total pending end-year,of which UNHCR-assisted(end-year)
0,1,2001,Afghanistan,Iran (Islamic Rep. of),3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,110.0,21.0,0.0,68.0,21.0,110.0,0.0,0.0
1,2,2001,Afghanistan,Iraq,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,15.0,3.0,0.0,7.0,5.0,15.0,0.0,0.0
2,4,2001,Angola,Burundi,18.0,3.0,0.0,0.0,0.0,0.0,...,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0
3,5,2001,Angola,Cameroon,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,6,2001,Angola,Dem. Rep. of the Congo,11933.0,636.0,1.0,0.0,0.0,0.0,...,595.0,595.0,129.0,24.0,0.0,58.0,6.0,88.0,636.0,596.0
5,7,2001,Angola,Congo,51.0,227.0,0.0,0.0,0.0,0.0,...,235.0,235.0,5.0,1.0,0.0,12.0,0.0,13.0,227.0,227.0
6,8,2001,Angola,Comoros,0.0,3.0,0.0,0.0,0.0,0.0,...,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0
7,9,2001,Angola,Cuba,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0
8,11,2001,Angola,Guinea,0.0,2.0,0.0,0.0,0.0,0.0,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
9,12,2001,Angola,Guinea-Bissau,0.0,3.0,0.0,0.0,0.0,0.0,...,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0


In [4]:
results.to_csv("refugee-data/merge_data.csv")

## Convert data, drop data

### Drop attributes

In [8]:
results.drop(['Unnamed: 0_x', 'Refugees (incl. refugee-like situations)', 'Asylum-seekers (pending cases)',
              'Returned refugees', 'Internally displaced persons (IDPs)', 'Returned IDPs', 'Stateless persons',
             'Others of concern', 'Tota pending start-year', 'of which UNHCR-assisted(start-year)', 'Applied during year',
             'Total Population', 'Unnamed: 0_y', 'Otherwise closed', 'Total decisions', 'Total pending end-year', 'of which UNHCR-assisted(end-year)'], axis = 1)

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,decisions_recognized,decisions_other,Rejected
0,2001,Afghanistan,Iran (Islamic Rep. of),U / FI,21.0,0.0,68.0
1,2001,Afghanistan,Iraq,U / FI,3.0,0.0,7.0
2,2001,Angola,Burundi,U / FI,0.0,0.0,0.0
3,2001,Angola,Cameroon,U / FI,0.0,0.0,0.0
4,2001,Angola,Dem. Rep. of the Congo,U / FI,24.0,0.0,58.0
5,2001,Angola,Congo,U / FI,1.0,0.0,12.0
6,2001,Angola,Comoros,U / FI,0.0,0.0,0.0
7,2001,Angola,Cuba,U / FI,2.0,0.0,0.0
8,2001,Angola,Guinea,U / FI,0.0,0.0,0.0
9,2001,Angola,Guinea-Bissau,U / FI,0.0,0.0,0.0


### Convert Dimension 

In [5]:
results['acceptance_rate'] = results

ValueError: Wrong number of items passed 24, placement implies 1

## Calculate Probability

In [39]:
# Number of target country (afghanistan)
n_target_country = results['Country / territory of asylum/residence'][results['Country / territory of asylum/residence'] == 'Afghanistan'].count()
print('Number of target country:', n_target_country)

# Probability(target country)
total_target_country = len(results.index)
print('Total target country:', total_target_country)
P_afghanistan = n_target_country/total_target_country
print('Probability(target country):', P_afghanistan)

# Probability(accept)
total_decision = results['decisions_recognized'].sum() + results['decisions_other'].sum() + results['Rejected'].sum()
P_accepted = (results['decisions_recognized'].sum()) / total_decision
print('Total decision:', total_decision)
print('Probability accepted:', P_accepted)

# Probability (target country | accept) (Origin Iraq) 
origin_to_target_country_count = results['decisions_recognized'][(results['Country / territory of asylum/residence'] == 'Afghanistan') 
                                                                                    & (results['Origin'] == 'Iraq')].sum()
print('Number of accepted and from accepted from origin and target:', origin_to_target_country_count)

P_target_country_intersect_accept = origin_to_target_country_count/total_decision
print('Probability of target country intersect accept:', P_target_country_intersect_accept)

P_target_country_given_accept = P_target_country_intersect_accept / P_accepted
print('Probability (target country|accept):',P_target_country_given_accept)

# Probability (accept|target country)
P_accept_given_target_country = (P_target_country_given_accept * P_accepted) / P_afghanistan 
print('Probability (accept|target country):', P_accept_given_target_country)

Number of target country: 72
Total target country: 110712
Probability(target country): 0.0006503360069369175
Total decision: 11604360.0
Probability accepted: 0.29966228210775947
Number of accepted and from accepted from origin and target: 11.0
Probability of target country intersect accept: 9.479195750562719e-07
Probability (target country|accept): 3.163292918911287e-06
Probability (accept|target country): 0.0014575843332448608


## Create dictionary to store results 

In [31]:
target_country_dict = {}

# for i in range(len(results['Country / territory of asylum/residence'])):
for i in range(5):
#     print(results['Country / territory of asylum/residence'][i])
    target_country_dict[results['Country / territory of asylum/residence'][i]] = {'origin': results['Origin'][i]}
    print(target_country_dict)

print('final', target_country_dict)

{'Afghanistan': {'origin': 'Iran (Islamic Rep. of)'}, 'Angola': {'origin': 'Dem. Rep. of the Congo'}}
{'Afghanistan': {'origin': 'Iraq'}, 'Angola': {'origin': 'Dem. Rep. of the Congo'}}
{'Afghanistan': {'origin': 'Iraq'}, 'Angola': {'origin': 'Burundi'}}
{'Afghanistan': {'origin': 'Iraq'}, 'Angola': {'origin': 'Cameroon'}}
{'Afghanistan': {'origin': 'Iraq'}, 'Angola': {'origin': 'Dem. Rep. of the Congo'}}
final {'Afghanistan': {'origin': 'Iraq'}, 'Angola': {'origin': 'Dem. Rep. of the Congo'}}
