# Recipient Country Counts

This notebook assesses the volume of refugees by recipient country, based on travel duration.

In [1]:
import json
import pandas as pd

## Read in Locations

In [42]:
# Load the locations table; `location_type` is used below to separate
# conflict zones from camps.
df = pd.read_csv('inputs/locations.csv')
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,#name,region,country,latitude,longitude,location_type,conflict_date,population
0,Donetsk,Donetsk,Ukraine,48.023,37.80224,conflict_zone,0.0,1024700.0
1,Kadiyivka,Luhansk,Ukraine,48.56818,38.64352,conflict_zone,1.0,84425.0
2,Mariupol,Donetsk,Ukraine,47.09514,37.54131,conflict_zone,3.0,481626.0
3,Schastia,Luhansk,Ukraine,48.7412,39.2354,conflict_zone,3.0,11743.0
4,Uman,Cherkasy,Ukraine,48.7484,30.2218,conflict_zone,5.0,87658.0


In [51]:
# Partition the locations by type: conflict zones are the rows being routed,
# camps are used later to look up the country of a route's end point.
conflicts = df.loc[df['location_type'].eq('conflict_zone')]
camps = df.loc[df['location_type'].eq('camp')]

In [44]:
# Sanity check: (rows, columns) of the conflict-zone subset.
conflicts.shape

(60, 8)

In [45]:
# Load the exit routes for each mode. As used below, each file maps a location
# name to a list of route entries that carry a 'name' key.
# The encoding is given explicitly so non-ASCII place names decode the same way
# on every platform instead of depending on the locale default.
with open('outputs/ukraine_border_crossing_directions.json', 'r', encoding='utf-8') as f:
    conflict_exit_routes = json.load(f)

with open('outputs/ukraine_border_crossing_directions_transit.json', 'r', encoding='utf-8') as f:
    conflict_exit_routes_transit = json.load(f)

In [50]:
# Spot-check the structure: name of the first transit route entry for one location.
conflict_exit_routes_transit['Chuhuiv'][0]['name']

'Yahodyn'

In [52]:
def get_exit_route(row, mode, routes=None, camps_df=None):
    """Attach the destination country of a location's first exit route.

    The first route entry for ``row['#name']`` is looked up, and its ``'name'``
    is matched against ``camps_df['#name']`` to obtain the camp's ``country``.
    The result is stored under ``f'{mode}_destination'``.

    Parameters
    ----------
    row : pandas.Series
        A location row containing a ``'#name'`` entry.
    mode : str
        ``'driving'`` or ``'transit'``; selects the module-level route table
        when ``routes`` is not given, and names the output column.
    routes : dict, optional
        Mapping of location name -> list of route dicts with a ``'name'`` key.
        Defaults to the notebook's route table for ``mode``.
    camps_df : pandas.DataFrame, optional
        Frame with ``'#name'`` and ``'country'`` columns. Defaults to the
        notebook-level ``camps`` frame.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with ``f'{mode}_destination'`` set to the country,
        or ``None`` when no route or matching camp is found.

    Raises
    ------
    ValueError
        If ``routes`` is not supplied and ``mode`` is not a known mode.
    """
    if routes is None:
        if mode == 'driving':
            routes = conflict_exit_routes
        elif mode == 'transit':
            routes = conflict_exit_routes_transit
        else:
            # Previously an unknown mode left `routes` unbound and the error
            # was silently swallowed, yielding an all-None column.
            raise ValueError(f"unknown mode {mode!r}; expected 'driving' or 'transit'")
    if camps_df is None:
        camps_df = camps

    try:
        crossing = routes[row['#name']][0]['name']
        dest = camps_df.loc[camps_df['#name'] == crossing, 'country'].values[0]
    except (KeyError, IndexError, TypeError):
        # Missing location, empty/malformed route list, or no matching camp:
        # treat as "no destination" rather than failing the whole apply.
        dest = None

    # Work on a copy so the object handed in by DataFrame.apply is not mutated.
    row = row.copy()
    row[f'{mode}_destination'] = dest
    return row

In [53]:
# Resolve a destination country per conflict zone, once for each travel mode.
conflicts = conflicts.apply(get_exit_route, axis=1, args=('driving',))
conflicts = conflicts.apply(get_exit_route, axis=1, args=('transit',))

In [54]:
def transit_mixed(row):
    """Pick the transit destination, falling back to the driving destination.

    Parameters
    ----------
    row : pandas.Series
        Row with ``transit_destination`` and ``driving_destination`` entries.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with ``mixed_destination`` added.
    """
    transit = row.transit_destination
    # pd.isna covers both None and NaN; `== None` missed NaN, which pandas may
    # substitute for None when rows are reassembled into a frame.
    dest = row.driving_destination if pd.isna(transit) else transit

    # Work on a copy so the object handed in by DataFrame.apply is not mutated.
    row = row.copy()
    row['mixed_destination'] = dest
    return row

In [55]:
# Add the combined destination column, row by row.
conflicts = conflicts.apply(transit_mixed, axis=1)

In [56]:
# Preview the conflict zones with the driving/transit/mixed destination columns.
conflicts.head()

Unnamed: 0,#name,region,country,latitude,longitude,location_type,conflict_date,population,driving_destination,transit_destination,mixed_destination
0,Donetsk,Donetsk,Ukraine,48.023,37.80224,conflict_zone,0.0,1024700.0,Moldova,,Moldova
1,Kadiyivka,Luhansk,Ukraine,48.56818,38.64352,conflict_zone,1.0,84425.0,Moldova,,Moldova
2,Mariupol,Donetsk,Ukraine,47.09514,37.54131,conflict_zone,3.0,481626.0,Moldova,,Moldova
3,Schastia,Luhansk,Ukraine,48.7412,39.2354,conflict_zone,3.0,11743.0,Moldova,,Moldova
4,Uman,Cherkasy,Ukraine,48.7484,30.2218,conflict_zone,5.0,87658.0,Moldova,,Moldova


In [57]:
# Load the UNHCR refugee counts per receiving location.
# NOTE(review): this rebinds `df`, which previously held the locations table;
# re-running the earlier cells that filter on `location_type` after this point
# will operate on the wrong frame.
# NOTE(review): the file name says 4.11.21 while the rows shown below are dated
# Apr 2022 — confirm the intended snapshot.
df = pd.read_csv('inputs/unhcr_refugee_counts_4.11.21.csv')

In [58]:
# Strip thousands separators and convert the counts to integers.
# Casting to str first keeps the cell idempotent: re-running it after the column
# is already numeric no longer raises AttributeError on int.replace.
df['Population'] = (
    df['Population']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype('int64')
)

In [59]:
# Inspect the cleaned counts; Population should now be integer-valued.
df

Unnamed: 0,Location name,Source,Data date,Population
0,Poland,Government,10 Apr 2022,2622117
1,Romania,Government,10 Apr 2022,692501
2,Hungary,Government,10 Apr 2022,424367
3,Republic of Moldova,Government,10 Apr 2022,411365
4,Russian Federation,Government,9 Apr 2022,404418
5,Slovakia,Government,10 Apr 2022,317781
6,Belarus,Government,10 Apr 2022,20739


In [60]:
# Preview the counts with the Russian Federation and Belarus excluded
# (display only; nothing is assigned here).
df[~df['Location name'].isin(['Russian Federation','Belarus'])]

Unnamed: 0,Location name,Source,Data date,Population
0,Poland,Government,10 Apr 2022,2622117
1,Romania,Government,10 Apr 2022,692501
2,Hungary,Government,10 Apr 2022,424367
3,Republic of Moldova,Government,10 Apr 2022,411365
5,Slovakia,Government,10 Apr 2022,317781


In [61]:
# Observed refugee total, excluding the Russian Federation and Belarus.
ref_total = df.loc[
    ~df['Location name'].isin(['Russian Federation', 'Belarus']),
    'Population',
].sum()

# Conflict-zone population that has a resolved destination under each scenario.
conflict_total_driving = int(
    conflicts.loc[conflicts['driving_destination'].notna(), 'population'].sum()
)
conflict_total_transit = int(
    conflicts.loc[conflicts['transit_destination'].notna(), 'population'].sum()
)
conflict_total_mixed = int(
    conflicts.loc[conflicts['mixed_destination'].notna(), 'population'].sum()
)

In [62]:
# Rescale each zone's population so that the zones with a destination sum to
# the observed refugee total, separately for each scenario.
conflicts['pop_adjusted_driving'] = conflicts['population'].mul(ref_total / conflict_total_driving)
conflicts['pop_adjusted_transit'] = conflicts['population'].mul(ref_total / conflict_total_transit)
conflicts['pop_adjusted_mixed'] = conflicts['population'].mul(ref_total / conflict_total_mixed)

In [63]:
# Adjusted population per destination country under the driving scenario.
conflicts.groupby(['driving_destination'])['pop_adjusted_driving'].sum().to_frame().round()

Unnamed: 0_level_0,pop_adjusted_driving
driving_destination,Unnamed: 1_level_1
Moldova,4264139.0
Poland,178004.0
Romania,25988.0


In [64]:
# Adjusted population per destination country under the transit scenario.
conflicts.groupby(['transit_destination'])['pop_adjusted_transit'].sum().to_frame().round()

Unnamed: 0_level_0,pop_adjusted_transit
transit_destination,Unnamed: 1_level_1
Moldova,2137346.0
Poland,2330785.0


In [67]:
# Adjusted population per destination country under the mixed scenario.
conflicts.groupby(['mixed_destination'])['pop_adjusted_mixed'].sum().to_frame().round()

Unnamed: 0_level_0,pop_adjusted_mixed
mixed_destination,Unnamed: 1_level_1
Moldova,3339519.0
Poland,1102624.0
Romania,25988.0


In [68]:
# Load the model results table from the outputs folder.
ukr_model_results = pd.read_csv('outputs/ukraine_model_results.csv')

In [69]:
# Inspect the model results.
# NOTE(review): the predicted_shares shown below sum to more than 1 — confirm
# whether they should be normalised before being multiplied by a refugee total.
ukr_model_results

Unnamed: 0,country,pct_tot,predicted_shares
0,Hungary,0.094222,0.208548
1,Moldova,0.093682,0.256713
2,Poland,0.58641,0.39149
3,Romania,0.154561,0.362491
4,Slovakia,0.071125,0.356556


In [70]:
# Per-country adjusted totals for the transit scenario, as a flat table.
transit_res = (
    conflicts
    .groupby(['transit_destination'])['pop_adjusted_transit']
    .sum()
    .to_frame()
    .round()
    .reset_index()
)

In [71]:
# Use the same key column name as the model results so the two can be merged.
transit_res = transit_res.rename({'transit_destination': 'country'}, axis='columns')

In [72]:
# Keep only the country key and the model's predicted share.
results = ukr_model_results.loc[:, ['country', 'predicted_shares']]

In [73]:
# Show the model shares that the transit-based shares will be merged onto.
results

Unnamed: 0,country,predicted_shares
0,Hungary,0.208548
1,Moldova,0.256713
2,Poland,0.39149
3,Romania,0.362491
4,Slovakia,0.356556


In [74]:
# Inspect the per-country transit totals before converting them to shares.
transit_res

Unnamed: 0,country,pop_adjusted_transit
0,Moldova,2137346.0
1,Poland,2330785.0


In [75]:
# Convert the transit totals into each country's share of the overall total.
transit_res['transit_predicted_shares'] = transit_res['pop_adjusted_transit'].div(
    transit_res['pop_adjusted_transit'].sum()
)

In [76]:
# Check the transit shares; they are divided by the column total, so they should sum to 1.
transit_res

Unnamed: 0,country,pop_adjusted_transit,transit_predicted_shares
0,Moldova,2137346.0,0.478353
1,Poland,2330785.0,0.521647


In [77]:
# Left-join the transit shares onto the model shares; countries with no
# transit-based estimate get 0 instead of NaN.
results = (
    results
    .merge(transit_res, how='left', left_on='country', right_on='country')
    .fillna(0)
)

In [78]:
# Keep the key and the two share columns.
results = results.loc[:, ['country', 'predicted_shares', 'transit_predicted_shares']]

In [79]:
# Observed counts, excluding the Russian Federation and Belarus.
df_ = df.loc[~df['Location name'].isin(['Russian Federation', 'Belarus'])]

In [80]:
# Align country naming and column names with the results table, keeping only
# the key and the observed count.
df_ = (
    df_
    .replace('Republic of Moldova', 'Moldova')
    .rename(columns={'Location name': 'country', 'Population': 'refugees_actual'})
    .loc[:, ['country', 'refugees_actual']]
)

In [81]:
# Attach the observed refugee counts to each country's predicted shares.
results = results.merge(df_, how='left', left_on='country', right_on='country')

In [95]:
def naive_weighted(row, total_refugees, model_weight=0.5):
    """Predict refugees from a weighted blend of model and transit shares.

    Parameters
    ----------
    row : pandas.Series
        Row with ``predicted_shares`` and ``transit_predicted_shares``.
    total_refugees : int or float
        Total number of refugees to distribute.
    model_weight : float, optional
        Weight given to ``predicted_shares``; the transit share receives
        ``1 - model_weight``. The default of 0.5 is an equal blend.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with an integer ``refugees_predicted`` entry.
    """
    blended_share = (
        row.predicted_shares * model_weight
        + row.transit_predicted_shares * (1 - model_weight)
    )
    # The built-in round() works for plain Python numbers as well as NumPy
    # scalars; the `.round()` method exists only on the latter.
    predicted = int(round(blended_share * total_refugees))

    # Work on a copy so the object handed in by DataFrame.apply is not mutated.
    row = row.copy()
    row['refugees_predicted'] = predicted
    return row

In [96]:
def attraction_only(row, total_refugees):
    """Predict refugees from the model share alone.

    Parameters
    ----------
    row : pandas.Series
        Row with a ``predicted_shares`` entry.
    total_refugees : int or float
        Total number of refugees to distribute.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with an integer ``refugees_predicted_attactions``
        entry. The column name's spelling is kept as-is because it is part of
        the output schema.
    """
    # The built-in round() works for plain Python numbers as well as NumPy
    # scalars; the `.round()` method exists only on the latter.
    predicted = int(round(row.predicted_shares * total_refugees))

    # Work on a copy so the object handed in by DataFrame.apply is not mutated.
    row = row.copy()
    row['refugees_predicted_attactions'] = predicted
    return row

In [97]:
# Inspect the merged table before the predictions are computed.
# NOTE(review): the saved output below already contains refugees_predicted
# columns, which are only created by a later cell, and with different values —
# it is stale output from an earlier run. Re-run from a fresh kernel.
results

Unnamed: 0,country,predicted_shares,transit_predicted_shares,refugees_actual,refugees_predicted,refugees_predicted_attactions
0,Hungary,0.208548,0.0,424367,279545,931818
1,Moldova,0.256713,0.478353,411365,1840250,1147028
2,Poland,0.39149,0.521647,2622117,2156318,1749227
3,Romania,0.362491,0.0,692501,485897,1619657
4,Slovakia,0.356556,0.0,317781,477942,1593139


In [98]:
# Distribute the observed total across countries under each prediction scheme.
total_refugees = results['refugees_actual'].sum()
results = results.apply(naive_weighted, axis=1, args=(total_refugees,))
results = results.apply(attraction_only, axis=1, args=(total_refugees,))

In [99]:
# Final comparison table: observed refugee counts alongside both predictions.
results

Unnamed: 0,country,predicted_shares,transit_predicted_shares,refugees_actual,refugees_predicted,refugees_predicted_attactions
0,Hungary,0.208548,0.0,424367,465909,931818
1,Moldova,0.256713,0.478353,411365,1642187,1147028
2,Poland,0.39149,0.521647,2622117,2040006,1749227
3,Romania,0.362491,0.0,692501,809829,1619657
4,Slovakia,0.356556,0.0,317781,796570,1593139
