# Generate Estimated Refugee Numbers and Routes 

## 1. Data Collection

In [766]:
import json
import pandas as pd
from fuzzywuzzy import process
from sklearn.preprocessing import MinMaxScaler
from statsmodels.iolib.smpickle import load_pickle
import numpy as np


### Parameters
Set these values as parameters for the model.
Set conflict city and number of cities for conflict and camp countries


In [767]:
conflict_country= "Honduras"
percent_of_pop_leaving=.10
flight_mode="driving"
conflict_start=2021
excluded_countries=[]
added_countries=[]
number_conflict_cities=20
number_camp_cities=10

In [768]:
conflict_start=conflict_start-1

### Helper functions

In [769]:
# Helper Encoder for json
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)

## 2. Collect relevant data 

read in country border data

In [770]:
country_border = open('../refugee_data/country_border_data.json')
countries_that_border = json.load(country_border)

get a list of touching countries

In [771]:
touching_list=[]
touching_list=countries_that_border[conflict_country]
touching_list

['El Salvador', 'Guatemala', 'Nicaragua']

remove any countries that are to be excluded.


In [772]:
indexed_list={}
for i,c in enumerate(touching_list):
    indexed_list[i]=c
    
for i,ex in enumerate(excluded_countries):
    country,value, ind=process.extractOne(ex, indexed_list)
    if value>89:
        touching_list.pop(ind)
    print(country, value, ind)
    print(touching_list)

add any countries we need to 

In [773]:
for country_v in added_countries:
    touching_list.append(country_v)
touching_list

['El Salvador', 'Guatemala', 'Nicaragua']

create a dataframe to build upon.


In [774]:
touching_df = pd.DataFrame (touching_list, columns = ['bording_countries'])
touching_df['conflict']=conflict_country

read in historic_pop

In [775]:
historic_pop=pd.read_csv('../refugee_data/historic_pop.csv')    

Get historic populations from year before conflict


In [776]:
options=historic_pop["Country Name"]
touching_df['historic_pop']=None
historic_pop_cols=historic_pop.columns

indexed_col={}
for i,c in enumerate(historic_pop_cols):
    indexed_col[i]=c

column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_col)


for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_pop"]=historic_pop.at[ind,column]



Get historic population of conflict country

In [777]:
country,ratio,ind =process.extractOne(conflict_country, options)
conflict_country_historic_pop=int(historic_pop.at[ind,column])
conflict_country_historic_pop

9904608

read in historic gdp per cap data. Get data from historic gdp per cap given the year.


In [778]:
gdp_per_cap_historic=pd.read_csv("../refugee_data/gdp_per_cap.csv")

In [779]:
options=gdp_per_cap_historic["Country Name"]
touching_df['historic_gdp_per_cap']=None

historic_gdp_cols=gdp_per_cap_historic.columns
indexed_gdp_col={}
for i,c in enumerate(historic_gdp_cols):
    indexed_gdp_col[i]=c

    
column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_gdp_col)

for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_gdp_per_cap"]=gdp_per_cap_historic.at[ind,column]

calculate historic gdp for later use if needed


In [780]:
touching_df['calculated_historic_gdp']=None
for kk ,row in touching_df.iterrows():
    try:
        touching_df.loc[kk,"calculated_historic_gdp"]=int(row['historic_pop'])*float(row['historic_gdp_per_cap'])
    except Exception as e:
        print(e, f'GDP per cap for {row["bording_countries"]} data was {row["historic_gdp_per_cap"]}' )

read in V-Dem data and subset the dataset to only enclude columns we care about

In [781]:
Dem=pd.read_csv("../refugee_data/country_dem.csv")
columnList=["country_name","year","v2xeg_eqdr","v2x_libdem"]
country_dem=Dem[columnList]

collect liberal democracy and access to justic for women data for each country given conflict year


In [782]:
touching_df["v2x_libdem"]=None
touching_df["v2xeg_eqdr"]=None

options=country_dem['country_name'].unique()

for kk ,row in touching_df.iterrows():
    country,ratio =process.extractOne(row["bording_countries"], options)
    lib=country_dem.loc[(country_dem["country_name"]==country) & (country_dem["year"]==int(conflict_start))]['v2x_libdem']
    eqdr=country_dem.loc[(country_dem["country_name"]==country) & (country_dem["year"]==int(conflict_start))]['v2xeg_eqdr']
    touching_df.loc[kk,"v2xeg_eqdr"]=eqdr.to_list()[0]
    touching_df.loc[kk,"v2x_libdem"]=lib.to_list()[0]

read in historic gdp from world bank not calculated manually.

In [783]:
historic_GDP=pd.read_csv('../refugee_data/GDP_historic.csv')

Get data from historic gdp per cap given the year.


In [784]:
options=historic_GDP["Country Name"]
touching_df['historic_GDP']=None
historic_GDP_cols=historic_GDP.columns
indexed_GDP_col={}
for i,c in enumerate(historic_GDP_cols):
    indexed_GDP_col[i]=c

    
column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_GDP_col)

for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_GDP"]=historic_GDP.at[ind,column]

In [785]:
touching_df

Unnamed: 0,bording_countries,conflict,historic_pop,historic_gdp_per_cap,calculated_historic_gdp,v2x_libdem,v2xeg_eqdr,historic_GDP
0,El Salvador,Honduras,6486201,3798.63652082321,24638720000.000027,0.37,0.273,24638720000.0
1,Guatemala,Honduras,16858333,4603.33961670975,77604632170.58531,0.394,0.21,77604632170.5853
2,Nicaragua,Honduras,6624554,1905.26115155921,12621505382.606173,0.058,0.428,12621505382.6062


Normalize the columns we will run the model on that are not already an index. For our current model that is only historic_GDP.

In [786]:
cols_to_scale = ['historic_GDP']
touching_df=touching_df.rename(columns={'bording_countries':'country'})

In [787]:
scaler = MinMaxScaler()
for col in cols_to_scale:
    print(col)
    normed = pd.DataFrame()
    
    for y, x in touching_df.groupby('conflict'):
        print(y,x)
        norm_ = [i[0] for i in scaler.fit_transform(x[col].values.reshape(-1,1))]
        countries = x['country']
        conflict_ = x['conflict']
        res = pd.DataFrame(tuple(zip(countries,conflict_,norm_)), columns=['country','conflict',f"{col}_norm"])
        normed = normed.append(res)
    normalized_data = pd.merge(touching_df, normed, left_on=['country','conflict'], right_on=['country','conflict'], how='right')

historic_GDP
Honduras        country  conflict historic_pop historic_gdp_per_cap  \
0  El Salvador  Honduras      6486201     3798.63652082321   
1    Guatemala  Honduras     16858333     4603.33961670975   
2    Nicaragua  Honduras      6624554     1905.26115155921   

  calculated_historic_gdp v2x_libdem v2xeg_eqdr        historic_GDP  
0      24638720000.000027       0.37      0.273       24638720000.0  
1      77604632170.585327      0.394       0.21  77604632170.585297  
2      12621505382.606171      0.058      0.428  12621505382.606199  


  normed = normed.append(res)


In [788]:
normalized_data

Unnamed: 0,country,conflict,historic_pop,historic_gdp_per_cap,calculated_historic_gdp,v2x_libdem,v2xeg_eqdr,historic_GDP,historic_GDP_norm
0,El Salvador,Honduras,6486201,3798.63652082321,24638720000.000027,0.37,0.273,24638720000.0,0.184928
1,Guatemala,Honduras,16858333,4603.33961670975,77604632170.58531,0.394,0.21,77604632170.5853,1.0
2,Nicaragua,Honduras,6624554,1905.26115155921,12621505382.606173,0.058,0.428,12621505382.6062,0.0


## 3. Modeling
Load the prebuild model 

In [789]:
new_results = load_pickle("../refugee_model_results.pickle")

set independant variables 


In [790]:
features_cols = [
                    'historic_GDP_norm', 
                    'v2x_libdem',
]
features_normalized = normalized_data[features_cols]
normalized_data[['country',"conflict"]+features_cols]

Unnamed: 0,country,conflict,historic_GDP_norm,v2x_libdem
0,El Salvador,Honduras,0.184928,0.37
1,Guatemala,Honduras,1.0,0.394
2,Nicaragua,Honduras,0.0,0.058


Currently if a country does not have data for a column we are dropping that country. If you need to not drop a country you need to fill in the missing data manually or some other method.

In [791]:
normalized_data=normalized_data.dropna()

Select only features that are needed and use the model to predict 


In [792]:
features_to_predict=normalized_data[features_cols]
shares = new_results.predict(features_to_predict)
normalized_data['predicted_shares'] = shares

save results to a csv file for next step.

In [793]:
border_countries_results = normalized_data[['country',"conflict","historic_pop","historic_GDP_norm","v2x_libdem",'predicted_shares']]
border_countries_results.to_csv(f'outputs/{conflict_country}_{flight_mode}_output_results.csv',index=False)

In [794]:
border_countries_results

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares
0,El Salvador,Honduras,6486201,0.184928,0.37,0.192316
1,Guatemala,Honduras,16858333,1.0,0.394,0.393016
2,Nicaragua,Honduras,6624554,0.0,0.058,0.023352


# 4. Find Border crossing for haven countries

This will look up boarder crossings from a dataset we found on wikipeadia. If you don't want to collect border crossing manuall use this code.

In [795]:
with open('../refugee_data/border_crossing_geopoints.json') as json_file:
    border_geo = json.load(json_file)


In [796]:
border_geo["Honduras"]

[{'crossing_name': 'Virginia', 'latitude': '14.0167', 'longitude': '-88.5667'}]

In [797]:
array_of_crossings_df = pd.DataFrame(columns=['country','crossing_name','latitude','longitude'])
array_of_crossings_df
#haven countries
for kk,border in border_countries_results.iterrows():
    print(border['country'])
    for crossing in border_geo[border['country']]:
        array_of_crossings_df.loc[len(array_of_crossings_df.index)] = [border['country'],crossing['crossing_name'], crossing['latitude'],crossing['longitude']]
# conflict country
for crossing in border_geo[conflict_country]:
    array_of_crossings_df.loc[len(array_of_crossings_df.index)] = [conflict_country,crossing['crossing_name'], crossing['latitude'],crossing['longitude']]

El Salvador
Guatemala
Nicaragua


In [798]:
array_of_crossings_df

Unnamed: 0,country,crossing_name,latitude,longitude
0,El Salvador,Citalá,14.366666666,-89.216666666
1,El Salvador,San Lorenzo,14.033333333,-89.783333333
2,Guatemala,Esquipulas,14.616666666,-89.2
3,Guatemala,Melchor de Mencos,17.06666667,-89.15
4,Guatemala,La Mesilla,15.616666666,-91.983333333
5,Guatemala,Ayutla,14.683333333,-92.133333333
6,Nicaragua,San Ramón,12.916666666,-85.833333333
7,Nicaragua,El Castillo,11.01777778,-84.40111111
8,Nicaragua,Waspam,14.740833333,-83.971944444
9,Nicaragua,El Castillo,11.01777778,-84.40111111


If you want to upload manually generated border crossing location do that here. 

In [799]:
array_of_crossings_df=pd.read_csv('../refugee_data/manual_honduras_crossings.csv')

In [800]:
array_of_crossings_df

Unnamed: 0,country,crossing_name,longitude,latitude
0,Guatemala,Corinto,-88.372008,15.5956
1,Guatemala,Aldea Caparja,-89.2262,14.8574
2,Guatemala,Aguas Calientes,-89.28317,14.531062
3,El Salvador,Citala,-89.209463,14.371879
4,El Salvador,El Amatilo,-87.773412,13.59183
5,Nicaragua,Aduana El Espino,-86.724237,13.445553
6,Nicaragua,Frontera Las Manos,-86.570047,13.792918


## 4. Find Largest Cities in Conflict and Destination Countries

In [801]:
from functools import partial
import geopandas as gpd
import pandas as pd
import math
import numpy as np
import shapely
from shapely.geometry import Point
import googlemaps
import pgeocode
import pyproj
from fuzzywuzzy import process


Read in cities data with all cities with population over 15000 people. Then subset columns 

In [802]:
CITY_FILE = "../refugee_data/cities15000.txt"
city_df = pd.read_csv(
    CITY_FILE, 
    sep="\t", 
    header=0,
    names=[
         "geonameid", 
 "name", 
 "asciiname", 
 "alternatenames", 
 "latitude", 
 "longitude", 
 "feature class", 
 "feature code", 
 "country code", 
 "cc2", 
 "admin1 code", 
 "admin2 code", 
 "admin3 code", 
 "admin4 code", 
 "population", 
 "elevation", 
 "dem", 
 "timezone", 
 "modification date", 

    ]
)
#subset_cols=["name","latitude","longitude", "country code", "population"]
#city_df=city_df[subset_cols]

In [803]:
#city_df[city_df["country code"]=="RO"].sort_values("population",ascending=False).head(10)

In [804]:
subset_cols=["name","latitude","longitude", "country code", "population"]
city_df=city_df[subset_cols]

Read in the results from the collect_to_normalized notebook that we saved in the output folder

In [805]:
border_countries_results

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares
0,El Salvador,Honduras,6486201,0.184928,0.37,0.192316
1,Guatemala,Honduras,16858333,1.0,0.394,0.393016
2,Nicaragua,Honduras,6624554,0.0,0.058,0.023352


Read in county codes data so we merge the two data sources 

In [806]:
codes=  pd.read_csv("../refugee_data/wikipedia-iso-country-codes.csv")

In [807]:
codes.head()

Unnamed: 0,English short name lower case,Alpha-2 code,Alpha-3 code,Numeric code,ISO 3166-2
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX
2,Albania,AL,ALB,8,ISO 3166-2:AL
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ
4,American Samoa,AS,ASM,16,ISO 3166-2:AS


Add a column for country_code to our border_countries dataframe

In [808]:
options=codes["English short name lower case"]

for kk ,border in border_countries_results.iterrows():
    country,ratio,ind =process.extractOne(border["country"], options)
    border_countries_results.loc[kk,"country_code"]=codes.at[ind,"Alpha-2 code"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  border_countries_results.loc[kk,"country_code"]=codes.at[ind,"Alpha-2 code"]


We need the code for the conflict country as well.

In [809]:
country,ratio,ind =process.extractOne(border_countries_results["conflict"][0], options)
conflict_code=codes.at[ind,"Alpha-2 code"]
conflict_code

'HN'

Collect largest n cities in conflict country. This can be changed by the parameter number_conflict_cities. We also will add two column for country and location_type for future use. 

In [810]:
filtered_df=city_df[city_df["country code"]==conflict_code]
filtered_df=filtered_df.sort_values(by="population", ascending=False)
largest_conflict_cities=filtered_df[0:number_conflict_cities]
largest_conflict_cities['country']=conflict_country
largest_conflict_cities['location_type']="conflict_zone"
largest_conflict_cities

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['country']=conflict_country
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['location_type']="conflict_zone"


Unnamed: 0,name,latitude,longitude,country code,population,country,location_type
9716,Tegucigalpa,14.0818,-87.20681,HN,850848,Honduras,conflict_zone
9720,San Pedro Sula,15.50417,-88.025,HN,489466,Honduras,conflict_zone
9734,Choloma,15.61444,-87.95302,HN,139100,Honduras,conflict_zone
9726,La Ceiba,15.75971,-86.78221,HN,130218,Honduras,conflict_zone
9728,El Progreso,15.4,-87.8,HN,100810,Honduras,conflict_zone
9733,Ciudad Choluteca,13.30028,-87.19083,HN,75872,Honduras,conflict_zone
9731,Comayagua,14.45139,-87.6375,HN,58784,Honduras,conflict_zone
9711,Puerto Cortez,15.82562,-87.92968,HN,48013,Honduras,conflict_zone
9725,La Lima,15.43333,-87.91667,HN,45955,Honduras,conflict_zone
9730,Danlí,14.03333,-86.58333,HN,44799,Honduras,conflict_zone


Do the same for largest camp cities for each bordering country


In [811]:
largest_camp_cities = pd.DataFrame(columns = city_df.columns)

for kk ,border in border_countries_results.iterrows():
    filtered_df=city_df[city_df["country code"]==border['country_code']]
    filtered_df['country']=border['country']
    filtered_df=filtered_df.sort_values(by="population", ascending=False)
    largest_camp_cities_f=filtered_df[0:number_camp_cities]
    largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
largest_camp_cities["location_type"]='camp'
largest_camp_cities

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

Unnamed: 0,name,latitude,longitude,country code,population,country,location_type
20274,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
20267,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
20273,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
20276,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
20284,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp
20282,Santa Tecla,13.67694,-89.27972,SV,124694,El Salvador,camp
20295,Apopa,13.80722,-89.17917,SV,112158,El Salvador,camp
20289,Delgado,13.72171,-89.16874,SV,71594,El Salvador,camp
20269,Sonsonate,13.71889,-89.72417,SV,59468,El Salvador,camp
20278,San Marcos,13.66108,-89.18252,SV,54615,El Salvador,camp


Merge and update column names

In [812]:
for kk ,conflict_city in largest_conflict_cities.iterrows():
    largest_camp_cities=largest_camp_cities.append(conflict_city)
locations=largest_camp_cities.rename(columns={"name":"#name"})

  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_cities.append(conflict_city)
  largest_camp_cities=largest_camp_citie

In [813]:
locations.head(50)

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
20274,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
20267,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
20273,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
20276,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
20284,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp
20282,Santa Tecla,13.67694,-89.27972,SV,124694,El Salvador,camp
20295,Apopa,13.80722,-89.17917,SV,112158,El Salvador,camp
20289,Delgado,13.72171,-89.16874,SV,71594,El Salvador,camp
20269,Sonsonate,13.71889,-89.72417,SV,59468,El Salvador,camp
20278,San Marcos,13.66108,-89.18252,SV,54615,El Salvador,camp


In [814]:
city_df

Unnamed: 0,name,latitude,longitude,country code,population
0,Andorra la Vella,42.50779,1.52109,AD,20430
1,Umm Al Quwain City,25.56473,55.55517,AE,62747
2,Ras Al Khaimah City,25.78953,55.94320,AE,351943
3,Zayed City,23.65416,53.70522,AE,63482
4,Khawr Fakkān,25.33132,56.34199,AE,40677
...,...,...,...,...,...
25688,Bulawayo,-20.15000,28.58333,ZW,699385
25689,Bindura,-17.30192,31.33056,ZW,37423
25690,Beitbridge,-22.21667,30.00000,ZW,26459
25691,Epworth,-17.89000,31.14750,ZW,123250


save to inputs folder with unique file name

In [815]:
locations.to_csv(f'inputs/{conflict_country}_{flight_mode}_locations.csv',index=False)

## 5.  Route Generation

In [816]:
import json
import time
import requests

# google libraries
import googlemaps
import polyline

# mapping and shape utils
import folium
from folium import plugins

# data processing
import pandas as pd
import geopandas as gpd

import math

First, you need to enable the Google Directions API.

In [817]:
gmaps = googlemaps.Client(key='AIzaSyAPHPIx3pHfUVoCYZRVaZYy2l5WRowVitA')

Add custom basemaps to folium

In [818]:
basemaps = {
    'Google Maps': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=m&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Maps',
        overlay = True,
        control = True
    ),
    'Google Satellite': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Satellite',
        overlay = True,
        control = True
    ),
    'Google Terrain': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=p&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Terrain',
        overlay = True,
        control = True
    ),
    'Google Satellite Hybrid': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Satellite',
        overlay = True,
        control = True
    ),
    'Esri Satellite': folium.TileLayer(
        tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr = 'Esri',
        name = 'Esri Satellite',
        overlay = True,
        control = True
    )
}

 use locations generated above

In [819]:
locations.head()

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
20274,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
20267,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
20273,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
20276,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
20284,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp


In [820]:
conflicts = locations[locations["location_type"]=="conflict_zone"]

# set camps to crossing data or if you want to use largest cities use commented out code 
# camps = locations[locations["location_type"]=="camp"]

camps=array_of_crossings_df

In [821]:
attractions = border_countries_results.copy()

In [822]:
attractions

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares,country_code
0,El Salvador,Honduras,6486201,0.184928,0.37,0.192316,SV
1,Guatemala,Honduras,16858333,1.0,0.394,0.393016,GT
2,Nicaragua,Honduras,6624554,0.0,0.058,0.023352,NI


In [823]:
camps

Unnamed: 0,country,crossing_name,longitude,latitude
0,Guatemala,Corinto,-88.372008,15.5956
1,Guatemala,Aldea Caparja,-89.2262,14.8574
2,Guatemala,Aguas Calientes,-89.28317,14.531062
3,El Salvador,Citala,-89.209463,14.371879
4,El Salvador,El Amatilo,-87.773412,13.59183
5,Nicaragua,Aduana El Espino,-86.724237,13.445553
6,Nicaragua,Frontera Las Manos,-86.570047,13.792918


In [824]:
def get_closest(loc_lat, loc_lon, targets, mode):
    chunk_size = 25
    list_targets = [targets[i:i+chunk_size] for i in range(0,targets.shape[0],chunk_size)]
    output = None
    closest_seconds = 100000000000
    closest_loc = None
    for i in list_targets:
        results = gmaps.distance_matrix(origins=[(loc_lat, loc_lon)],
                                        destinations=list(tuple(zip(i.latitude, i.longitude))), mode=mode)
        for idx, val in enumerate(results["rows"][0]["elements"]):
            if val["status"] == "ZERO_RESULTS":
                continue
            attraction = attractions[attractions["country"] == i.iloc[idx]["country"]].predicted_shares.iloc[0]
            seconds = val["duration"]["value"]*(1/math.sqrt(attraction))
            if seconds <= closest_seconds:
                closest_seconds = seconds
                closest_loc = i.iloc[idx]
                output = val
    return closest_loc, output

Get conflict exit routes from each conflict city to the closest haven city. 

In [825]:
conflict_exit_routes = {}
NoneType = type(None)
for kk, conflict in conflicts.iterrows():

    closest_crossing, crossing_val = get_closest(conflict.latitude, conflict.longitude, camps, flight_mode)
    if isinstance(closest_crossing, type(None)):
        print(f'{conflict["#name"]} No routes found')
    conflict_exit_routes[conflict["#name"]] = dict(crossing=closest_crossing, 
                                                   crossing_v=crossing_val)

for kk, vv in conflict_exit_routes.items():
    if not isinstance(vv['crossing'], type(None)):
        vv['crossing'] = dict(vv['crossing'])

with open(f'outputs/{conflict_country}_exit_routes_{flight_mode}.json','w') as f:
    f.write(json.dumps(conflict_exit_routes, cls=NpEncoder))

 Directions and Plotting. 
 Calculate all directions from each conflict city to each haven city

In [826]:
all_directions = {}
for kk, conflict in conflicts.iterrows():
    conflict_name = conflict['#name']
    print(f"Getting directions for conflict: {conflict_name}")

    if conflict_name in conflict_exit_routes:
        try:
            xing = conflict_exit_routes[conflict_name]['crossing']
            try:
                directions_result = gmaps.directions((conflict.latitude, conflict.longitude),
                                                 (xing['latitude'], xing['longitude']),
                                                 mode=flight_mode)
                directions_result[0]['name']=xing['crossing_name']
                directions_result[0]['country']=xing['country']
            except Exception as e:
                print(e)
                directions_result = None
            all_directions[conflict_name] = directions_result
        except Exception as e:
            print(e)


Getting directions for conflict: Tegucigalpa
Getting directions for conflict: San Pedro Sula
Getting directions for conflict: Choloma
Getting directions for conflict: La Ceiba
Getting directions for conflict: El Progreso
Getting directions for conflict: Ciudad Choluteca
Getting directions for conflict: Comayagua
Getting directions for conflict: Puerto Cortez
Getting directions for conflict: La Lima
Getting directions for conflict: Danlí
Getting directions for conflict: Siguatepeque
Getting directions for conflict: Juticalpa
Getting directions for conflict: Villanueva
Getting directions for conflict: Tocoa
Getting directions for conflict: Tela
Getting directions for conflict: Santa Rosa de Copán
Getting directions for conflict: Olanchito
Getting directions for conflict: San Lorenzo
Getting directions for conflict: Cofradía
Getting directions for conflict: El Paraíso


In [827]:
with open(f'outputs/{conflict_country}_border_crossing_directions_{flight_mode}.json','w') as f:
    f.write(json.dumps(all_directions))
 

## 6. Mapping

Calculate population percentiles of conflict cities and assign them a stroke value

In [828]:
locations=pd.read_csv(f'inputs/{conflict_country}_{flight_mode}_locations.csv')

In [829]:
conflicts = locations[locations["location_type"]=="conflict_zone"]


In [830]:
c_desc = conflicts.population.describe()

In [831]:
def bucket_population(population):
    if population <= c_desc['25%']:
        stroke = 2.5
    elif population <= c_desc['50%']:
        stroke = 5
    elif population <= c_desc['75%']:
        stroke = 7.5
    else:
        stroke = 10
    return stroke
        

In [832]:
conflicts['stroke'] = conflicts['population'].apply(lambda x: bucket_population(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  conflicts['stroke'] = conflicts['population'].apply(lambda x: bucket_population(x))


In [833]:
# Create Map
map = folium.Map(location=[conflicts.latitude.mean(),conflicts.longitude.mean()], zoom_start=6)

# Plot conflict starting points
for kk, start in conflicts.iterrows():
    start_m = folium.Marker([start.latitude, start.longitude], popup=start['#name'], 
                            icon=folium.Icon(icon='glyphicon glyphicon-fire', color='darkred'))
    start_m.add_to(map)
    
# Plot ending locations
for kk, vv in camps.iterrows():
    icon = 'glyphicon glyphicon-flag'
    color = 'orange'
    popup_text = f"<b>City Name: </b>{vv['crossing_name']}<br>"\
                 f"<b>Country: </b>{vv['country']}"
    popup = folium.Popup(popup_text, max_width=300,min_width=150)
    xing = folium.Marker([vv.latitude, vv.longitude], popup=popup, 
                          icon=folium.Icon(icon=icon, color=color))
    xing.add_to(map)

# plot exit routes (driving)
if "driving" in flight_mode:
    fg_d = folium.FeatureGroup("Driving")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            distance = directions[0]['legs'][0]['distance']['text']
            duration = directions[0]['legs'][0]['duration']['text']
            end_location = directions[0]['name']
            end_country = camps[camps['crossing_name']==end_location].country.values[0]
            tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by car is <b>"\
                      f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                      f"<b>{population}</b> people are effected by this conflict."
            polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
            polyline_m = folium.PolyLine(polyline_, color='#4A89F3', tooltip=tooltip, weight=stroke)
            polyline_m.add_to(fg_d)
    fg_d.add_to(map)
        
# plot exit routes (transit)
if "transit" in flight_mode:
    fg_t = folium.FeatureGroup("Transit")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            if len(directions) > 0:
                distance = directions[0]['legs'][0]['distance']['text']
                duration = directions[0]['legs'][0]['duration']['text']
                end_location = directions[0]['name']
                end_country = camps[camps['crossing_name']==end_location].country.values[0]
                tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by transit is <b>"\
                          f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                          f"<b>{population}</b> people are effected by this conflict."
                polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
                polyline_m = folium.PolyLine(polyline_, color='#7570b3', tooltip=tooltip, weight=stroke)
                polyline_m.add_to(fg_t)        
        

    fg_t.add_to(map)
# folium.Choropleth(
#     manual_routes,
#     line_weight=3,
#     line_color='#f03b20',
#     name='Manually generated routes'
# ).add_to(map)
        
# Add custom basemaps
basemaps['Google Satellite Hybrid'].add_to(map)
# basemaps['Esri Satellite'].add_to(map)
# basemaps['Google Satellite'].add_to(map)
basemaps['Google Maps'].add_to(map)

# Add a layer control panel to the map.
map.add_child(folium.LayerControl())

# Add fullscreen button
plugins.Fullscreen().add_to(map)

<folium.plugins.fullscreen.Fullscreen at 0x7fda4d659a30>

In [834]:
def add_legend(map):
    legend_html = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@100;300;400&display=swap');
    </style>
    
     <div style="
     padding-left:5px; padding-top:5px;
     position: fixed; 
     bottom: 50px; left: 50px; width: 160px; height: 120px;   
     border:2px solid grey; z-index:9999; border-radius: 15px;
     
     background-color:white;
     opacity: .85;
     
     font-size:14px;
     font-weight: bold;
     font-family: 'Roboto', sans-serif;
     ">

     <div class="awesome-marker-icon-darkred awesome-marker" style="margin-top: 10px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-fire icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:20px">Conflict Area</div>
     
     <div class="awesome-marker-icon-orange awesome-marker" style="margin-top: 60px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-flag icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:25px">Destination City</div>     
     
     
      </div> """.format( title = "Legend html")
    map.get_root().html.add_child(folium.Element( legend_html ))
    return map

In [835]:
map = add_legend(map)

In [836]:
display(map)

In [837]:
# save map
map.save(f'maps/{conflict_country}_{flight_mode}_Map.html')

In [838]:
conflicts['#name'].unique()

array(['Tegucigalpa', 'San Pedro Sula', 'Choloma', 'La Ceiba',
       'El Progreso', 'Ciudad Choluteca', 'Comayagua', 'Puerto Cortez',
       'La Lima', 'Danlí', 'Siguatepeque', 'Juticalpa', 'Villanueva',
       'Tocoa', 'Tela', 'Santa Rosa de Copán', 'Olanchito', 'San Lorenzo',
       'Cofradía', 'El Paraíso'], dtype=object)

## 7. Recipient Country Refugee Counts


In [839]:
locations.head(50)

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
0,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
1,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
2,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
3,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
4,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp
5,Santa Tecla,13.67694,-89.27972,SV,124694,El Salvador,camp
6,Apopa,13.80722,-89.17917,SV,112158,El Salvador,camp
7,Delgado,13.72171,-89.16874,SV,71594,El Salvador,camp
8,Sonsonate,13.71889,-89.72417,SV,59468,El Salvador,camp
9,San Marcos,13.66108,-89.18252,SV,54615,El Salvador,camp


In [840]:
conflicts = locations[locations['location_type']=='conflict_zone']
#camps = locations[locations['location_type']=='camp']

In [841]:
conflicts

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
30,Tegucigalpa,14.0818,-87.20681,HN,850848,Honduras,conflict_zone
31,San Pedro Sula,15.50417,-88.025,HN,489466,Honduras,conflict_zone
32,Choloma,15.61444,-87.95302,HN,139100,Honduras,conflict_zone
33,La Ceiba,15.75971,-86.78221,HN,130218,Honduras,conflict_zone
34,El Progreso,15.4,-87.8,HN,100810,Honduras,conflict_zone
35,Ciudad Choluteca,13.30028,-87.19083,HN,75872,Honduras,conflict_zone
36,Comayagua,14.45139,-87.6375,HN,58784,Honduras,conflict_zone
37,Puerto Cortez,15.82562,-87.92968,HN,48013,Honduras,conflict_zone
38,La Lima,15.43333,-87.91667,HN,45955,Honduras,conflict_zone
39,Danlí,14.03333,-86.58333,HN,44799,Honduras,conflict_zone


In [842]:
conflict_exit_routes['Tegucigalpa']['crossing']

{'country': 'El Salvador',
 'crossing_name': 'El Amatilo',
 'longitude': -87.773412,
 'latitude': 13.59183}

Use routes gerated above called conflict_exit_routes

In [843]:
def get_exit_route(row, mode):
    try:
        print(row)
        dest = conflict_exit_routes[row['#name']]['crossing']['crossing_name']
        dest = camps[camps['crossing_name']==dest].country.values[0]
    except:
        dest = None
    row[f'{mode}_destination'] = dest
    return row

In [844]:
def get_exit_city(row, mode):
    try:
        dest = conflict_exit_routes[row['#name']]['crossing']['crossing_name']
    except:
        dest = None
    row[f'{mode}_destination_city'] = dest
    return row

In [845]:
conflicts = conflicts.apply(lambda row: get_exit_route(row, flight_mode), axis=1)

conflicts = conflicts.apply(lambda row: get_exit_city(row, flight_mode), axis=1)


#name              Tegucigalpa
latitude               14.0818
longitude            -87.20681
country code                HN
population              850848
country               Honduras
location_type    conflict_zone
Name: 30, dtype: object
#name            San Pedro Sula
latitude               15.50417
longitude               -88.025
country code                 HN
population               489466
country                Honduras
location_type     conflict_zone
Name: 31, dtype: object
#name                  Choloma
latitude              15.61444
longitude            -87.95302
country code                HN
population              139100
country               Honduras
location_type    conflict_zone
Name: 32, dtype: object
#name                 La Ceiba
latitude              15.75971
longitude            -86.78221
country code                HN
population              130218
country               Honduras
location_type    conflict_zone
Name: 33, dtype: object
#name              El Progres

In [846]:
conflicts.groupby([f'{flight_mode}_destination']).country.count()

driving_destination
El Salvador     6
Guatemala      12
Nicaragua       2
Name: country, dtype: int64

In [847]:
border_countries=  border_countries_results.copy()

First sum the total population across twenty conflicts. Then get the percentage of total people in conflict zones at each conflict.For example if the total population of the 20 conflict zones is 1 million people and conflict zone A has a population of 200,000 people. Then it has 20% of people in conflict. If the total country population is 5 million, and 10% are expected to flee due to the conflict then we "route" 10% * 20% * 5,000,000 = 100,000 people through conflict zone A.  

In [848]:
#total population of conflict country. Times the percent we want to leave due to conflict
conflict_country_historic_pop=int(conflict_country_historic_pop)
conflicts["pop_percent_of_conflict_cities"]=conflicts["population"]/conflicts["population"].sum()
conflicts[f"refugee_estimated_leaving_via_{flight_mode}"]=conflicts["pop_percent_of_conflict_cities"]*(conflict_country_historic_pop*percent_of_pop_leaving)

In [849]:
conflicts

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type,driving_destination,driving_destination_city,pop_percent_of_conflict_cities,refugee_estimated_leaving_via_driving
30,Tegucigalpa,14.0818,-87.20681,HN,850848,Honduras,conflict_zone,El Salvador,El Amatilo,0.375234,371654.087891
31,San Pedro Sula,15.50417,-88.025,HN,489466,Honduras,conflict_zone,Guatemala,Corinto,0.21586,213800.866646
32,Choloma,15.61444,-87.95302,HN,139100,Honduras,conflict_zone,Guatemala,Corinto,0.061345,60759.481865
33,La Ceiba,15.75971,-86.78221,HN,130218,Honduras,conflict_zone,Guatemala,Corinto,0.057428,56879.785834
34,El Progreso,15.4,-87.8,HN,100810,Honduras,conflict_zone,Guatemala,Corinto,0.044458,44034.24419
35,Ciudad Choluteca,13.30028,-87.19083,HN,75872,Honduras,conflict_zone,El Salvador,El Amatilo,0.03346,33141.217887
36,Comayagua,14.45139,-87.6375,HN,58784,Honduras,conflict_zone,El Salvador,El Amatilo,0.025924,25677.10555
37,Puerto Cortez,15.82562,-87.92968,HN,48013,Honduras,conflict_zone,Guatemala,Corinto,0.021174,20972.286145
38,La Lima,15.43333,-87.91667,HN,45955,Honduras,conflict_zone,Guatemala,Corinto,0.020267,20073.34284
39,Danlí,14.03333,-86.58333,HN,44799,Honduras,conflict_zone,Nicaragua,Frontera Las Manos,0.019757,19568.397039


In [850]:
conflicts[f'refugee_estimated_leaving_via_{flight_mode}'].sum()

990460.8000000002

In [851]:
COL=["#name","country",f"{flight_mode}_destination",f'{flight_mode}_destination_city',f"refugee_estimated_leaving_via_{flight_mode}"]

In [852]:
reduced_conflicts=conflicts[COL]

In [853]:
reduced_conflicts=reduced_conflicts.rename(columns={"#name":"origin city", "country":"origin country",f"{flight_mode}_destination":"destination country",f"{flight_mode}_destination_city":"destination city",f"refugee_estimated_leaving_via_{flight_mode}":"total refugees"})

In [854]:
reduced_conflicts.to_csv(f'outputs/{conflict_country}_{flight_mode}_total_refugees.csv',index=False)

In [855]:
reduced_conflicts

Unnamed: 0,origin city,origin country,destination country,destination city,total refugees
30,Tegucigalpa,Honduras,El Salvador,El Amatilo,371654.087891
31,San Pedro Sula,Honduras,Guatemala,Corinto,213800.866646
32,Choloma,Honduras,Guatemala,Corinto,60759.481865
33,La Ceiba,Honduras,Guatemala,Corinto,56879.785834
34,El Progreso,Honduras,Guatemala,Corinto,44034.24419
35,Ciudad Choluteca,Honduras,El Salvador,El Amatilo,33141.217887
36,Comayagua,Honduras,El Salvador,El Amatilo,25677.10555
37,Puerto Cortez,Honduras,Guatemala,Corinto,20972.286145
38,La Lima,Honduras,Guatemala,Corinto,20073.34284
39,Danlí,Honduras,Nicaragua,Frontera Las Manos,19568.397039


Summarize for country level numbers

In [856]:
country_level_refugee=pd.DataFrame(data=reduced_conflicts.groupby(['destination country'])["total refugees"].sum())

In [857]:
country_level_refugee['total refugees']=country_level_refugee["total refugees"].round()

In [858]:
country_level_refugee

Unnamed: 0_level_0,total refugees
destination country,Unnamed: 1_level_1
El Salvador,473767.0
Guatemala,488923.0
Nicaragua,27771.0
