# Generate Estimated Refugee Numbers and Routes 

## 1. Data Collection

In [792]:
import json
import pandas as pd
from fuzzywuzzy import process
from sklearn.preprocessing import MinMaxScaler
from statsmodels.iolib.smpickle import load_pickle
import numpy as np
from fuzzywuzzy import fuzz


### Parameters
Set these values as parameters for the model.
Set conflict city and number of cities for conflict and camp countries


In [876]:
conflict_country= "Mali"
percent_of_pop_leaving=.10
flight_mode="driving"
conflict_start=2021
excluded_countries=[]
added_countries=[]
number_conflict_cities=20
number_camp_cities=5

In [794]:
conflict_start=conflict_start-1

### Helper functions

In [795]:
# Helper Encoder for json
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)

## 2. Collect relevant data 

read in country border data

In [796]:
country_border = open('../refugee_data/country_border_data.json')
countries_that_border = json.load(country_border)

get a list of touching countries

In [797]:
touching_list=[]
touching_list=countries_that_border[conflict_country]
touching_list

['Algeria',
 'Burkina Faso',
 'Cote d’Ivoire',
 'Guinea',
 'Mauritania',
 'Niger',
 'Senegal']

remove any countries that are to be excluded.


In [798]:
indexed_list={}
for i,c in enumerate(touching_list):
    indexed_list[i]=c
    
for i,ex in enumerate(excluded_countries):
    country,value, ind=process.extractOne(ex, indexed_list)
    if value>89:
        touching_list.pop(ind)
    print(country, value, ind)
    print(touching_list)

add any countries we need to 

In [799]:
for country_v in added_countries:
    touching_list.append(country_v)
touching_list

['Algeria',
 'Burkina Faso',
 'Cote d’Ivoire',
 'Guinea',
 'Mauritania',
 'Niger',
 'Senegal']

create a dataframe to build upon.


In [800]:
touching_df = pd.DataFrame (touching_list, columns = ['bording_countries'])
touching_df['conflict']=conflict_country

read in historic_pop

In [801]:
historic_pop=pd.read_csv('../refugee_data/historic_pop.csv')    

Get historic populations from year before conflict


In [802]:
options=historic_pop["Country Name"]
touching_df['historic_pop']=None
historic_pop_cols=historic_pop.columns

indexed_col={}
for i,c in enumerate(historic_pop_cols):
    indexed_col[i]=c

column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_col)


for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_pop"]=historic_pop.at[ind,column]



Get historic population of conflict country

In [803]:
country,ratio,ind =process.extractOne(conflict_country, options)
conflict_country_historic_pop=int(historic_pop.at[ind,column])
conflict_country_historic_pop

20250834

read in historic gdp per cap data. Get data from historic gdp per cap given the year.


In [804]:
gdp_per_cap_historic=pd.read_csv("../refugee_data/gdp_per_cap.csv")

In [805]:
options=gdp_per_cap_historic["Country Name"]
touching_df['historic_gdp_per_cap']=None

historic_gdp_cols=gdp_per_cap_historic.columns
indexed_gdp_col={}
for i,c in enumerate(historic_gdp_cols):
    indexed_gdp_col[i]=c

    
column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_gdp_col)

for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_gdp_per_cap"]=gdp_per_cap_historic.at[ind,column]

calculate historic gdp for later use if needed


In [806]:
touching_df['calculated_historic_gdp']=None
for kk ,row in touching_df.iterrows():
    try:
        touching_df.loc[kk,"calculated_historic_gdp"]=int(row['historic_pop'])*float(row['historic_gdp_per_cap'])
    except Exception as e:
        print(e, f'GDP per cap for {row["bording_countries"]} data was {row["historic_gdp_per_cap"]}' )

read in V-Dem data and subset the dataset to only enclude columns we care about

In [807]:
Dem=pd.read_csv("../refugee_data/country_dem.csv")
columnList=["country_name","year","v2xeg_eqdr","v2x_libdem"]
country_dem=Dem[columnList]

collect liberal democracy and access to justic for women data for each country given conflict year


In [808]:
touching_df["v2x_libdem"]=None
touching_df["v2xeg_eqdr"]=None

options=country_dem['country_name'].unique()

for kk ,row in touching_df.iterrows():
    country,ratio =process.extractOne(row["bording_countries"], options)
    lib=country_dem.loc[(country_dem["country_name"]==country) & (country_dem["year"]==int(conflict_start))]['v2x_libdem']
    eqdr=country_dem.loc[(country_dem["country_name"]==country) & (country_dem["year"]==int(conflict_start))]['v2xeg_eqdr']
    touching_df.loc[kk,"v2xeg_eqdr"]=eqdr.to_list()[0]
    touching_df.loc[kk,"v2x_libdem"]=lib.to_list()[0]

read in historic gdp from world bank not calculated manually.

In [809]:
historic_GDP=pd.read_csv('../refugee_data/GDP_historic.csv')

Get data from historic gdp per cap given the year.


In [810]:
options=historic_GDP["Country Name"]
touching_df['historic_GDP']=None
historic_GDP_cols=historic_GDP.columns
indexed_GDP_col={}
for i,c in enumerate(historic_GDP_cols):
    indexed_GDP_col[i]=c

    
column,ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_GDP_col)

for kk ,border in touching_df.iterrows():
    country,ratio,ind =process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk,"historic_GDP"]=historic_GDP.at[ind,column]

In [811]:
touching_df

Unnamed: 0,bording_countries,conflict,historic_pop,historic_gdp_per_cap,calculated_historic_gdp,v2x_libdem,v2xeg_eqdr,historic_GDP
0,Algeria,Mali,43851043,3306.85820838104,145009181490.61996,0.151,0.72,145009181490.62
1,Burkina Faso,Mali,20903278,857.932729650223,17933606353.177456,0.504,0.341,17933606353.1775
2,Cote d’Ivoire,Mali,26378275,2325.72370502247,61348579465.10159,0.314,0.317,61348579465.1017
3,Guinea,Mali,13132792,1194.03786469445,15681050917.156357,0.125,0.164,15681050917.1563
4,Mauritania,Mali,4649660,1701.99116304908,7913680231.182785,0.179,0.29,7913680231.1828
5,Niger,Mali,24206636,567.669892261611,13741378450.136036,0.401,0.299,13741378450.136
6,Senegal,Mali,16743930,1471.83096170711,24644234594.65653,0.572,0.564,24644234594.6566


Normalize the columns we will run the model on that are not already an index. For our current model that is only historic_GDP.

In [812]:
cols_to_scale = ['historic_GDP']
touching_df=touching_df.rename(columns={'bording_countries':'country'})

In [813]:
scaler = MinMaxScaler()
for col in cols_to_scale:
    print(col)
    normed = pd.DataFrame()
    
    for y, x in touching_df.groupby('conflict'):
        print(y,x)
        norm_ = [i[0] for i in scaler.fit_transform(x[col].values.reshape(-1,1))]
        countries = x['country']
        conflict_ = x['conflict']
        res = pd.DataFrame(tuple(zip(countries,conflict_,norm_)), columns=['country','conflict',f"{col}_norm"])
        normed = normed.append(res)
    normalized_data = pd.merge(touching_df, normed, left_on=['country','conflict'], right_on=['country','conflict'], how='right')

historic_GDP
Mali          country conflict historic_pop historic_gdp_per_cap  \
0        Algeria     Mali     43851043     3306.85820838104   
1   Burkina Faso     Mali     20903278     857.932729650223   
2  Cote d’Ivoire     Mali     26378275     2325.72370502247   
3         Guinea     Mali     13132792     1194.03786469445   
4     Mauritania     Mali      4649660     1701.99116304908   
5          Niger     Mali     24206636     567.669892261611   
6        Senegal     Mali     16743930     1471.83096170711   

  calculated_historic_gdp v2x_libdem v2xeg_eqdr         historic_GDP  
0     145009181490.619965      0.151       0.72  145009181490.619995  
1      17933606353.177456      0.504      0.341   17933606353.177502  
2      61348579465.101593      0.314      0.317     61348579465.1017  
3      15681050917.156357      0.125      0.164     15681050917.1563  
4       7913680231.182785      0.179       0.29      7913680231.1828  
5      13741378450.136036      0.401      0.299    

  normed = normed.append(res)


In [814]:
normalized_data

Unnamed: 0,country,conflict,historic_pop,historic_gdp_per_cap,calculated_historic_gdp,v2x_libdem,v2xeg_eqdr,historic_GDP,historic_GDP_norm
0,Algeria,Mali,43851043,3306.85820838104,145009181490.61996,0.151,0.72,145009181490.62,1.0
1,Burkina Faso,Mali,20903278,857.932729650223,17933606353.177456,0.504,0.341,17933606353.1775,0.073087
2,Cote d’Ivoire,Mali,26378275,2325.72370502247,61348579465.10159,0.314,0.317,61348579465.1017,0.389764
3,Guinea,Mali,13132792,1194.03786469445,15681050917.156357,0.125,0.164,15681050917.1563,0.056657
4,Mauritania,Mali,4649660,1701.99116304908,7913680231.182785,0.179,0.29,7913680231.1828,0.0
5,Niger,Mali,24206636,567.669892261611,13741378450.136036,0.401,0.299,13741378450.136,0.042508
6,Senegal,Mali,16743930,1471.83096170711,24644234594.65653,0.572,0.564,24644234594.6566,0.122036


## 3. Modeling
Load the prebuild model 

In [815]:
new_results = load_pickle("../refugee_model_results.pickle")

set independant variables 


In [816]:
features_cols = [
                    'historic_GDP_norm', 
                    'v2x_libdem',
]
features_normalized = normalized_data[features_cols]
normalized_data[['country',"conflict"]+features_cols]

Unnamed: 0,country,conflict,historic_GDP_norm,v2x_libdem
0,Algeria,Mali,1.0,0.151
1,Burkina Faso,Mali,0.073087,0.504
2,Cote d’Ivoire,Mali,0.389764,0.314
3,Guinea,Mali,0.056657,0.125
4,Mauritania,Mali,0.0,0.179
5,Niger,Mali,0.042508,0.401
6,Senegal,Mali,0.122036,0.572


Currently if a country does not have data for a column we are dropping that country. If you need to not drop a country you need to fill in the missing data manually or some other method.

In [817]:
normalized_data=normalized_data.dropna()

Select only features that are needed and use the model to predict 


In [818]:
features_to_predict=normalized_data[features_cols]
shares = new_results.predict(features_to_predict)
normalized_data['predicted_shares'] = shares

save results to a csv file for next step.

In [819]:
border_countries_results = normalized_data[['country',"conflict","historic_pop","historic_GDP_norm","v2x_libdem",'predicted_shares']]
border_countries_results.to_csv(f'outputs/{conflict_country}_{flight_mode}_output_results.csv',index=False)

In [820]:
border_countries_results

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares
0,Algeria,Mali,43851043,1.0,0.151,0.295177
1,Burkina Faso,Mali,20903278,0.073087,0.504,0.220055
2,Cote d’Ivoire,Mali,26378275,0.389764,0.314,0.217778
3,Guinea,Mali,13132792,0.056657,0.125,0.063608
4,Mauritania,Mali,4649660,0.0,0.179,0.07207
5,Niger,Mali,24206636,0.042508,0.401,0.171417
6,Senegal,Mali,16743930,0.122036,0.572,0.258906


## 4. Find Largest Cities in Conflict and Destination Countries

In [821]:
from functools import partial
import geopandas as gpd
import pandas as pd
import math
import numpy as np
import shapely
from shapely.geometry import Point
import googlemaps
import pgeocode
import pyproj
from fuzzywuzzy import process


Read in cities data with all cities with population over 15000 people. Then subset columns 

In [822]:
CITY_FILE = "../refugee_data/cities15000.txt"
city_df = pd.read_csv(
    CITY_FILE, 
    sep="\t", 
    header=0,
    names=[
         "geonameid", 
 "name", 
 "asciiname", 
 "alternatenames", 
 "latitude", 
 "longitude", 
 "feature class", 
 "feature code", 
 "country code", 
 "cc2", 
 "admin1 code", 
 "admin2 code", 
 "admin3 code", 
 "admin4 code", 
 "population", 
 "elevation", 
 "dem", 
 "timezone", 
 "modification date", 

    ]
)
#subset_cols=["name","latitude","longitude", "country code", "population"]
#city_df=city_df[subset_cols]

In [823]:
#city_df[city_df["country code"]=="RO"].sort_values("population",ascending=False).head(10)

In [824]:
subset_cols=["name","latitude","longitude", "country code", "population"]
city_df=city_df[subset_cols]

Read in the results from the collect_to_normalized notebook that we saved in the output folder

In [825]:
border_countries_results

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares
0,Algeria,Mali,43851043,1.0,0.151,0.295177
1,Burkina Faso,Mali,20903278,0.073087,0.504,0.220055
2,Cote d’Ivoire,Mali,26378275,0.389764,0.314,0.217778
3,Guinea,Mali,13132792,0.056657,0.125,0.063608
4,Mauritania,Mali,4649660,0.0,0.179,0.07207
5,Niger,Mali,24206636,0.042508,0.401,0.171417
6,Senegal,Mali,16743930,0.122036,0.572,0.258906


Read in county codes data so we merge the two data sources 

In [826]:
codes=  pd.read_csv("../refugee_data/wikipedia-iso-country-codes.csv")

In [827]:
codes.head()

Unnamed: 0,English short name lower case,Alpha-2 code,Alpha-3 code,Numeric code,ISO 3166-2
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX
2,Albania,AL,ALB,8,ISO 3166-2:AL
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ
4,American Samoa,AS,ASM,16,ISO 3166-2:AS


Add a column for country_code to our border_countries dataframe

In [828]:
options=codes["English short name lower case"]

for kk ,border in border_countries_results.iterrows():
    country,ratio,ind =process.extractOne(border["country"], options)
    border_countries_results.loc[kk,"country_code"]=codes.at[ind,"Alpha-2 code"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  border_countries_results.loc[kk,"country_code"]=codes.at[ind,"Alpha-2 code"]


We need the code for the conflict country as well.

In [829]:
country,ratio,ind =process.extractOne(border_countries_results["conflict"][0], options)
conflict_code=codes.at[ind,"Alpha-2 code"]
conflict_code

'ML'

Collect largest n cities in conflict country. This can be changed by the parameter number_conflict_cities. We also will add two column for country and location_type for future use. 

In [830]:
filtered_df=city_df[city_df["country code"]==conflict_code]
filtered_df=filtered_df.sort_values(by="population", ascending=False)
largest_conflict_cities=filtered_df[0:]
largest_conflict_cities['country']=conflict_country
largest_conflict_cities['location_type']="conflict_zone"


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['country']=conflict_country
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['location_type']="conflict_zone"


In [831]:
largest_conflict_cities

Unnamed: 0,name,latitude,longitude,country code,population,country,location_type
15421,Bamako,12.65,-8.0,ML,1297281,Mali,conflict_zone
15405,Ségou,13.44032,-6.25947,ML,153349,Mali,conflict_zone
15404,Sikasso,11.31755,-5.66654,ML,144786,Mali,conflict_zone
15408,Mopti,14.4843,-4.18296,ML,108456,Mali,conflict_zone
15410,Koutiala,12.39173,-5.46421,ML,99353,Mali,conflict_zone
15417,Gao,16.27167,-0.04472,ML,87000,Mali,conflict_zone
15414,Kayes,14.44693,-11.44448,ML,78406,Mali,conflict_zone
15409,Markala,13.68427,-6.07352,ML,53738,Mali,conflict_zone
15412,Kolokani,13.5728,-8.0339,ML,48774,Mali,conflict_zone
15415,Kati,12.74409,-8.07257,ML,42922,Mali,conflict_zone


Do the same for largest camp cities for each bordering country


In [832]:
largest_camp_cities = pd.DataFrame(columns = city_df.columns)
print(largest_camp_cities)
for kk ,border in border_countries_results.iterrows():
    filtered_df=city_df[city_df["country code"]==border['country_code']]
    filtered_df['country']=border['country']
    filtered_df=filtered_df.sort_values(by="population", ascending=False)
    largest_camp_cities_f=filtered_df[0:number_camp_cities]
    largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
largest_camp_cities["location_type"]='camp'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)


Empty DataFrame
Columns: [name, latitude, longitude, country code, population]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

Merge and update column names

In [833]:
for kk ,border in largest_conflict_cities.iterrows():
    largest_camp_cities=largest_camp_cities.append(border)
locations=largest_camp_cities.rename(columns={"name":"#name"})

  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_c

In [834]:
locations.head(50)

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
6866,Algiers,36.73225,3.08746,DZ,1977663,Algeria,camp
6647,Boumerdas,36.76639,3.47717,DZ,786499,Algeria,camp
6720,Oran,35.69906,-0.63588,DZ,645984,Algeria,camp
6669,Tébessa,35.40417,8.12417,DZ,634332,Algeria,camp
6802,Constantine,36.365,6.61472,DZ,450097,Algeria,camp
1109,Ouagadougou,12.36566,-1.53388,BF,1086505,Burkina Faso,camp
1129,Bobo-Dioulasso,11.17715,-4.2979,BF,360106,Burkina Faso,camp
1114,Koudougou,12.25263,-2.36272,BF,87347,Burkina Faso,camp
1108,Ouahigouya,13.58278,-2.42158,BF,61096,Burkina Faso,camp
1130,Banfora,10.63333,-4.76667,BF,60288,Burkina Faso,camp


save to inputs folder with unique file name

In [835]:
locations.to_csv(f'inputs/{conflict_country}_{flight_mode}_locations.csv',index=False)

## 5.  Route Generation

In [836]:
import json
import time
import requests

# google libraries
import googlemaps
import polyline

# mapping and shape utils
import folium
from folium import plugins

# data processing
import pandas as pd
import geopandas as gpd

import math

First, you need to enable the Google Directions API.

In [837]:
gmaps = googlemaps.Client(key='AIzaSyAPHPIx3pHfUVoCYZRVaZYy2l5WRowVitA')

Add custom basemaps to folium

In [838]:
basemaps = {
    'Google Maps': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=m&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Maps',
        overlay = True,
        control = True
    ),
    'Google Satellite': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Satellite',
        overlay = True,
        control = True
    ),
    'Google Terrain': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=p&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Terrain',
        overlay = True,
        control = True
    ),
    'Google Satellite Hybrid': folium.TileLayer(
        tiles = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}',
        attr = 'Google',
        name = 'Google Satellite',
        overlay = True,
        control = True
    ),
    'Esri Satellite': folium.TileLayer(
        tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr = 'Esri',
        name = 'Esri Satellite',
        overlay = True,
        control = True
    )
}

 use locations generated above

In [839]:
locations.head()

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
6866,Algiers,36.73225,3.08746,DZ,1977663,Algeria,camp
6647,Boumerdas,36.76639,3.47717,DZ,786499,Algeria,camp
6720,Oran,35.69906,-0.63588,DZ,645984,Algeria,camp
6669,Tébessa,35.40417,8.12417,DZ,634332,Algeria,camp
6802,Constantine,36.365,6.61472,DZ,450097,Algeria,camp


In [840]:
conflicts = locations[locations["location_type"]=="conflict_zone"]

In [841]:
camps = locations[locations["location_type"]=="camp"]

In [842]:
attractions = border_countries_results.copy()

In [843]:
attractions

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares,country_code
0,Algeria,Mali,43851043,1.0,0.151,0.295177,DZ
1,Burkina Faso,Mali,20903278,0.073087,0.504,0.220055,BF
2,Cote d’Ivoire,Mali,26378275,0.389764,0.314,0.217778,CI
3,Guinea,Mali,13132792,0.056657,0.125,0.063608,GN
4,Mauritania,Mali,4649660,0.0,0.179,0.07207,MR
5,Niger,Mali,24206636,0.042508,0.401,0.171417,NE
6,Senegal,Mali,16743930,0.122036,0.572,0.258906,SN


In [844]:
camps

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
6866,Algiers,36.73225,3.08746,DZ,1977663,Algeria,camp
6647,Boumerdas,36.76639,3.47717,DZ,786499,Algeria,camp
6720,Oran,35.69906,-0.63588,DZ,645984,Algeria,camp
6669,Tébessa,35.40417,8.12417,DZ,634332,Algeria,camp
6802,Constantine,36.365,6.61472,DZ,450097,Algeria,camp
1109,Ouagadougou,12.36566,-1.53388,BF,1086505,Burkina Faso,camp
1129,Bobo-Dioulasso,11.17715,-4.2979,BF,360106,Burkina Faso,camp
1114,Koudougou,12.25263,-2.36272,BF,87347,Burkina Faso,camp
1108,Ouahigouya,13.58278,-2.42158,BF,61096,Burkina Faso,camp
1130,Banfora,10.63333,-4.76667,BF,60288,Burkina Faso,camp


In [846]:
crossing_locations=[]
for kk,conflict in conflicts.iterrows():
    for country in touching_list:
        try:
            result=gmaps.directions(f'{conflict["#name"]}, {conflict["country"]}', country,mode="driving")
            if result:
                for idx, i in enumerate(result[0]['legs'][0]['steps']):
                    instr = i['html_instructions']
                    if 'Entering' in instr: 
                        country_split=instr.split('Entering')[1].split("<")[0]
                        ratio=fuzz.ratio(country_split,camp['country'])
                        if ratio>80:
                            crossing_data={"latitude":i['end_location']['lat'],"longitude":i['end_location']['lng'],"country":f"{country}" }
                            if crossing_data not in crossing_locations:
                                crossing_locations.append(crossing_data)
        except Exception as e:
            print(e)
            
    for kk, camp in camps.iterrows():
        try:
            result=gmaps.directions(f'{conflict["#name"]}, {conflict["country"]}',f'{camp["#name"]}, {camp["country"]}' ,mode="driving")
            if result:
                for idx, i in enumerate(result[0]['legs'][0]['steps']):
                    instr = i['html_instructions']
                    if 'Entering' in instr: 
                        country_split=instr.split('Entering')[1].split("<")[0]
                        ratio=fuzz.ratio(country_split,camp['country'])
                        if ratio>80:
                            crossing_data={"latitude":i['end_location']['lat'],"longitude":i['end_location']['lng'],"country":f"{camp['country']}" }
                            if crossing_data not in crossing_locations:
                                crossing_locations.append(crossing_data)
        except Exception as e:
            print(e)

NOT_FOUND
NOT_FOUND
NOT_FOUND
NOT_FOUND


In [847]:
crossing_locations_df = pd.DataFrame (crossing_locations, columns = ['latitude', "longitude","country"])
crossing_locations_df

Unnamed: 0,latitude,longitude,country
0,21.185692,1.045257,Algeria
1,13.111674,-4.347421,Burkina Faso
2,11.119193,-5.360715,Burkina Faso
3,10.56281,-6.427474,Cote d’Ivoire
4,11.951002,-8.785012,Guinea
5,15.69255,-9.321718,Mauritania
6,15.500003,-8.591652,Mauritania
7,13.992996,0.387244,Niger
8,14.456328,-12.204163,Senegal
9,12.063125,-4.669205,Burkina Faso


In [848]:
def get_closest(loc_lat, loc_lon, targets, mode):
    chunk_size = 25
    list_targets = [targets[i:i+chunk_size] for i in range(0,targets.shape[0],chunk_size)]
    output = None
    closest_seconds = 100000000000
    closest_loc = None
    for i in list_targets:
        results = gmaps.distance_matrix(origins=[(loc_lat, loc_lon)],
                                        destinations=list(tuple(zip(i.latitude, i.longitude))), mode=mode)
        
        for idx, val in enumerate(results["rows"][0]["elements"]):
            if val["status"] == "ZERO_RESULTS":
                continue
            
            attraction = attractions[attractions["country"] == i.iloc[idx]["country"]].predicted_shares.iloc[0]
            seconds = val["duration"]["value"]*(1/math.sqrt(attraction))
            if seconds <= closest_seconds:
                closest_seconds = seconds
                closest_loc = i.iloc[idx]
                output = val
    return closest_loc, output

Get conflict exit routes from each conflict city to the closest haven city. 

In [849]:
conflict_exit_routes = {}
NoneType = type(None)
for kk, conflict in conflicts.iterrows():
    closest_crossing, crossing_val = get_closest(conflict.latitude, conflict.longitude, crossing_locations_df, flight_mode)
    
    if isinstance(closest_crossing, type(None)):
        print(f'{conflict["#name"]} No routes found')
    conflict_exit_routes[conflict["#name"]] = dict(crossing=closest_crossing, 
                                                   crossing_v=crossing_val)
for kk, vv in conflict_exit_routes.items():
    if not isinstance(vv['crossing'], type(None)):
        vv['crossing'] = dict(vv['crossing'])

with open(f'outputs/{conflict_country}_exit_routes_{flight_mode}.json','w') as f:
    f.write(json.dumps(conflict_exit_routes, cls=NpEncoder))

Sagalo No routes found


 Directions and Plotting. 
 Calculate all directions from each conflict city to each haven city

In [850]:
all_directions = {}
for kk, conflict in conflicts.iterrows():
    conflict_name = conflict['#name']
    print(f"Getting directions for conflict: {conflict_name}")

    if conflict_name in conflict_exit_routes:
        try:
            xing = conflict_exit_routes[conflict_name]['crossing']
            try:
                directions_result = gmaps.directions((conflict.latitude, conflict.longitude),
                                                 (xing['latitude'], xing['longitude']),
                                                 mode=flight_mode)
                directions_result[0]['name']=xing['country']
                directions_result[0]['country']=xing['country']
            except Exception as e:
                print(e)
                directions_result = None
            all_directions[conflict_name] = directions_result
        except Exception as e:
            print(e)


Getting directions for conflict: Bamako
Getting directions for conflict: Ségou
Getting directions for conflict: Sikasso
Getting directions for conflict: Mopti
Getting directions for conflict: Koutiala
Getting directions for conflict: Gao
Getting directions for conflict: Kayes
Getting directions for conflict: Markala
Getting directions for conflict: Kolokani
Getting directions for conflict: Kati
Getting directions for conflict: Bougouni
Getting directions for conflict: Timbuktu
Getting directions for conflict: Banamba
Getting directions for conflict: Bafoulabé
Getting directions for conflict: San
Getting directions for conflict: Koulikoro
Getting directions for conflict: Djénné
Getting directions for conflict: Yorosso
Getting directions for conflict: Kangaba
Getting directions for conflict: Sagalo
'NoneType' object is not subscriptable
Getting directions for conflict: Ké-Macina


In [851]:
with open(f'outputs/{conflict_country}_border_crossing_directions_{flight_mode}.json','w') as f:
    f.write(json.dumps(all_directions))
 

## 6. Mapping

Calculate population percentiles of conflict cities and assign them a stroke value

In [852]:
locations=pd.read_csv(f'inputs/{conflict_country}_{flight_mode}_locations.csv')

In [853]:
conflicts = locations[locations["location_type"]=="conflict_zone"]
camps

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
6866,Algiers,36.73225,3.08746,DZ,1977663,Algeria,camp
6647,Boumerdas,36.76639,3.47717,DZ,786499,Algeria,camp
6720,Oran,35.69906,-0.63588,DZ,645984,Algeria,camp
6669,Tébessa,35.40417,8.12417,DZ,634332,Algeria,camp
6802,Constantine,36.365,6.61472,DZ,450097,Algeria,camp
1109,Ouagadougou,12.36566,-1.53388,BF,1086505,Burkina Faso,camp
1129,Bobo-Dioulasso,11.17715,-4.2979,BF,360106,Burkina Faso,camp
1114,Koudougou,12.25263,-2.36272,BF,87347,Burkina Faso,camp
1108,Ouahigouya,13.58278,-2.42158,BF,61096,Burkina Faso,camp
1130,Banfora,10.63333,-4.76667,BF,60288,Burkina Faso,camp


In [854]:
c_desc = conflicts.population.describe()

In [855]:
def bucket_population(population):
    if population <= c_desc['25%']:
        stroke = 2.5
    elif population <= c_desc['50%']:
        stroke = 5
    elif population <= c_desc['75%']:
        stroke = 7.5
    else:
        stroke = 10
    return stroke
        

In [856]:
conflicts['stroke'] = conflicts['population'].apply(lambda x: bucket_population(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  conflicts['stroke'] = conflicts['population'].apply(lambda x: bucket_population(x))


In [857]:
colors=[  'lightblue', 'orange',  'lightred','darkpurple', 'darkgreen', 'darkblue', 'lightgray', 'black','cadetblue', 'pink', 'beige', 'darkred', 'lightgreen', 'green', 'red', 'white', 'blue', 'purple', 'gray']
country_colors={}
for i,c in enumerate(touching_list):
    country_colors[c]=colors[i]
    

In [858]:
# Create Map
map = folium.Map(location=[conflicts.latitude.mean(),conflicts.longitude.mean()], zoom_start=6)

#plot crossings
for i, crossing in enumerate(crossing_locations):
    crossing_m = folium.Marker([crossing["latitude"],crossing["longitude"]], popup=f'{crossing["country"]}_crossing', 
                        icon=folium.Icon(icon='glyphicon glyphicon-road', color=country_colors[crossing['country']]))
    crossing_m.add_to(map)

# Plot conflict starting points
for kk, start in conflicts.iterrows():
    start_m = folium.Marker([start.latitude, start.longitude], popup=start['#name'], 
                            icon=folium.Icon(icon='glyphicon glyphicon-fire', color='darkred'))
    start_m.add_to(map)
    

# plot exit routes (driving)
if "driving" in flight_mode:
    fg_d = folium.FeatureGroup("Driving")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            distance = directions[0]['legs'][0]['distance']['text']
            duration = directions[0]['legs'][0]['duration']['text']
            end_location = directions[0]['name']
            
            end_country = end_location
            tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by car is <b>"\
                      f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                      f"<b>{population}</b> people are effected by this conflict."
            polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
            polyline_m = folium.PolyLine(polyline_, color='#4A89F3', tooltip=tooltip, weight=stroke)
            polyline_m.add_to(fg_d)
    fg_d.add_to(map)
        
# plot exit routes (transit)
if "transit" in flight_mode:
    fg_t = folium.FeatureGroup("Transit")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            if len(directions) > 0:
                distance = directions[0]['legs'][0]['distance']['text']
                duration = directions[0]['legs'][0]['duration']['text']
                end_location = directions[0]['name']
                end_country = camps[camps['#name']==end_location].country.values[0]
                tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by transit is <b>"\
                          f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                          f"<b>{population}</b> people are effected by this conflict."
                polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
                polyline_m = folium.PolyLine(polyline_, color='#7570b3', tooltip=tooltip, weight=stroke)
                polyline_m.add_to(fg_t)        
        

    fg_t.add_to(map)

    
basemaps['Google Satellite Hybrid'].add_to(map)
# basemaps['Esri Satellite'].add_to(map)
# basemaps['Google Satellite'].add_to(map)
basemaps['Google Maps'].add_to(map)

# Add a layer control panel to the map.
map.add_child(folium.LayerControl())

# Add fullscreen button
plugins.Fullscreen().add_to(map)

<folium.plugins.fullscreen.Fullscreen at 0x7f7966a122e0>

In [859]:
def add_legend(map):
    legend_html = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@100;300;400&display=swap');
    </style>
    
     <div style="
     padding-left:5px; padding-top:5px;
     position: fixed; 
     bottom: 50px; left: 50px; width: 160px; height: 120px;   
     border:2px solid grey; z-index:9999; border-radius: 15px;
     
     background-color:white;
     opacity: .85;
     
     font-size:14px;
     font-weight: bold;
     font-family: 'Roboto', sans-serif;
     ">

     <div class="awesome-marker-icon-darkred awesome-marker" style="margin-top: 10px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-fire icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:20px">Conflict Area</div>
     
     <div class="awesome-marker-icon-gray awesome-marker" style="margin-top: 60px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-road icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:25px">Destination Crossing</div>     
     
     
      </div> """.format( title = "Legend html")
    map.get_root().html.add_child(folium.Element( legend_html ))
    return map

In [860]:
map = add_legend(map)

In [861]:
display(map)

In [862]:
# save map
map.save(f'maps/{conflict_country}_{flight_mode}_Map.html')

## 7. Recipient Country Refugee Counts


In [877]:
conflicts = locations[locations['location_type']=='conflict_zone']


Use routes gerated above called conflict_exit_routes

In [878]:
def get_exit_route(row, mode):
    lat=None
    lng=None
    dest=None
    try:
        dest = conflict_exit_routes[row['#name']]['crossing']['country']
        lat = conflict_exit_routes[row['#name']]['crossing']['latitude']
        lng = conflict_exit_routes[row['#name']]['crossing']['longitude']
    except Exception as e:
        print(e)
        dest = None
    row[f'{mode}_destination'] = dest
    row[f'latitude'] = lat
    row[f'longitude'] = lng
    return row

In [879]:
conflicts = conflicts.apply(lambda row: get_exit_route(row, flight_mode), axis=1)

'NoneType' object is not subscriptable


In [880]:
border_countries=  border_countries_results.copy()

In [881]:
border_countries

Unnamed: 0,country,conflict,historic_pop,historic_GDP_norm,v2x_libdem,predicted_shares,country_code
0,Algeria,Mali,43851043,1.0,0.151,0.295177,DZ
1,Burkina Faso,Mali,20903278,0.073087,0.504,0.220055,BF
2,Cote d’Ivoire,Mali,26378275,0.389764,0.314,0.217778,CI
3,Guinea,Mali,13132792,0.056657,0.125,0.063608,GN
4,Mauritania,Mali,4649660,0.0,0.179,0.07207,MR
5,Niger,Mali,24206636,0.042508,0.401,0.171417,NE
6,Senegal,Mali,16743930,0.122036,0.572,0.258906,SN


First sum the total population across twenty conflicts. Then get the percentage of total people in conflict zones at each conflict.For example if the total population of the 20 conflict zones is 1 million people and conflict zone A has a population of 200,000 people. Then it has 20% of people in conflict. If the total country population is 5 million, and 10% are expected to flee due to the conflict then we "route" 10% * 20% * 5,000,000 = 100,000 people through conflict zone A.  

In [882]:
#total population of conflict country. Times the percent we want to leave due to conflict
conflict_country_historic_pop=int(conflict_country_historic_pop)
conflicts["pop_percent_of_conflict_cities"]=conflicts["population"]/conflicts["population"].sum()
conflicts[f"refugee_estimated_leaving_via_{flight_mode}"]=conflicts["pop_percent_of_conflict_cities"]*(conflict_country_historic_pop*percent_of_pop_leaving)

In [883]:
conflicts.head(5)

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type,driving_destination,pop_percent_of_conflict_cities,refugee_estimated_leaving_via_driving
35,Bamako,11.951002,-8.785012,ML,1297281,Mali,conflict_zone,Guinea,0.545819,1105329.0
36,Ségou,13.111674,-4.347421,ML,153349,Mali,conflict_zone,Burkina Faso,0.06452,130658.8
37,Sikasso,11.119193,-5.360715,ML,144786,Mali,conflict_zone,Burkina Faso,0.060917,123362.8
38,Mopti,13.111674,-4.347421,ML,108456,Mali,conflict_zone,Burkina Faso,0.045632,92408.34
39,Koutiala,12.063125,-4.669205,ML,99353,Mali,conflict_zone,Burkina Faso,0.041802,84652.26


Reduce the columns and change column names

In [884]:
COL=["#name","country",f"{flight_mode}_destination","latitude","longitude",f"refugee_estimated_leaving_via_{flight_mode}"]
reduced_conflicts=conflicts[COL]
reduced_conflicts=reduced_conflicts.rename(columns={"#name":"origin city", "country":"origin country",f"{flight_mode}_destination":"destination country",f"refugee_estimated_leaving_via_{flight_mode}":"total refugees"})

Save file

In [885]:
reduced_conflicts.to_csv(f'outputs/{conflict_country}_{flight_mode}_total_refugees.csv',index=False)

In [886]:
reduced_conflicts

Unnamed: 0,origin city,origin country,destination country,latitude,longitude,total refugees
35,Bamako,Mali,Guinea,11.951002,-8.785012,1105329.0
36,Ségou,Mali,Burkina Faso,13.111674,-4.347421,130658.8
37,Sikasso,Mali,Burkina Faso,11.119193,-5.360715,123362.8
38,Mopti,Mali,Burkina Faso,13.111674,-4.347421,92408.34
39,Koutiala,Mali,Burkina Faso,12.063125,-4.669205,84652.26
40,Gao,Mali,Niger,14.962609,0.757572,74127.07
41,Kayes,Mali,Senegal,14.456328,-12.204163,66804.68
42,Markala,Mali,Burkina Faso,13.111674,-4.347421,45786.67
43,Kolokani,Mali,Cote d’Ivoire,10.56281,-6.427474,41557.17
44,Kati,Mali,Guinea,11.951002,-8.785012,36571.06


Summarize for country level numbers

In [887]:
country_level_refugee=pd.DataFrame(data=reduced_conflicts.groupby(['destination country'])["total refugees"].sum())

In [888]:
country_level_refugee['total refugees']=country_level_refugee["total refugees"].round()

In [889]:
country_level_refugee

Unnamed: 0_level_0,total refugees
destination country,Unnamed: 1_level_1
Burkina Faso,573021.0
Cote d’Ivoire,97826.0
Guinea,1176962.0
Niger,74127.0
Senegal,89659.0
