# Data Collection

In [None]:
import json
import pandas as pd
from fuzzywuzzy import process
from sklearn.preprocessing import MinMaxScaler
from statsmodels.iolib.smpickle import load_pickle

Set these values as parameters for the model.
Set the conflict country and the number of cities to use for the conflict country and for each camp (bordering) country.


In [None]:
# Country in which the conflict takes place; used as the key into the border
# data and as a prefix for every input/output file name below.
conflict_country= "Honduras"
# Not referenced anywhere else in the visible notebook.
percent_of_pop_leaving=100
# Travel modes that are iterated over when querying Google Maps.
flight_mode=["driving"]
# NOTE(review): this is integer subtraction and evaluates to 2020. It is later
# passed through str()/int() to select a year — confirm that "year before the
# conflict" is intended rather than the string "2021-1".
conflict_start=2021-1
# Bordering countries to drop (fuzzy-matched by name) / extra countries to add.
excluded_countries=[]
added_countries=[]
# How many of the most populous cities to keep for the conflict country and
# for each bordering country.
number_conflict_cities=20
number_camp_cities=10

read in country border data

In [None]:
# Load the mapping of country name -> list of bordering countries.
# The context manager closes the file handle as soon as the JSON is parsed
# (the original left it open for the lifetime of the kernel).
with open('../refugee_data/country_border_data.json') as country_border:
    countries_that_border = json.load(country_border)

get a list of touching countries

In [None]:
# Work on a copy so that the removals/additions in the following cells do not
# mutate the list stored inside countries_that_border (which would make the
# cells non-idempotent on re-run).
touching_list = list(countries_that_border[conflict_country])
touching_list

remove any countries that are to be excluded.


In [None]:
# Remove every excluded country whose best fuzzy match among the bordering
# countries scores above 89.
# The match is resolved against the *current* list on each iteration and the
# entry is removed by value; the original popped by a position taken from a
# snapshot, which pointed at the wrong element after the first removal.
for ex in excluded_countries:
    if not touching_list:
        # nothing left to match against
        break
    country, value = process.extractOne(ex, touching_list)
    ind = touching_list.index(country)
    if value > 89:
        touching_list.pop(ind)
    print(country, value, ind)
    print(touching_list)

Add any countries that should be included manually.

In [None]:
# Append the manually added countries, in order, to the bordering list.
touching_list.extend(added_countries)
touching_list

create a dataframe to build upon.


In [None]:
# One row per bordering country, each tagged with the conflict country.
touching_df = pd.DataFrame({'bording_countries': touching_list}).assign(
    conflict=conflict_country
)

read in historic_pop

In [None]:
# Table with a "Country Name" column plus the year columns matched against conflict_start below.
historic_pop=pd.read_csv('../refugee_data/historic_pop.csv')    

Get historic populations from year before conflict


In [None]:
# Country names to fuzzy-match the bordering countries against.
options = historic_pop["Country Name"]
historic_pop_cols = historic_pop.columns
touching_df['historic_pop'] = None

# position -> column label, so the match also reports the column position
indexed_col = dict(enumerate(historic_pop_cols))

# Column whose label is the closest fuzzy match to the conflict year.
column, ratio_year, year_column_idx = process.extractOne(
    str(conflict_start), indexed_col
)

for kk, border in touching_df.iterrows():
    # `ind` is used as the row label of the matched country in historic_pop
    country, ratio, ind = process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk, "historic_pop"] = historic_pop.at[ind, column]

read in historic gdp per cap data. Get data from historic gdp per cap given the year.


In [None]:
# Table with a "Country Name" column plus the year columns matched against conflict_start below.
gdp_per_cap_historic=pd.read_csv("../refugee_data/gdp_per_cap.csv")

In [None]:
# Country names to fuzzy-match the bordering countries against.
options = gdp_per_cap_historic["Country Name"]
historic_gdp_cols = gdp_per_cap_historic.columns
touching_df['historic_gdp_per_cap'] = None

# position -> column label, so the match also reports the column position
indexed_gdp_col = dict(enumerate(historic_gdp_cols))

# Column whose label is the closest fuzzy match to the conflict year.
column, ratio_year, year_column_idx = process.extractOne(
    str(conflict_start), indexed_gdp_col
)

for kk, border in touching_df.iterrows():
    # `ind` is used as the row label of the matched country in gdp_per_cap_historic
    country, ratio, ind = process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk, "historic_gdp_per_cap"] = gdp_per_cap_historic.at[ind, column]

calculate historic gdp for later use if needed


In [None]:
# GDP = population * GDP per capita, computed row by row.
# Rows whose inputs are missing or non-numeric keep None and are reported.
touching_df['calculated_historic_gdp'] = None
for kk, row in touching_df.iterrows():
    try:
        touching_df.loc[kk, "calculated_historic_gdp"] = (
            int(row['historic_pop']) * float(row['historic_gdp_per_cap'])
        )
    except (TypeError, ValueError, OverflowError) as e:
        # Only conversion failures are expected here; anything else should surface.
        # Report both inputs, since either one can be the missing value.
        print(
            e,
            f'Could not calculate GDP for {row["bording_countries"]}: '
            f'population was {row["historic_pop"]}, '
            f'GDP per cap was {row["historic_gdp_per_cap"]}',
        )

Read in the V-Dem data and subset the dataset to include only the columns we care about.

In [None]:
# Full V-Dem table; only the identifying columns and the two indices are kept.
Dem = pd.read_csv("../refugee_data/V-Dem-CY-Core-v12.csv")
columnList = ["country_name", "year", "v2xeg_eqdr", "v2x_libdem"]
country_dem = Dem.loc[:, columnList]

Collect the liberal democracy index (`v2x_libdem`) and the access to justice for women data (`v2xeg_eqdr`) for each country for the conflict year.


In [None]:
# Fill the two V-Dem indices for each bordering country for the conflict year.
touching_df["v2x_libdem"] = None
touching_df["v2xeg_eqdr"] = None

options = country_dem['country_name'].unique()
conflict_year = int(conflict_start)

for kk, row in touching_df.iterrows():
    country, ratio = process.extractOne(row["bording_countries"], options)
    # Select the country/year rows once instead of filtering twice.
    year_rows = country_dem.loc[
        (country_dem["country_name"] == country) & (country_dem["year"] == conflict_year)
    ]
    if year_rows.empty:
        # The original indexed [0] on an empty list here and raised IndexError.
        # Leaving None lets the later dropna() step remove the country instead.
        print(f'No V-Dem data for {country} in {conflict_year}')
        continue
    touching_df.loc[kk, "v2xeg_eqdr"] = year_rows['v2xeg_eqdr'].to_list()[0]
    touching_df.loc[kk, "v2x_libdem"] = year_rows['v2x_libdem'].to_list()[0]

read in historic gdp from world bank not calculated manually.

In [None]:
# Table with a "Country Name" column plus the year columns matched against conflict_start below.
historic_GDP=pd.read_csv('../refugee_data/GDP_historic.csv')

Get the historic (total) GDP for each bordering country for the given year.


In [None]:
# Country names to fuzzy-match the bordering countries against.
options = historic_GDP["Country Name"]
historic_GDP_cols = historic_GDP.columns
touching_df['historic_GDP'] = None

# position -> column label, so the match also reports the column position
indexed_GDP_col = dict(enumerate(historic_GDP_cols))

# Column whose label is the closest fuzzy match to the conflict year.
column, ratio_year, year_column_idx = process.extractOne(
    str(conflict_start), indexed_GDP_col
)

for kk, border in touching_df.iterrows():
    # `ind` is used as the row label of the matched country in historic_GDP
    country, ratio, ind = process.extractOne(border["bording_countries"], options)
    touching_df.loc[kk, "historic_GDP"] = historic_GDP.at[ind, column]

In [None]:
# Show the assembled per-country table.
touching_df

Normalize the columns we will run the model on that are not already an index. For our current model that is only historic_GDP.

In [None]:
# Columns that still need min-max scaling before they go into the model.
cols_to_scale = ['historic_GDP']
# The rest of the notebook refers to the bordering country simply as "country".
touching_df = touching_df.rename({'bording_countries': 'country'}, axis='columns')

In [None]:
# Min-max scale each column in cols_to_scale within its conflict group and
# attach the result to the table as "<col>_norm".
#
# Changes from the previous version:
#   * DataFrame.append (removed in pandas 2.0) is replaced by a single pd.concat.
#   * Every scaled column is merged onto the running result; before, the result
#     was rebuilt from touching_df on each pass, so only the last column survived.
#   * The debug print of every group frame is dropped.
scaler = MinMaxScaler()
normalized_data = touching_df
for col in cols_to_scale:
    group_frames = []
    for conflict_name, group in touching_df.groupby('conflict'):
        scaled = scaler.fit_transform(group[col].values.reshape(-1, 1)).ravel()
        group_frames.append(
            pd.DataFrame(
                {
                    'country': group['country'].values,
                    'conflict': group['conflict'].values,
                    f"{col}_norm": scaled,
                }
            )
        )
    if group_frames:
        normed = pd.concat(group_frames, ignore_index=True)
    else:
        normed = pd.DataFrame(columns=['country', 'conflict', f"{col}_norm"])
    normalized_data = pd.merge(
        normalized_data, normed, on=['country', 'conflict'], how='right'
    )

In [None]:
# Show the table including the normalised column(s).
normalized_data

Load the prebuilt model.

In [None]:
# Load the fitted model results from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
new_results = load_pickle("../refugee_model_results.pickle")

Set the independent variables.


In [None]:
# Independent variables fed to the model.
features_cols = ['historic_GDP_norm', 'v2x_libdem']
features_normalized = normalized_data.loc[:, features_cols]
# Preview the identifying columns next to the features.
normalized_data[['country', "conflict", *features_cols]]

Currently if a country does not have data for a column we are dropping that country. If you need to not drop a country you need to fill in the missing data manually or some other method.

In [None]:
# Drops every row with a missing value in ANY column, not only the feature columns.
normalized_data=normalized_data.dropna()

Select only features that are needed and use the model to predict 


In [None]:
# Predict on the feature columns only and store the result next to the inputs.
features_to_predict = normalized_data.loc[:, features_cols]
shares = new_results.predict(features_to_predict)
normalized_data = normalized_data.assign(predicted_shares=shares)

save results to a csv file for next step.

In [None]:
# Keep only the columns the later steps read, then persist them per conflict country.
output_columns = ['country', "conflict", "historic_pop", 'predicted_shares']
output_results = normalized_data.loc[:, output_columns]
output_results.to_csv(f'outputs/{conflict_country}_output_results.csv', index=False)

In [None]:
# Show what was written to the outputs folder.
output_results

# Largest Cities

In [None]:
from functools import partial
import geopandas as gpd
import pandas as pd
import math
import numpy as np
import shapely
from shapely.geometry import Point
import googlemaps
import pgeocode
import pyproj
from fuzzywuzzy import process


Read in cities data with all cities with population over 15000 people. Then subset columns 

In [None]:
CITY_FILE = "../refugee_data/cities15000.txt"

# Column labels applied to the tab-separated city file, in file order.
CITY_COLUMNS = [
    "geonameid", "name", "asciiname", "alternatenames",
    "latitude", "longitude",
    "feature class", "feature code",
    "country code", "cc2",
    "admin1 code", "admin2 code", "admin3 code", "admin4 code",
    "population", "elevation", "dem", "timezone", "modification date",
]

# NOTE(review): header=0 together with names= discards the first line of the
# file as a header. If the file has no header row, the first city is lost —
# confirm against the data file.
# NOTE(review): with the default NA handling a country code of "NA" would be
# read as missing — confirm whether that matters for the countries in use.
city_df = pd.read_csv(CITY_FILE, sep="\t", header=0, names=CITY_COLUMNS)

# Only these columns are used downstream.
subset_cols = ["name", "latitude", "longitude", "country code", "population"]
city_df = city_df.loc[:, subset_cols]

Read in the results from the collect_to_normalized notebook that we saved in the output folder

In [None]:
# Per-country results (country, conflict, historic_pop, predicted_shares) saved by the previous section.
border_countries=  pd.read_csv(f"outputs/{conflict_country}_output_results.csv")

Read in the country codes data so we can merge the two data sources.

In [None]:
# ISO country code table.
# pandas treats the literal string "NA" as a missing value by default, which
# turns Namibia's Alpha-2 code into NaN. Only empty fields are treated as
# missing here so that "NA" survives as a real code.
codes = pd.read_csv(
    "../refugee_data/wikipedia-iso-country-codes.csv",
    keep_default_na=False,
    na_values=[""],
)

Add a column for country_code to our border_countries dataframe

In [None]:
# Country names from the ISO table, used as fuzzy-match candidates.
options = codes["English short name lower case"]

for kk, border in border_countries.iterrows():
    best_match = process.extractOne(border["country"], options)
    # `ind` is used as the row label of the matched name in `codes`
    country, ratio, ind = best_match
    border_countries.loc[kk, "country_code"] = codes.at[ind, "Alpha-2 code"]

We need the code for the conflict country as well.

In [None]:
# Every row carries the same conflict country, so the row labelled 0 is enough.
conflict_name_from_results = border_countries.loc[0, "conflict"]
country, ratio, ind = process.extractOne(conflict_name_from_results, options)
conflict_code = codes.at[ind, "Alpha-2 code"]
conflict_code

Collect the n largest cities in the conflict country. n is set by the parameter number_conflict_cities. We also add two columns, country and location_type, for future use.

In [None]:
# The number_conflict_cities most populous cities of the conflict country.
# .head(n) returns n rows; the previous slice [0:n-1] returned one city too few.
# .assign builds a new frame, so no columns are set on a slice of city_df
# (which raised SettingWithCopyWarning).
filtered_df = city_df[city_df["country code"] == conflict_code]
largest_conflict_cities = (
    filtered_df
    .sort_values(by="population", ascending=False)
    .head(number_conflict_cities)
    .assign(country=conflict_country, location_type="conflict_zone")
)
largest_conflict_cities

Do the same for largest camp cities for each bordering country


In [None]:
# The number_camp_cities most populous cities of every bordering country.
#   * .head(n) returns n rows; the previous slice [0:n-1] returned n-1.
#   * Frames are collected and concatenated once; DataFrame.append was removed
#     in pandas 2.0 and was quadratic when used in a loop.
#   * .assign avoids setting a column on a slice of city_df.
camp_frames = []
for kk, border in border_countries.iterrows():
    camp_frames.append(
        city_df[city_df["country code"] == border['country_code']]
        .assign(country=border['country'])
        .sort_values(by="population", ascending=False)
        .head(number_camp_cities)
    )

if camp_frames:
    largest_camp_cities = pd.concat(camp_frames)
else:
    # no bordering countries: keep an empty frame with the expected columns
    largest_camp_cities = pd.DataFrame(columns=list(city_df.columns) + ['country'])
largest_camp_cities["location_type"] = 'camp'
largest_camp_cities

Merge and save the data

In [None]:

# Stack camp cities and conflict cities into one table.
# A single concat replaces the row-by-row DataFrame.append (removed in pandas
# 2.0). It also leaves largest_camp_cities untouched, so re-running this cell
# no longer adds the conflict rows a second time.
locations = pd.concat([largest_camp_cities, largest_conflict_cities]).rename(
    columns={"name": "#name"}
)

In [None]:
# Preview the combined camp/conflict locations.
locations.head()

save to inputs folder with unique file name

In [None]:
# One locations file per conflict country; the mapping section reads it back in.
locations.to_csv(f'inputs/{conflict_country}_locations.csv',index=False)

# Ukraine Refugee Mapping

In [None]:
import json
import time
import requests

# google libraries
import googlemaps
import polyline

# mapping and shape utils
import folium
from folium import plugins

# data processing
import pandas as pd
import geopandas as gpd

import math

First, you need to enable the Google Directions API and the Google Distance Matrix API; both are called below.

In [None]:
import os

# The API key is read from the environment so it is never stored in the
# notebook or its version history. Set GOOGLE_MAPS_API_KEY before starting the
# kernel; a missing variable raises KeyError here instead of failing later.
# NOTE(review): the key that was previously hard-coded in this cell should be
# treated as leaked and revoked.
gmaps = googlemaps.Client(key=os.environ["GOOGLE_MAPS_API_KEY"])

Add custom basemaps to folium

In [None]:
# (layer name, tile URL template, attribution) for every custom basemap.
_BASEMAP_SPECS = [
    ('Google Maps', 'https://mt1.google.com/vt/lyrs=m&x={x}&y={y}&z={z}', 'Google'),
    ('Google Satellite', 'https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}', 'Google'),
    ('Google Terrain', 'https://mt1.google.com/vt/lyrs=p&x={x}&y={y}&z={z}', 'Google'),
    ('Google Satellite Hybrid', 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}', 'Google'),
    (
        'Esri Satellite',
        'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        'Esri',
    ),
]

# Tile layers keyed by display name. Each layer's `name` now equals its key;
# previously the hybrid layer was labelled 'Google Satellite', the same label
# as the plain satellite layer.
basemaps = {
    layer_name: folium.TileLayer(
        tiles=tile_url,
        attr=attribution,
        name=layer_name,
        overlay=True,
        control=True,
    )
    for layer_name, tile_url, attribution in _BASEMAP_SPECS
}

## Read in Locations

In [None]:
# Locations file written by the "Largest Cities" section.
df = pd.read_csv(f'inputs/{conflict_country}_locations.csv')
df.head(50)

In [None]:
# Rows describing cities inside the conflict country.
conflicts = df.loc[df["location_type"].eq("conflict_zone")]

In [None]:
# Rows describing candidate destination cities in the bordering countries.
camps = df.loc[df["location_type"].eq("camp")]

In [None]:
# Per-country predicted shares; get_closest() uses them to weight travel times.
attractions = pd.read_csv(f'outputs/{conflict_country}_output_results.csv')

In [None]:
# Show the per-country shares.
attractions

In [None]:
def get_closest(loc_lat, loc_lon, targets, mode):
    """Find the target with the smallest attraction-weighted travel time.

    The travel duration from the origin to every target is requested from the
    Google Distance Matrix API (through the module-level ``gmaps`` client) and
    multiplied by ``1 / sqrt(predicted_shares)`` of the target's country, as
    looked up in the module-level ``attractions`` frame.

    Args:
        loc_lat: Latitude of the origin.
        loc_lon: Longitude of the origin.
        targets: DataFrame with ``latitude``, ``longitude`` and ``country``
            columns.
        mode: Travel mode passed through to ``gmaps.distance_matrix``.

    Returns:
        ``(closest_row, element)``: the winning row of ``targets`` and its
        Distance Matrix element, or ``(None, None)`` when no target is usable.
    """
    chunk_size = 25  # destinations sent per Distance Matrix request
    closest_seconds = float("inf")
    closest_loc = None
    output = None

    for start in range(0, targets.shape[0], chunk_size):
        chunk = targets[start:start + chunk_size]
        results = gmaps.distance_matrix(
            origins=[(loc_lat, loc_lon)],
            destinations=list(zip(chunk.latitude, chunk.longitude)),
            mode=mode,
        )
        for idx, val in enumerate(results["rows"][0]["elements"]):
            # Only "OK" elements carry a duration. The original skipped
            # ZERO_RESULTS only and raised KeyError on any other status.
            if val.get("status") != "OK":
                continue
            candidate = chunk.iloc[idx]
            share = attractions.loc[
                attractions["country"] == candidate["country"], "predicted_shares"
            ]
            # Skip targets whose country has no share, or a share that is not
            # strictly positive (this also covers NaN). These cases used to
            # raise IndexError, ZeroDivisionError or ValueError.
            if share.empty or not share.iloc[0] > 0:
                continue
            attraction = share.iloc[0]
            seconds = val["duration"]["value"] * (1 / math.sqrt(attraction))
            if seconds <= closest_seconds:
                closest_seconds = seconds
                closest_loc = candidate
                output = val
    return closest_loc, output

In [None]:
# conflict_exit_routes_transit = {}

In [None]:
# mode="transit"
# for kk, conflict in conflicts.iterrows():
#     if conflict["#name"] not in conflict_exit_routes_transit:
#         closest_crossing, crossing_val = get_closest(conflict.latitude, conflict.longitude, camps, mode)
#         if isinstance(closest_crossing, type(None)):
#             print(f'{conflict["#name"]} No routes found')
#         conflict_exit_routes_transit[conflict["#name"]] = dict(crossing=closest_crossing, 
#                                                        crossing_v=crossing_val)

In [None]:
# Conflict city name -> dict(crossing=..., crossing_v=...), filled in below.
conflict_exit_routes = {}

In [None]:
# conflict_exit_routes_transit[conflict["#name"]]["crossing"]

Helper Encoder for json

In [None]:
import numpy as np

class NpEncoder(json.JSONEncoder):
    """JSON encoder that also serialises numpy integers, floats and arrays."""

    def default(self, obj):
        """Convert numpy values to plain Python; defer everything else to the base class."""
        converters = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda array: array.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)

In [None]:
NoneType = type(None)

# The transit routes are only produced by an optional (currently commented-out)
# cell. When that dict does not exist, every conflict city is treated as having
# no transit route; the original raised NameError here instead.
transit_routes = globals().get("conflict_exit_routes_transit", {})

for mode in flight_mode:
    for kk, conflict in conflicts.iterrows():
        conflict_name = conflict["#name"]
        transit_route = transit_routes.get(conflict_name)
        # Only compute a route for cities that have no transit crossing.
        if transit_route is None or transit_route["crossing"] is None:
            closest_crossing, crossing_val = get_closest(
                conflict.latitude, conflict.longitude, camps, mode
            )
            if closest_crossing is None:
                print(f'{conflict_name} No routes found')
            conflict_exit_routes[conflict_name] = dict(
                crossing=closest_crossing, crossing_v=crossing_val
            )

    # Turn the crossing rows into plain dicts so they can be serialised.
    for kk, vv in conflict_exit_routes.items():
        if vv['crossing'] is not None:
            vv['crossing'] = dict(vv['crossing'])

    with open(f'outputs/{conflict_country}_exit_routes_{mode}_hybrid.json', 'w') as f:
        json.dump(conflict_exit_routes, f, cls=NpEncoder)

In [None]:
# Inspect the routes that were found.
conflict_exit_routes


## Directions and Plotting

In [None]:
# Conflict city name -> Google Directions result (or None when unavailable).
# NOTE: results are keyed by city only, so with several modes in flight_mode
# the last mode wins.
# flight_mode is no longer re-assigned here; the value from the parameter cell
# at the top of the notebook is used.
all_directions = {}
for mode in flight_mode:
    for kk, conflict in conflicts.iterrows():
        conflict_name = conflict['#name']
        print(f"Getting directions for conflict: {conflict_name}")

        if conflict_name not in conflict_exit_routes:
            continue

        xing = conflict_exit_routes[conflict_name]['crossing']
        if xing is None:
            # no reachable destination was found for this city
            print(f"{conflict_name}: no crossing available, skipping directions")
            all_directions[conflict_name] = None
            continue

        try:
            directions_result = gmaps.directions(
                (conflict.latitude, conflict.longitude),
                (xing['latitude'], xing['longitude']),
                mode=mode,
            )
            # Tag the first route with its destination for the map tooltips.
            directions_result[0]['name'] = xing['#name']
            directions_result[0]['country'] = xing['country']
        except Exception as e:
            # Best effort: an API error or an empty result is recorded as None
            # so the remaining cities are still processed.
            print(e)
            directions_result = None
        all_directions[conflict_name] = directions_result

In [None]:
# Inspect the raw directions results.
all_directions

In [None]:
# `mode` is the last value left over from the directions loop above.
directions_path = f'outputs/{conflict_country}_border_crossing_directions_{mode}_hybrid.json'
with open(directions_path, 'w') as f:
    json.dump(all_directions, f)
 

## Nearest Camp Map

Bucket the population by quartile to obtain an appropriate line stroke.

In [None]:
# Summary statistics of the conflict-city populations; the quartiles drive bucket_population().
c_desc = conflicts.population.describe()

In [None]:
def bucket_population(population):
    """Map a population to a line stroke width based on the quartiles in ``c_desc``.

    Returns 2.5, 5 or 7.5 for populations up to the 25%, 50% and 75% quartile
    respectively, and 10 for anything larger.
    """
    for quartile, stroke in (('25%', 2.5), ('50%', 5), ('75%', 7.5)):
        if population <= c_desc[quartile]:
            return stroke
    return 10
        

In [None]:
# `conflicts` is a filtered slice of `df`; assigning a column to it in place
# raises SettingWithCopyWarning. .assign returns a new frame instead.
conflicts = conflicts.assign(stroke=conflicts['population'].apply(bucket_population))

In [None]:
# Create Map
map = folium.Map(location=[conflicts.latitude.mean(),conflicts.longitude.mean()], zoom_start=6)

# Plot conflict starting points
for kk, start in conflicts.iterrows():
    start_m = folium.Marker([start.latitude, start.longitude], popup=start['#name'], 
                            icon=folium.Icon(icon='glyphicon glyphicon-fire', color='darkred'))
    start_m.add_to(map)
    
# Plot ending locations
for kk, vv in camps.iterrows():
    icon = 'glyphicon glyphicon-flag'
    color = 'orange'
    popup_text = f"<b>City Name: </b>{vv['#name']}<br>"\
                 f"<b>Country: </b>{vv['country']}"
    popup = folium.Popup(popup_text, max_width=300,min_width=150)
    xing = folium.Marker([vv.latitude, vv.longitude], popup=popup, 
                          icon=folium.Icon(icon=icon, color=color))
    xing.add_to(map)

# plot exit routes (driving)
if "driving" in flight_mode:
    fg_d = folium.FeatureGroup("Driving")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            distance = directions[0]['legs'][0]['distance']['text']
            duration = directions[0]['legs'][0]['duration']['text']
            end_location = directions[0]['name']
            end_country = camps[camps['#name']==end_location].country.values[0]
            tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by car is <b>"\
                      f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                      f"<b>{population}</b> people are effected by this conflict."
            polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
            polyline_m = folium.PolyLine(polyline_, color='#4A89F3', tooltip=tooltip, weight=stroke)
            polyline_m.add_to(fg_d)
    fg_d.add_to(map)
        
# plot exit routes (transit)
if "transit" in flight_mode:
    fg_t = folium.FeatureGroup("Transit")
    for kk, vv in all_directions.items():
        stroke = int(conflicts[conflicts['#name']==kk]['stroke'])
        population = "{:,}".format(int(conflicts[conflicts['#name']==kk]['population']))
        directions = all_directions[kk]
        if not isinstance(directions, type(None)):
            if len(directions) > 0:
                distance = directions[0]['legs'][0]['distance']['text']
                duration = directions[0]['legs'][0]['duration']['text']
                end_location = directions[0]['name']
                end_country = camps[camps['#name']==end_location].country.values[0]
                tooltip = f"Travel between <b>{kk}</b> and <b>{end_location}, {end_country}</b> by transit is <b>"\
                          f"{distance}</b> and takes <b>{duration}</b>.</br></br>"\
                          f"<b>{population}</b> people are effected by this conflict."
                polyline_ = polyline.decode(directions[0]['overview_polyline']['points'])
                polyline_m = folium.PolyLine(polyline_, color='#7570b3', tooltip=tooltip, weight=stroke)
                polyline_m.add_to(fg_t)        
        

    fg_t.add_to(map)
# folium.Choropleth(
#     manual_routes,
#     line_weight=3,
#     line_color='#f03b20',
#     name='Manually generated routes'
# ).add_to(map)
        
# Add custom basemaps
basemaps['Google Satellite Hybrid'].add_to(map)
# basemaps['Esri Satellite'].add_to(map)
# basemaps['Google Satellite'].add_to(map)
basemaps['Google Maps'].add_to(map)

# Add a layer control panel to the map.
map.add_child(folium.LayerControl())

# Add fullscreen button
plugins.Fullscreen().add_to(map)

In [None]:
def add_legend(map):
    """Attach a fixed-position HTML legend to ``map`` and return the same map.

    The legend shows the dark-red fire marker ("Conflict Area") and the orange
    flag marker ("Destination City").
    """
    # The template has no replacement fields, so the .format() call below
    # leaves it unchanged.
    legend_html = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@100;300;400&display=swap');
    </style>
    
     <div style="
     padding-left:5px; padding-top:5px;
     position: fixed; 
     bottom: 50px; left: 50px; width: 160px; height: 120px;   
     border:2px solid grey; z-index:9999; border-radius: 15px;
     
     background-color:white;
     opacity: .85;
     
     font-size:14px;
     font-weight: bold;
     font-family: 'Roboto', sans-serif;
     ">

     <div class="awesome-marker-icon-darkred awesome-marker" style="margin-top: 10px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-fire icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:20px">Conflict Area</div>
     
     <div class="awesome-marker-icon-orange awesome-marker" style="margin-top: 60px; margin-left:5px;">
         <i class="fa-rotate-0 glyphicon glyphicon-glyphicon glyphicon-flag icon-white"></i>
     </div>
     <div style="margin-left:40px; margin-top:25px">Destination City</div>     
     
     
      </div> """.format( title = "Legend html")
    # Inject the legend into the root HTML of the rendered map.
    map.get_root().html.add_child(folium.Element( legend_html ))
    return map

In [None]:
# add_legend returns the same map object it was given.
map = add_legend(map)

In [None]:
# Render the interactive map inline.
display(map)

In [None]:
# save map
map.save(f'{conflict_country}_Border_Crossing_Map_UPDATED.html')

In [None]:
# Distinct conflict city names.
conflicts['#name'].unique()

# Recipient Country Counts


In [None]:
# Re-read the locations file; this also discards the `stroke` column added for the map.
df = pd.read_csv(f'inputs/{conflict_country}_locations.csv')
df.head(50)

In [None]:
# Split the locations by their role.
location_type = df['location_type']
conflicts = df.loc[location_type.eq('conflict_zone')]
camps = df.loc[location_type.eq('camp')]

In [None]:
# Show the conflict cities.
conflicts

Use the routes generated above, stored in conflict_exit_routes.

In [None]:
def get_exit_route(row, mode):
    """Add a ``{mode}_destination`` entry holding the destination country to ``row``.

    The route table for ``mode`` is looked up by name in the notebook's
    globals, so a table that was never built (for example the transit routes)
    yields ``None`` instead of a NameError. The previous version indexed
    ``routes[name][0]``, which always raised KeyError on the
    ``dict(crossing=..., crossing_v=...)`` entries, so the destination was
    always ``None``.

    Args:
        row: Mapping-like row with a ``'#name'`` key (a conflict city).
        mode: One of ``'driving'``, ``'transit'``, ``'hybrid_transit'`` or
            ``'hybrid_driving'``; any other value yields ``None``.

    Returns:
        The same ``row`` with ``row[f'{mode}_destination']`` set to the country
        of the chosen crossing, or ``None`` when no route is known.
    """
    route_table_names = {
        'driving': 'conflict_exit_routes',
        'transit': 'conflict_exit_routes_transit',
        'hybrid_transit': 'conflict_exit_routes_hybrid_transit',
        'hybrid_driving': 'conflict_exit_routes_hybrid_driving',
    }
    routes = globals().get(route_table_names.get(mode), {})

    dest = None
    route = routes.get(row['#name'])
    if route is not None and route.get('crossing') is not None:
        # The crossing entry carries the destination country directly.
        dest = route['crossing']['country']

    row[f'{mode}_destination'] = dest
    return row

In [None]:
# Inspect the routes that get_exit_route reads.
conflict_exit_routes

In [None]:
# Add the driving destination to every conflict city (extra keyword arguments are forwarded to the function).
conflicts = conflicts.apply(get_exit_route, axis=1, mode="driving")
    

In [None]:
# Show the conflict cities with their destination column.
conflicts

In [None]:
# Show the candidate destination cities.
camps

In [None]:
# Re-read the per-country results saved by the first section.
border_countries=  pd.read_csv(f"outputs/{conflict_country}_output_results.csv")

In [None]:
# Show the per-country results.
border_countries

In [None]:
# Same call as a few cells above; it recomputes the driving destination column.
conflicts = conflicts.apply(get_exit_route, axis=1, mode='driving')


In [None]:
# Final look at the exit routes.
conflict_exit_routes