In [243]:
from functools import partial
import geopandas as gpd
import pandas as pd
import math
import numpy as np
import shapely
from shapely.geometry import Point
import googlemaps
import pgeocode
import pyproj
from fuzzywuzzy import process


Set conflict city and number of cities for conflict and camp countries

In [244]:
conflict_country="Honduras"
number_conflict_cities=20
number_camp_cities=10

Read in cities data with all cities with population over 15000 people. Then subset columns 

In [245]:
CITY_FILE = "refugee_data/cities15000.txt"

In [246]:
city_df = pd.read_csv(
    CITY_FILE, 
    sep="\t", 
    header=0,
    names=[
         "geonameid", 
 "name", 
 "asciiname", 
 "alternatenames", 
 "latitude", 
 "longitude", 
 "feature class", 
 "feature code", 
 "country code", 
 "cc2", 
 "admin1 code", 
 "admin2 code", 
 "admin3 code", 
 "admin4 code", 
 "population", 
 "elevation", 
 "dem", 
 "timezone", 
 "modification date", 

    ]
)

In [247]:
subset_cols=["name","latitude","longitude", "country code", "population"]
city_df=city_df[subset_cols]

Read in the results from the collect_to_normalized notebook that we saved in the output folder

In [248]:
border_countries=  pd.read_csv(f"outputs/{conflict_country}_output_results.csv")

Read in county codes data so we merge the two data sources 

In [249]:
codes=  pd.read_csv("refugee_data/wikipedia-iso-country-codes.csv")

Add a column for country_code to our border_countries dataframe

In [250]:
options=codes["English short name lower case"]

for kk ,border in border_countries.iterrows():
    country,ratio,ind =process.extractOne(border["country"], options)
    border_countries.loc[kk,"country_code"]=codes.at[ind,"Alpha-2 code"]

We need the code for the conflict country as well.

In [251]:
country,ratio,ind =process.extractOne(border_countries["conflict"][0], options)
conflict_code=codes.at[ind,"Alpha-2 code"]
conflict_code

'HN'

Collect largest n cities in conflict country. This can be changed by the parameter number_conflict_cities. We also will add two column for country and location_type for future use. 

In [252]:
filtered_df=city_df[city_df["country code"]==conflict_code]
filtered_df=filtered_df.sort_values(by="population", ascending=False)
largest_conflict_cities=filtered_df[0:number_conflict_cities-1]
largest_conflict_cities['country']=conflict_country
largest_conflict_cities['location_type']="conflict_zone"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['country']=conflict_country
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  largest_conflict_cities['location_type']="conflict_zone"


In [253]:
largest_conflict_cities

Unnamed: 0,name,latitude,longitude,country code,population,country,location_type
9716,Tegucigalpa,14.0818,-87.20681,HN,850848,Honduras,conflict_zone
9720,San Pedro Sula,15.50417,-88.025,HN,489466,Honduras,conflict_zone
9734,Choloma,15.61444,-87.95302,HN,139100,Honduras,conflict_zone
9726,La Ceiba,15.75971,-86.78221,HN,130218,Honduras,conflict_zone
9728,El Progreso,15.4,-87.8,HN,100810,Honduras,conflict_zone
9733,Ciudad Choluteca,13.30028,-87.19083,HN,75872,Honduras,conflict_zone
9731,Comayagua,14.45139,-87.6375,HN,58784,Honduras,conflict_zone
9711,Puerto Cortez,15.82562,-87.92968,HN,48013,Honduras,conflict_zone
9725,La Lima,15.43333,-87.91667,HN,45955,Honduras,conflict_zone
9730,Danlí,14.03333,-86.58333,HN,44799,Honduras,conflict_zone


Do the same for largest camp cities for each bordering country


In [254]:
largest_camp_cities = pd.DataFrame(columns = city_df.columns)

for kk ,border in border_countries.iterrows():
    filtered_df=city_df[city_df["country code"]==border['country_code']]
    filtered_df['country']=border['country']
    filtered_df=filtered_df.sort_values(by="population", ascending=False)
    largest_camp_cities_f=filtered_df[0:number_camp_cities-1]
    largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
largest_camp_cities["location_type"]='camp'
largest_camp_cities

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['country']=border['country']
  largest_camp_cities=largest_camp_cities.append(largest_camp_cities_f)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

Unnamed: 0,name,latitude,longitude,country code,population,country,location_type
20274,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
20267,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
20273,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
20276,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
20284,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp
20282,Santa Tecla,13.67694,-89.27972,SV,124694,El Salvador,camp
20295,Apopa,13.80722,-89.17917,SV,112158,El Salvador,camp
20289,Delgado,13.72171,-89.16874,SV,71594,El Salvador,camp
20269,Sonsonate,13.71889,-89.72417,SV,59468,El Salvador,camp
9678,Guatemala City,14.64072,-90.51327,GT,994938,Guatemala,camp


Merge and save the data

In [226]:

for kk ,border in largest_conflict_cities.iterrows():
    largest_camp_cities=largest_camp_cities.append(border)

  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_camp_cities.append(border)
  largest_camp_cities=largest_c

In [227]:
final_df=largest_camp_cities

In [228]:
# update a column name to match future processing code.
final_df=final_df.rename(columns={"name":"#name"})

In [229]:
final_df

Unnamed: 0,#name,latitude,longitude,country code,population,country,location_type
20274,San Salvador,13.68935,-89.18718,SV,525990,El Salvador,camp
20267,Soyapango,13.71024,-89.13989,SV,329708,El Salvador,camp
20273,Santa Ana,13.99417,-89.55972,SV,176661,El Salvador,camp
20276,San Miguel,13.48333,-88.18333,SV,161880,El Salvador,camp
20284,Mejicanos,13.72397,-89.188,SV,160317,El Salvador,camp
20282,Santa Tecla,13.67694,-89.27972,SV,124694,El Salvador,camp
20295,Apopa,13.80722,-89.17917,SV,112158,El Salvador,camp
20289,Delgado,13.72171,-89.16874,SV,71594,El Salvador,camp
20269,Sonsonate,13.71889,-89.72417,SV,59468,El Salvador,camp
9678,Guatemala City,14.64072,-90.51327,GT,994938,Guatemala,camp


In [232]:
#save
final_df.to_csv(f'inputs/{conflict_country}_locations.csv',index=False)