# OSM Python Query

The objective of this notebook is to query OSM database and return list of city and town centroids for all African countries


#### Open street map search 

From Overpass Turbo, use this API and follow the instructions from the
URL below to quary OSM with python

https://towardsdatascience.com/loading-data-from-openstreetmap-with-python-and-the-overpass-api-513882a27fd0

OSM CRS IS ----  'EPSG:4326'

#### Open Street Map Query for Cities and Villages 
URL for OSM Overpass Turbo: https://overpass-turbo.eu/


URL for OSM Features:
https://wiki.openstreetmap.org/wiki/Map_Features#Populated_settlements.2C_urban

#### Example from Ghana for Towns ... For cities, use *place=city*
    area["name:en"="Ghana"]->.boundaryarea;
    (
    node(area.boundaryarea)[place=town];
    way(area.boundaryarea)[place=town];>;
    rel(area.boundaryarea)[place=town];>>;
    );
    out meta;
    // print results
    out body;
    >;
    out skel qt;

Note: From what I can tell, OSM does not allow for 'Africa' to be used as an 'area' key. 

#### Note: This will pull refugee camps. Really cool. 

In [1]:
### List of African Countries from the UN in OSM wiki

countries = [
#Northern Africa
    'Algeria',
    'Egypt',
    'Libya',
    'Morocco',
    'Tunisia',
    'Western Sahara',

    #Sub-Saharan Africa
    #Eastern Africa
    'Burundi',
    'Comoros',
    'Djibouti',
    'Eritrea',
    'Ethiopia',
    'Kenya',
    'Madagascar',
    'Malawi',
    'Mauritius',
    #Mayotte,
    'Mozambique',
    'Réunion',
    'Rwanda',
    'Somalia',
    'Sudan',
    'South Sudan',
    'Uganda',
    'Tanzania',
    'Zambia',
    'Zimbabwe',
    
    #Middle Africa
    'Angola',
    'Cameroon',
    'Central African Republic',
    'Chad',
    'Congo-Brazzaville',
    'Democratic Republic of the Congo', # Democratic Republic of the Congo needs to be used, not Congo-Kinshasa
    'Equatorial Guinea',
    'Gabon',
    'Sao Tome and Principe',
    
    # Southern Africa
    'Botswana',
    'Lesotho',
    'Namibia',
    'South Africa',
    'Swaziland',
    
    # Western Africa
    'Benin',
    'Burkina Faso',
    'Cape Verde',
    'Côte d\'Ivoire',
    'Gambia',
    'Ghana',
    'Guinea',
    'Guinea-Bissau',
    'Liberia',
    'Mali',
    'Mauritania',
    'Niger',
    'Nigeria',
    'Senegal',
    'Sierra Leone',
    'Togo'
]

In [2]:
len(countries)

55

# Query  OSM Cities and Towns Points for All African Countries

In [3]:
# import time
import requests
import json

# Check time
# checkpoint = time.time()

# Loop through all OSM countries
response_dict = {}
overpass_url = "http://overpass-api.de/api/interpreter"

# put curly in quotes 

# countries = ['Chad','Ghana', "South Africa"]

for country_name in countries:
    overpass_query = """
            [out:json];
            area["name:en"="{}"];  
            (node[place=city](area);
            );
            out center;
            """.format(country_name)

    response = requests.get(overpass_url, params={'data': overpass_query})
    data = response.json()
    response_dict[country_name] = data

# print('elasped time to do the quary:{}'.format(time.time() - checkpoint))    

# Cut this on 2019 - 1 - 19 

#            way[place=town](area);
#            rel[place=town](area);

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
response_dict

In [None]:
response_dict['Democratic Republic of the Congo']['elements'][1]['tags']['name']

#### write out as json file

In [None]:
temp_data = '../../temp_data/'
erl_v2_data = '/Users/cascade/Github/Pop-ERL/temp_data/ERL19/ERLv2/'

# with open(temp_data+'20181206_osmafrica_cities_all.json', 'w') as fp:
#      json.dump(response_dict, fp)

In [None]:
#response_dict['Algeria']['elements'][1]['id']

# Loop through countries and make an np array w/ id, lat, long
import numpy as np
import pandas as pd

def osm_coords(response_dict):
    
    """
    This function takes an OSM dict object and returns a data frame
    with the id, lat, and long ...
    """
    coords = []
    for country in response_dict:    
        for element in response_dict[country]['elements']:
            country = country
            osm_id = str(element['id']) #keep as string for now
            lat = element['lat']
            lon = element['lon']
            
            if 'name' not in element['tags']: 
                city = 'NA'
            else: 
                city = element['tags']['name']
                    
            coords.append((country, osm_id, city, lat, lon,))

    coords_arr = np.array(coords)
    #coords_arr
    
    # make dataframe
    coords_df = pd.DataFrame(data=coords_arr)
    coords_df.columns = ['country','osm_id','town', 'lat','lon']

    return coords_df

In [None]:
osm_city = osm_coords(response_dict)
osm_city.head()
len(osm_city)

#### write out as csv w/ only id, lat, long

In [None]:
# Write out .csv

# NOTE - on 11-29-18 pulled 872 osm 'city' centroids w/o names to be used for AGU analysis 
# NOTE - on 12-06-18 pulled 871 -note missing- osm 'city' centroids countries to be used for AGU analysis 

# Note - on 01-14-19 pulled 876 osm 'city' level for ERL submission, saved filed as "20190114_osm_africa_cities.csv"
# Note - on 01-14-19 pulled 8175 osm 'town' level for ERL submission, saved filed as "20190114_osm_africa_towns.csv"


## South Sudan Added
# Note - on 02-21-19 pulled 18 osm 'city' level for ERL submission, saved filed as "20190221_osm_S_Sudan_cities.csv"
# Note - on 02-21-19 pulled 193 osm 'town' level for ERL submission, saved filed as "20190221_osm_S_Sudan_towns.csv"

## DRC Added 

# Note - on 04-30-291 pulled 56 osm 'city' level for ERL re-sbmission, saved file as "20190430_osm_DRC_cities.csv" 
# Note - on 04-30-291 pulled 495 osm 'town' level for ERL re-sbmission, saved file as "20190430_osm_DRC_towns.csv" 


#osm_city.to_csv(erl_v2_data+'20190430_osm_DRC_towns.csv', sep=',', index=False, header=True)

# Old Code

In [None]:
# Nesting order: 
#     response_dict = dict ... countries
#         elements  = list ... 
#             each list item is a dict w/ keys type, id, lat, long, tags
#                 tags = dict ... get names
                
                
# nest = response_dict['Chad']['elements'][1]['tags']['name:en'] # return name value 
# type(nest)
# nest

In [None]:
# Prof that not all cities with GPS cords have tag = 'name' ... if goal is to have labeled points, then remove all OSM
# points that do not have labeled 'names' as a tag
# print(len(response_dict_copy1['Sudan']['elements']))

# test = response_dict_copy1['Sudan']['elements']
# print(len(test))

# for city in test:
#     if 'name' not in city['tags']:
#         print('yes')

# for city in test:
#     if 'name' not in city['tags']:
#         test.remove(city)
# print(len(test))
# print(len(response_dict['Sudan']['elements']))
# print(len(response_dict_copy1['Sudan']['elements']))
# print(len(response_dict_copy2['Sudan']['elements']))

#### The code below will remove all points w/o labels

In [None]:
### remove points without names

# print(len(response_dict_copy2['Sudan']['elements']))

# for country in countries:
#     for city in response_dict_copy2[country]['elements']:
#         if 'name' not in city['tags']:
#             response_dict_copy2[country]['elements'].remove(city)

# print(len(response_dict_copy2['Sudan']['elements']))

# for city in response_dict_copy2['Sudan']['elements']:
#      if 'name' not in city['tags']:
#         print('yes')

# name = []
# for city in response_dict_copy2['Sudan']['elements']:
#     city_name = city['tags']['name']
#     name.append(city_name)

#### The code below will make a new dict with key as name and values as lat long and ID

In [None]:
# make dict with : id, lat, lon & name called gps
# good_keys = ['id', 'lat', 'lon', 'name', 'is_in:country', 'place', 'population']

# gps_keys = ['id', 'lat', 'lon']
# gps = {}
# for country_name in countries:
#     gps[country_name] = {}
#     for city_dict in response_dict_copy2[country_name]['elements']:
#         city_id = city_dict['tags']['name'] # will make key 'city name' w/ lat, lon, ids as values
#         gps[country_name].update({city_id:{key : value for key, value in city_dict.items() if key in gps_keys}})

In [None]:
# Copy response dict so we don't have to keep querying
# import copy

# response_dict_copy1 = copy.deepcopy(response_dict) # deep copy
# response_dict_copy2 = copy.deepcopy(response_dict) # deep copy ----> use this copy 

#### The code below will make a new dict with key as name and values as lat long and ID

In [None]:
# make dict with : id, lat, lon & name called gps
# good_keys = ['id', 'lat', 'lon', 'name', 'is_in:country', 'place', 'population']

# gps_keys = ['id', 'lat', 'lon']
# gps = {}
# for country_name in countries:
#     gps[country_name] = {}
#     for city_dict in response_dict_copy2[country_name]['elements']:
#         city_id = city_dict['tags']['name'] # will make key 'city name' w/ lat, lon, ids as values
#         gps[country_name].update({city_id:{key : value for key, value in city_dict.items() if key in gps_keys}})

In [None]:
# Loop through countries and make an np array w/ country, city name, id, lat, long
import numpy as np
import pandas as pd

coords = []
for country in gps:    
    for city in gps[country]:
        country_name = country
        city_name = city
        i_d = gps[country][city].get('id')
        lat = gps[country][city].get('lat')
        lon = gps[country][city].get('lon')
        coords.append((country_name, city_name, i_d, lat, lon))

coords_arr = np.array(coords)
coords_arr

# Try to make it into a dataframe
gps_df = pd.DataFrame(data=coords_arr)
gps_df.columns = ['Country','City','Id','lat','lon']


write it out 
gps_df.to_csv(outpath+'africa_cities.csv', sep=',', index=False, header=True)

In [None]:
# plot them 

# import numpy as np
# import matplotlib.pyplot as plt

# # Collect coords into list
# coords = []
# for element in data['elements']:
#   if element['type'] == 'node':
#     lon = element['lon']
#     lat = element['lat']
#     coords.append((lon, lat))
#   elif 'center' in element:
#     lon = element['center']['lon']
#     lat = element['center']['lat']
#     coords.append((lon, lat))
# # Convert coordinates into numpy array
# X = np.array(coords)
# plt.plot(X[:, 0], X[:, 1], 'o')
# plt.title('Cities in Ghana')
# plt.xlabel('Longitude')
# plt.ylabel('Latitude')
# plt.axis('equal')
# plt.show()

In [None]:
# Prof that not all cities with GPS cords have tag = 'name' ... if goal is to have labeled points, then remove all OSM
# points that do not have labeled 'names' as a tag
# print(len(response_dict_copy1['Sudan']['elements']))

# test = response_dict_copy1['Sudan']['elements']
# print(len(test))

# for city in test:
#     if 'name' not in city['tags']:
#         print('yes')

# for city in test:
#     if 'name' not in city['tags']:
#         test.remove(city)
# print(len(test))
# print(len(response_dict['Sudan']['elements']))
# print(len(response_dict_copy1['Sudan']['elements']))
# print(len(response_dict_copy2['Sudan']['elements']))