### Open street map search 

From Overpass Turbo, use this API and follow the instructions from the
URL below to quary OSM with python

https://towardsdatascience.com/loading-data-from-openstreetmap-with-python-and-the-overpass-api-513882a27fd0

OSM CRS IS ----  'EPSG:4326'

#### Open Street Map Query for Cities and Villages 
URL for OSM Overpass Turbo: https://overpass-turbo.eu/


URL for OSM Features:
https://wiki.openstreetmap.org/wiki/Map_Features#Populated_settlements.2C_urban

#### Example from Ghana for Towns ... For cities, use *place=city*
    area["name:en"="Ghana"]->.boundaryarea;
    (
    node(area.boundaryarea)[place=town];
    way(area.boundaryarea)[place=town];>;
    rel(area.boundaryarea)[place=town];>>;
    );
    out meta;
    // print results
    out body;
    >;
    out skel qt;

Note: From what I can tell, OSM does not allow for 'Africa' to be used as an 'area' key. 

### Note: This will pull refugee camps. Really cool. 

In [79]:
### List of African Countries from the UN

countries = [
#Northern Africa
    'Algeria',
    'Egypt',
    'Libya',
    'Morocco',
    'Tunisia',
    'Western Sahara',

    #Sub-Saharan Africa
    #Eastern Africa
    'Burundi',
    'Comoros',
    'Djibouti',
    'Eritrea',
    'Ethiopia',
    'Kenya',
    'Madagascar',
    'Malawi',
    'Mauritius',
    #Mayotte,
    'Mozambique',
    'Réunion',
    'Rwanda',
    'Somalia',
    'Sudan',
    'Uganda',
    'Tanzania',
    'Zambia',
    'Zimbabwe',
    
    #Middle Africa
    'Angola',
    'Cameroon',
    'Central African Republic',
    'Chad',
    'Congo-Brazzaville',
    'Congo-Kinshasa',
    'Equatorial Guinea',
    'Gabon',
    'Sao Tome and Principe',
    
    # Southern Africa
    'Botswana',
    'Lesotho',
    'Namibia',
    'South Africa',
    'Swaziland',
    
    # Western Africa
    'Benin',
    'Burkina Faso',
    'Cape Verde',
    'Côte d\'Ivoire',
    'Gambia',
    'Ghana',
    'Guinea',
    'Guinea-Bissau',
    'Liberia',
    'Mali',
    'Mauritania',
    'Niger',
    'Nigeria',
    'Senegal',
    'Sierra Leone',
    'Togo'
]

In [None]:
# for element in countries:
#     print ((element), type(element))

#### Pull all OSM City Points for All African Countries

In [80]:
import time
import requests
import json

# Check time
checkpoint = time.time()

# geographies include city, town & village  ... add name:en to search in English for countries
# Cities takes 78 - 143 seconds 

# Attempt to make a loop through all OSM countries
response_dict = {}
overpass_url = "http://overpass-api.de/api/interpreter"

# put curly in quotes 

for country_name in countries:
    overpass_query = """
            [out:json];
            area["name:en"="{}"]; 
            (node[place=city](area);
            way[place=city](area);
            rel[place=city](area);
            );
            out center;
            """.format(country_name)

    response = requests.get(overpass_url, params={'data': overpass_query})
    data = response.json()
    response_dict[country_name] = data

print('elasped time to do the quary:{}'.format(time.time() - checkpoint))    

elasped time to do the quary:92.21606206893921


In [18]:
# Nesting order: 
#     response_dict = dict ... countries
#         elements  = list ... 
#             each list item is a dict w/ keys type, id, lat, long, tags
#                 tags = dict ... get names
                
                
nest = response_dict['Chad']['elements'][1]['tags']['name:en'] # return name value 
type(nest)
nest

'Abéché'

In [81]:
# Copy response dict so we don't have to keep querying
import copy

response_dict_copy1 = copy.deepcopy(response_dict) # deep copy
response_dict_copy2 = copy.deepcopy(response_dict) # deep copy

In [47]:
type(response_dict_copy1)

dict

In [48]:
len(response_dict_copy1['Sudan']['elements'])

68

In [82]:
# Prof that not all cities with GPS cords have tag = 'name' ... if goal is to have labeled points, then remove all OSM
# points that do not have labeled 'names' as a tag
print(len(response_dict_copy1['Sudan']['elements']))

test = response_dict_copy1['Sudan']['elements']
print(len(test))

for city in test:
    if 'name' not in city['tags']:
        print('yes')

for city in test:
    if 'name' not in city['tags']:
        test.remove(city)
print(len(test))
print(len(response_dict_copy1['Sudan']['elements']))
print(len(response_dict_copy2['Sudan']['elements']))

68
68
yes
yes
yes
yes
64
64
68


In [83]:
# check to make sure deep copies are working

print(len(response_dict_copy1['Sudan']['elements']))
print(len(response_dict_copy2['Sudan']['elements']))
print(len(response_dict['Sudan']['elements']))

64
68
68


#### The code below will remove all points w/o labels

In [84]:
### remove points without names
print(len(response_dict_copy2['Sudan']['elements']))

for country in countries:
    for city in response_dict_copy2[country]['elements']:
        if 'name' not in city['tags']:
            response_dict_copy2[country]['elements'].remove(city)

print(len(response_dict_copy2['Sudan']['elements']))


68
64


#### The code below will make a new dict with key as name and values as lat long and ID

In [91]:
# make dict with : id, lat, lon & name called gps
# good_keys = ['id', 'lat', 'lon', 'name', 'is_in:country', 'place', 'population']

gps_keys = ['id', 'lat', 'lon']
gps = {}
for country_name in countries:
    gps[country_name] = {}
    for city_dict in response_dict_copy2[country_name]['elements']:
        city_id = city_dict['tags']['name'] # will make key 'city name' w/ lat, lon, ids as values
        gps[country_name].update({city_id:{key : value for key, value in city_dict.items() if key in gps_keys}})

In [99]:
gps['Ghana']['Accra']['lat']

5.5600141

In [None]:
import pandas as pd
import numpy as np

gps_copy = copy.deepcopy(gps)
ghana_dict = copy.deepcopy(gps['Ghana'])

df = pd.DataFrame.from_dict(gps['Ghana'])

# print(gps.keys())
# print(ghana_dict.keys())
# print(df.head(6))
# print(ghana_dict)

coords = []
for city in ghana_dict:
    name = city
    i_d = ghana_dict[city].get('id')
    lat = ghana_dict[city].get('lat')
    lon = ghana_dict[city].get('lon')
    coords.append((name, i_d, lat, lon))
    # make list
    
coords_arr = np.array(coords)
coords_arr

In [171]:
# Loop through countries and make an np array w/ country, city name, id, lat, long

coords = []
for country in gps:    
    for city in gps[country]:
        country_name = country
        city_name = city
        i_d = gps[country][city].get('id')
        lat = gps[country][city].get('lat')
        lon = gps[country][city].get('lon')
        coords.append((country_name, city_name, i_d, lat, lon))

coords_arr = np.array(coords)
coords_arr

# Try to make it into a dataframe
test = pd.DataFrame(data=coords_arr)
test

Unnamed: 0,0,1,2,3,4
0,Algeria,Constantine,27564946,36.364519,6.60826
1,Algeria,Oran,27565103,35.7032751,-0.6492976
2,Algeria,Tizi Ouzou,253292160,36.7137843,4.0493919
3,Algeria,El Oued,262963217,33.3611766,6.8603492
4,Algeria,Touggourt,262964638,33.1098968,6.066102
5,Algeria,Laghouat,264573224,33.8063518,2.8808616
6,Algeria,Bou Saâda,274946937,35.2133123,4.1809702
7,Algeria,Kouba,288387133,36.7337682,3.0861789
8,Algeria,Médéa,288417925,36.265344,2.766957
9,Algeria,Blida,288418022,36.4701645,2.8287985


In [157]:
for key in ghana_dict: print (key)

Accra
Tamale
Kumasi
Wa
Techiman
Tema
Koforidua
Cape Coast
Ashaiman
Sunyani
Obuasi
Sekondi-Takoradi


In [None]:
# plot them 

import numpy as np
import matplotlib.pyplot as plt

# Collect coords into list
coords = []
for element in data['elements']:
  if element['type'] == 'node':
    lon = element['lon']
    lat = element['lat']
    coords.append((lon, lat))
  elif 'center' in element:
    lon = element['center']['lon']
    lat = element['center']['lat']
    coords.append((lon, lat))
# Convert coordinates into numpy array
X = np.array(coords)
plt.plot(X[:, 0], X[:, 1], 'o')
plt.title('Cities in Ghana')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.axis('equal')
plt.show()