### Open street map search 

From Overpass Turbo, use this API and follow the instructions from the
URL below to quary OSM with python

https://towardsdatascience.com/loading-data-from-openstreetmap-with-python-and-the-overpass-api-513882a27fd0

OSM CRS IS ----  'EPSG:4326'

#### Open Street Map Query for Cities and Villages 
URL for OSM Overpass Turbo: https://overpass-turbo.eu/


URL for OSM Features:
https://wiki.openstreetmap.org/wiki/Map_Features#Populated_settlements.2C_urban

#### Example from Ghana for Towns ... For cities, use *place=city*
    area["name:en"="Ghana"]->.boundaryarea;
    (
    node(area.boundaryarea)[place=town];
    way(area.boundaryarea)[place=town];>;
    rel(area.boundaryarea)[place=town];>>;
    );
    out meta;
    // print results
    out body;
    >;
    out skel qt;

Note: From what I can tell, OSM does not allow for 'Africa' to be used as an 'area' key. 

### Note: This will pull refugee camps. Really cool. 

In [15]:
### List of African Countries from the UN

countries = [
#Northern Africa
    'Algeria',
    'Egypt',
    'Libya',
    'Morocco',
    'Tunisia',
    'Western Sahara',

    #Sub-Saharan Africa
    #Eastern Africa
    'Burundi',
    'Comoros',
    'Djibouti',
    'Eritrea',
    'Ethiopia',
    'Kenya',
    'Madagascar',
    'Malawi',
    'Mauritius',
    #Mayotte,
    'Mozambique',
    'Réunion',
    'Rwanda',
    'Somalia',
    'Sudan',
    'Uganda',
    'Tanzania',
    'Zambia',
    'Zimbabwe',
    
    #Middle Africa
    'Angola',
    'Cameroon',
    'Central African Republic',
    'Chad',
    'Congo-Brazzaville',
    'Congo-Kinshasa',
    'Equatorial Guinea',
    'Gabon',
    'Sao Tome and Principe',
    
    # Southern Africa
    'Botswana',
    'Lesotho',
    'Namibia',
    'South Africa',
    'Swaziland',
    
    # Western Africa
    'Benin',
    'Burkina Faso',
    'Cape Verde',
    'Côte d\'Ivoire',
    'Gambia',
    'Ghana',
    'Guinea',
    'Guinea-Bissau',
    'Liberia',
    'Mali',
    'Mauritania',
    'Niger',
    'Nigeria',
    'Senegal',
    'Sierra Leone',
    'Togo'
]

#### Pull all OSM City Points for All African Countries

In [16]:
import time
import requests
import json

# Check time
checkpoint = time.time()

# geographies include city, town & village  ... add name:en to search in English for countries
# Cities takes 78 - 143 seconds 
# 863 cities

# Towns ... 114 seconds?

# Attempt to make a loop through all OSM countries
response_dict = {}
overpass_url = "http://overpass-api.de/api/interpreter"

# put curly in quotes 

for country_name in countries:
    overpass_query = """
            [out:json];
            area["name:en"="{}"]; 
            (node[place=town](area);
            way[place=town](area);
            rel[place=town](area);
            );
            out center;
            """.format(country_name)

    response = requests.get(overpass_url, params={'data': overpass_query})
    data = response.json()
    response_dict[country_name] = data

print('elasped time to do the quary:{}'.format(time.time() - checkpoint))    

elasped time to do the quary:89.74238896369934


In [None]:
# Nesting order: 
#     response_dict = dict ... countries
#         elements  = list ... 
#             each list item is a dict w/ keys type, id, lat, long, tags
#                 tags = dict ... get names
                
                
# nest = response_dict['Chad']['elements'][1]['tags']['name:en'] # return name value 
# type(nest)
# nest

In [58]:
# Copy response dict so we don't have to keep querying
import copy

response_dict_copy1 = copy.deepcopy(response_dict) # deep copy
response_dict_copy2 = copy.deepcopy(response_dict) # deep copy

In [36]:
response_dict['Sudan']['elements']

[{'id': 90116658,
  'lat': 16.7425686,
  'lon': 33.5415994,
  'tags': {'is_in': 'Sudan', 'name': 'Taragma', 'place': 'town'},
  'type': 'node'},
 {'id': 90118541,
  'lat': 16.8927902,
  'lon': 33.6942156,
  'tags': {'is_in': 'Sudan',
   'name': 'كبوشية',
   'name:en': 'Kabushiyah',
   'place': 'town'},
  'type': 'node'},
 {'id': 90135018,
  'lat': 17.583018,
  'lon': 33.9687021,
  'tags': {'int_name': 'ad-Damir',
   'is_in': 'Africa, Sudan, Nahr an-Nil',
   'is_in:continent': 'Africa',
   'is_in:country': 'Sudan',
   'is_in:state': 'Nahr an Nil',
   'name': 'الدامر',
   'name:ar': 'الدامر',
   'name:en': 'ad-Damer',
   'place': 'town',
   'population': '20000'},
  'type': 'node'},
 {'id': 90149106,
  'lat': 15.4336833,
  'lon': 32.7245005,
  'tags': {'addr:city': 'العيلفون',
   'name': 'العيلفون',
   'name:ar': 'العيلفون',
   'name:en': "Al 'Aylafun",
   'place': 'town',
   'wikidata': 'Q12190621',
   'wikipedia': 'ar:العيلفون'},
  'type': 'node'},
 {'id': 90152580,
  'lat': 15.3161657

In [None]:
# type(response_dict_copy1)

In [None]:
# len(response_dict_copy1['Sudan']['elements'])

In [38]:
# Prof that not all cities with GPS cords have tag = 'name' ... if goal is to have labeled points, then remove all OSM
# points that do not have labeled 'names' as a tag
print(len(response_dict_copy1['Sudan']['elements']))

test = response_dict_copy1['Sudan']['elements']
print(len(test))

# for city in test:
#     if 'name' not in city['tags']:
#         print('yes')

for city in test:
    if 'name' not in city['tags']:
        test.remove(city)
print(len(test))
print(len(response_dict['Sudan']['elements']))
print(len(response_dict_copy1['Sudan']['elements']))
print(len(response_dict_copy2['Sudan']['elements']))

266
266
196
266
196
266


#### The code below will remove all points w/o labels

In [59]:
### remove points without names
print(len(response_dict_copy2['Sudan']['elements']))

# something is strange about this < ----------------------------------------- CASCADE START HERE WHY NEED TO RUN A FEW TIMES
for country in countries:
    for city in response_dict_copy2[country]['elements']:
        if 'name' not in city['tags']:
            response_dict_copy2[country]['elements'].remove(city)

print(len(response_dict_copy2['Sudan']['elements']))

for city in response_dict_copy2['Sudan']['elements']:
     if 'name' not in city['tags']:
        print('yes')

# name = []
# for city in response_dict_copy2['Sudan']['elements']:
#     city_name = city['tags']['name']
#     name.append(city_name)

266
196
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes


In [57]:
for city in response_dict_copy2['Sudan']['elements']:
     if 'name' in city['tags']:
        print('yes')

yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes


#### The code below will make a new dict with key as name and values as lat long and ID

In [30]:
# make dict with : id, lat, lon & name called gps
# good_keys = ['id', 'lat', 'lon', 'name', 'is_in:country', 'place', 'population']

gps_keys = ['id', 'lat', 'lon']
gps = {}
for country_name in countries:
    gps[country_name] = {}
    for city_dict in response_dict_copy2[country_name]['elements']:
        city_id = city_dict['tags']['name'] # will make key 'city name' w/ lat, lon, ids as values
        gps[country_name].update({city_id:{key : value for key, value in city_dict.items() if key in gps_keys}})

In [None]:
gps['Ghana']['Accra']['lat']

In [32]:
import pandas as pd
import numpy as np

gps_copy = copy.deepcopy(gps)
ghana_dict = copy.deepcopy(gps['Ghana'])

df = pd.DataFrame.from_dict(gps['Ghana'])

# print(gps.keys())
# print(ghana_dict.keys())
# print(df.head(6))
# print(ghana_dict)

coords = []
for city in ghana_dict:
    name = city
    i_d = ghana_dict[city].get('id')
    lat = ghana_dict[city].get('lat')
    lon = ghana_dict[city].get('lon')
    coords.append((name, i_d, lat, lon))
    # make list
    
coords_arr = np.array(coords)
coords_arr.shape

(368, 4)

In [33]:
# Loop through countries and make an np array w/ country, city name, id, lat, long

coords = []
for country in gps:    
    for city in gps[country]:
        country_name = country
        city_name = city
        i_d = gps[country][city].get('id')
        lat = gps[country][city].get('lat')
        lon = gps[country][city].get('lon')
        coords.append((country_name, city_name, i_d, lat, lon))

coords_arr = np.array(coords)
coords_arr

# Try to make it into a dataframe
test = pd.DataFrame(data=coords_arr)
test

Unnamed: 0,0,1,2,3,4
0,Algeria,Tamanrasset,89369215,22.7854543,5.5324465
1,Algeria,In Salah,89980948,27.1950331,2.4826132
2,Algeria,Boumerdès,252600742,36.758882,3.470596
3,Algeria,Thenia,253167052,36.724986,3.556935
4,Algeria,Zemmouri,253167208,36.7864064,3.6012209
5,Algeria,Lakhdaria,253291208,36.5639442,3.596907
6,Algeria,Draâ Ben Khedda,253292622,36.7333317,3.9587692
7,Algeria,Dellys,253292625,36.915798,3.913104
8,Algeria,El Menia,258799889,30.5836683,2.8830889
9,Algeria,In Guezzam,262393185,19.5667239,5.7717


In [None]:
for key in ghana_dict: print (key)

In [None]:
# plot them 

import numpy as np
import matplotlib.pyplot as plt

# Collect coords into list
coords = []
for element in data['elements']:
  if element['type'] == 'node':
    lon = element['lon']
    lat = element['lat']
    coords.append((lon, lat))
  elif 'center' in element:
    lon = element['center']['lon']
    lat = element['center']['lat']
    coords.append((lon, lat))
# Convert coordinates into numpy array
X = np.array(coords)
plt.plot(X[:, 0], X[:, 1], 'o')
plt.title('Cities in Ghana')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.axis('equal')
plt.show()