In [1]:
import json
import requests
import pandas as pd
import numpy as np
from config import Cfg

# OpenCage API key (https://opencagedata.com/), taken from the project's Cfg object.
# (An earlier draft of this cell read it from "./../config/opencage-api-key.txt".)

apiKey = Cfg.opencage_api_key

In [2]:
def getUserLocation(address):
    """Geocode a free-form address with the OpenCage geocoding API.

    Parameters
    ----------
    address : str
        Free-form query string, e.g. "506 Canyon Drive, Oceanside".

    Returns
    -------
    list
        The ``results`` entry of the JSON response (one dict per candidate
        match; may be empty).

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    # Let requests build the query string: it percent-encodes the address, so
    # characters such as '&', '#' or '+' cannot corrupt the URL or leak into
    # other query parameters.
    response = requests.get(
        "https://api.opencagedata.com/geocode/v1/json",
        params={"q": address, "key": apiKey},
        timeout=30,  # seconds; without it a stalled connection blocks forever
    )
    # Surface HTTP-level failures instead of treating them as a normal answer.
    response.raise_for_status()
    return response.json()['results']
    

In [3]:
# Smoke test: geocode one sample address; the cell output is the raw list of
# result dicts returned by getUserLocation.
testAddress = "506 Canyon Drive, Oceanside"
getUserLocation(testAddress)

[{'annotations': {'DMS': {'lat': "33° 12' 6.28200'' N",
    'lng': "117° 21' 18.95040'' W"},
   'FIPS': {'county': '06073', 'state': '06'},
   'MGRS': '11SMS6688873708',
   'Maidenhead': 'DM13he78ik',
   'Mercator': {'x': -13063928.23, 'y': 3898716.53},
   'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=595971049#map=17/33.20175/-117.35526',
    'url': 'https://www.openstreetmap.org/?mlat=33.20175&mlon=-117.35526#map=17/33.20175/-117.35526'},
   'UN_M49': {'regions': {'AMERICAS': '019',
     'NORTHERN_AMERICA': '021',
     'US': '840',
     'WORLD': '001'},
    'statistical_groupings': ['MEDC']},
   'callingcode': 1,
   'currency': {'alternate_symbols': ['US$'],
    'decimal_mark': '.',
    'disambiguate_symbol': 'US$',
    'html_entity': '$',
    'iso_code': 'USD',
    'iso_numeric': '840',
    'name': 'United States Dollar',
    'smallest_denomination': 1,
    'subunit': 'Cent',
    'subunit_to_unit': 100,
    'symbol': '$',
    'symbol_first': 1,
    'thousands_separato

In [101]:
# get all cities from races for which we have data

# races.jl is read as JSON Lines: one JSON document per line.
with open('./../data/races/races.jl', 'r', encoding='utf-8') as f:
    # Iterate the file directly (no intermediate list of lines) and skip blank
    # lines, e.g. a trailing empty line, which json.loads would reject.
    races = [json.loads(line) for line in f if line.strip()]

# Distinct race locations; records whose location is '---' are excluded
# (presumably a missing-value placeholder -- confirm against the scraper).
locations = {race["location"] for race in races if race["location"] != '---'}
len(locations)

176

In [109]:
# Get geo information from city/country using the OpenCage API

# Set to True to query the API again and overwrite the cached JSON file;
# with False the cached file is loaded instead.
retrieve = False

# Cache file holding the raw API results, keyed by location name.
citiesDataPath = './../data/geo-data/cities-location-data.json'

# Locations that are sent to the geocoder under a different query string than
# the name they carry in the race data.
searchOverrides = {"Jixian, Tianjin Province": "Tianjin, China"}

if retrieve:
    # retrieve geographic data from city names
    citiesDict = {}
    for cityName in locations:
        toSearch = searchOverrides.get(cityName, cityName)
        # Reuse the geocoding helper so the request logic lives in one place.
        # Results stay keyed by the original location name.
        citiesDict[cityName] = getUserLocation(toSearch)

    # save geo data
    with open(citiesDataPath, 'w', encoding='utf-8') as f:
        json.dump(citiesDict, f)
else:
    with open(citiesDataPath, 'r', encoding='utf-8') as f:
        citiesDict = json.load(f)

In [150]:
# Sanity check on the geocoding results: several hits for one city are
# acceptable, an empty hit list is not -- report every city that has one.
for name, hits in citiesDict.items():
    if len(hits) > 0:
        continue
    print(f"No result for {name}")

In [180]:
# regions of the world
regions = pd.read_csv("./../data/geo-data/country-regions.csv")

# create clean city data: one flat record per city, taken from its best match
cities_clean = {}
for city, candidates in citiesDict.items():
    # take result with the most confidence (the first one on ties);
    # raises ValueError if a city has no results at all
    best = max(candidates, key=lambda result: result['confidence'])
    components = best['components']
    # Build a fresh dict. Assigning best['geometry'] itself and then adding
    # keys to it would also modify the cached API results inside citiesDict.
    cities_clean[city] = {
        **best['geometry'],
        'continent': components['continent'],
        'country': components['country'],
        'country_code': components['country_code'],
        # these components are looked up with .get(), so they are None when
        # the result does not carry them
        'state': components.get('state'),
        'state_code': components.get('state_code'),
        'county': components.get('county'),
        'formatted': best['formatted'],
    }

# TODO: an earlier, commented-out draft of this cell also wrote lat/lng and a
# region (looked up in `regions` by country name) back onto the race table and
# saved it as "races-latlon-region.csv". That step is not implemented for the
# current data structures.

In [157]:
# Display the cleaned per-city records.
# NOTE(review): the saved output of this cell lists only lat/lng/continent per
# city, fewer keys than the cleaning cell writes -- presumably stale; re-run.
cities_clean

{'Monterrey, Mexico': {'lat': 25.6397836,
  'lng': -100.2931016,
  'continent': 'North America'},
 'Panama City': {'lat': 8.9714493,
  'lng': -79.5341802,
  'continent': 'South America'},
 'Bariloche, Argentina': {'lat': -41.1334672,
  'lng': -71.3099936,
  'continent': 'South America'},
 'Putrajaya, Malaysia': {'lat': 2.9069676,
  'lng': 101.6750533,
  'continent': 'Asia'},
 'Haines City, Florida': {'lat': 28.1090591,
  'lng': -81.6271184,
  'continent': 'North America'},
 'Muscat, Al Qurm': {'lat': 23.6096426,
  'lng': 58.4542247,
  'continent': 'Asia'},
 'Pucon, Chile': {'lat': -39.2731173,
  'lng': -71.9777605,
  'continent': 'South America'},
 'Florianopolis, Brazil': {'lat': -27.5973002,
  'lng': -48.5496098,
  'continent': 'South America'},
 'Traverse City, Michigan': {'lat': 44.7606441,
  'lng': -85.6165301,
  'continent': 'North America'},
 'Atlantic City, New Jersey': {'lat': 39.3642852,
  'lng': -74.4229351,
  'continent': 'North America'},
 'Benton Harbor, Michigan': {'lat'

In [116]:
# Display the country/region table loaded from country-regions.csv.
regions

Unnamed: 0,Country,Region,Global South
0,Andorra,Europe,Global North
1,United Arab Emirates,Arab States,Global South
2,Afghanistan,Asia & Pacific,Global South
3,Antigua and Barbuda,South/Latin America,Global South
4,Anguilla,South/Latin America,Global South
5,Albania,Europe,Global North
6,Armenia,CIS,Global South
7,Netherlands Antilles,South/Latin America,Global South
8,Angola,Africa,Global South
9,Antarctica,Asia & Pacific,Global South
