## Get the GPS location of the cities hosting the tournaments

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [2]:
# Load the cleaned match data; the `Location` column holds the host city.
df = pd.read_csv(filepath_or_buffer="data/tennis_data_cleaned.csv")

In [3]:
# Distinct host cities in alphabetical order.
cities = sorted(df["Location"].unique())
print(cities)

["'s-Hertogenbosch", 'Acapulco', 'Adelaide', 'Amersfoort', 'Antalya', 'Antwerp', 'Atlanta', 'Auckland', 'Bangkok', 'Barcelona', 'Basel', 'Bastad', 'Beijing', 'Belgrade', 'Bogota', 'Brisbane', 'Bucharest', 'Budapest', 'Buenos Aires', 'Cagliari', 'Casablanca', 'Chengdu', 'Chennai', 'Cincinnati', 'Cologne', 'Cordoba', 'Costa Do Sauipe', 'Dallas', 'Delray Beach', 'Doha', 'Dubai', 'Dusseldorf', 'Eastbourne', 'Estoril', 'Geneva', 'Gstaad', 'Halle', 'Hamburg', 'Houston', 'Indian Wells', 'Indianapolis', 'Istanbul', 'Johannesburg', 'Kitzbuhel', 'Kuala Lumpur', 'Las Vegas', 'London', 'Los Angeles', 'Los Cabos', 'Lyon', 'Madrid', 'Mallorca', 'Marbella', 'Marrakech', 'Marseille', 'Melbourne', 'Memphis', 'Metz', 'Miami', 'Monte Carlo', 'Montpellier', 'Montreal', 'Moscow', 'Munich', 'New Haven', 'New York', 'Newport', 'Nice', 'Nottingham', 'Nur-Sultan', 'Oeiras', 'Paris', 'Parma', 'Portschach', 'Pune', 'Queens Club', 'Quito', 'Rio de Janeiro', 'Rome', 'Rotterdam', 'San Diego', 'San Jose', 'Santiago'

## Look up each city's coordinates with the GeoNames search API

In [4]:
import requests
import xml.etree.ElementTree as ET
# Maps each city name to its [lat, long] pair; filled in by the lookup loop.
cities_to_gps = dict()

In [6]:
def get_results(ans):
    """Parse a GeoNames search response and return its first hit.

    Parameters
    ----------
    ans : object with a ``content`` attribute
        Holds the XML payload to parse (e.g. a ``requests`` response).

    Returns
    -------
    tuple
        ``([lat, lng], name, toponymName)`` where ``lat`` and ``lng`` are
        floats and the two names are the text of the matching XML elements.

    Raises
    ------
    ValueError
        If the payload has no ``<geoname>`` entry, if the first entry lacks
        one of the required fields, or if its coordinates are not numeric.
    """
    tree = ET.fromstring(ans.content)

    # Pick the first hit by tag instead of by position, so an empty result set
    # or an error payload produces a clear message rather than an IndexError.
    geoname = tree.find("geoname")
    if geoname is None:
        status = tree.find("status")
        detail = status.get("message") if status is not None else None
        raise ValueError(
            "GeoNames response contains no result"
            + (": %s" % detail if detail else "")
        )

    # Last occurrence of a tag wins, as in a plain loop over the children.
    fields = {child.tag: child.text for child in geoname}

    required = ("toponymName", "name", "lat", "lng")
    missing = [tag for tag in required if tag not in fields]
    if missing:
        raise ValueError(
            "GeoNames result is missing field(s): " + ", ".join(missing)
        )

    try:
        lat = float(fields["lat"])
        lng = float(fields["lng"])
    except (TypeError, ValueError):
        raise ValueError(
            "GeoNames result has invalid coordinates: lat=%r, lng=%r"
            % (fields["lat"], fields["lng"])
        ) from None

    return [lat, lng], fields["name"], fields["toponymName"]

In [7]:
# Query the GeoNames search service once per city and keep the coordinates of
# the first hit. The query goes through `params` so that city names containing
# spaces, apostrophes or other reserved characters are URL-encoded correctly.
# NOTE(review): the GeoNames username is hard-coded here; consider moving it to
# a configuration cell or an environment variable.
for c in cities:
    ans = requests.get(
        "http://api.geonames.org/search",
        params={"username": "atom", "q": c},
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    ans.raise_for_status()  # stop on HTTP errors instead of parsing an error page
    gps, name, typoname = get_results(ans)
    cities_to_gps[c] = gps
    print("%s -- %s - %s" % (c, name, typoname))  # to check that the query was successful


's-Hertogenbosch -- 's-Hertogenbosch - 's-Hertogenbosch
Acapulco -- Acapulco de Juárez - Acapulco de Juárez
Adelaide -- Adelaide - Adelaide
Amersfoort -- Amersfoort - Amersfoort
Antalya -- Antalya - Antalya
Antwerp -- Antwerp - Antwerpen
Atlanta -- Atlanta - Atlanta
Auckland -- Auckland - Auckland
Bangkok -- Bangkok - Bangkok
Barcelona -- Barcelona - Barcelona
Basel -- Basel - Basel
Bastad -- Båstads Kommun - Båstads Kommun
Beijing -- Beijing - Beijing
Belgrade -- Belgrade - Belgrade
Bogota -- Bogotá - Bogotá
Brisbane -- Brisbane - Brisbane
Bucharest -- Bucharest - Bucharest
Budapest -- Budapest - Budapest
Buenos Aires -- Buenos Aires - Buenos Aires
Cagliari -- Cagliari - Cagliari
Casablanca -- Casablanca - Casablanca
Chengdu -- Chengdu - Chengdu
Chennai -- Chennai - Chennai
Cincinnati -- Cincinnati - Cincinnati
Cologne -- Cologne - Köln
Cordoba -- Córdoba - Córdoba
Costa Do Sauipe -- Costa Do Sauipe - Costa Do Sauipe
Dallas -- Dallas - Dallas
Delray Beach -- Delray Beach - Delray Beac

In [10]:
# One row per city (the dict keys become the index), coordinates as columns.
cities_locations = pd.DataFrame.from_dict(
    data=cities_to_gps,
    orient='index',
    columns=['lat', 'long'],
)

In [11]:
# Save the coordinate table as CSV (the index is written as the first column).
cities_locations.to_csv(path_or_buf='data/cities_locations.csv')


In [None]:
# Also save the raw city -> [lat, long] mapping as JSON.
with open('data/cities_locations.json', 'w') as outfile:
    json.dump(cities_to_gps, outfile)