### Add in location details to the headlines

In [1]:
import os
import pickle

import pandas as pd
import geonamescache

In [2]:
# Load the headlines table from the project's data directory; the preview
# below shows it carries 'headline', 'city' and 'country' columns.
source_file = os.path.join("data", "headline_locations.parquet")
df = pd.read_parquet(source_file)

In [3]:
# Preview the first rows to confirm the city/country columns loaded as expected.
df.head()

Unnamed: 0,headline,city,country
0,Zika Outbreak Hits Miami,Miami,
1,Could Zika Reach New York City?,New York City,
2,First Case of Zika in Miami Beach,Miami,
3,"Mystery Virus Spreads in Recife, Brazil",Recife,Brazil
4,Dallas man comes down with case of Zika,Dallas,


In [4]:
# Shared geonamescache instance used for every city lookup below.
# NOTE(review): the name `gc` shadows the stdlib `gc` module if that is ever
# imported in this notebook; kept as-is because other cells reference it.
gc = geonamescache.GeonamesCache()

In [5]:
# Inspect the raw lookup result: a list with one single-entry dict per match,
# keyed by geoname id and holding latitude, longitude, countrycode, population, etc.
gc.get_cities_by_name("Miami")

[{'4164138': {'geonameid': 4164138,
   'name': 'Miami',
   'latitude': 25.77427,
   'longitude': -80.19366,
   'countrycode': 'US',
   'population': 441003,
   'timezone': 'America/New_York',
   'admin1code': 'FL'}}]

In [6]:
# Load the pre-built city and country lookup maps.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# these files if they come from a trusted source.
# The context managers close each file handle as soon as it has been read,
# instead of leaving it open until garbage collection.
with open("data/city_map.pkl", "rb") as city_map_file:
    city_map = pickle.load(city_map_file)
with open("data/country_map.pkl", "rb") as country_map_file:
    country_map = pickle.load(country_map_file)

In [7]:
def get_city_lat_long(city_name):
    """Look up the latitude, longitude and country code of a city by name.

    The lookup goes through the notebook-level geonamescache instance ``gc``.
    When several cities share the name, the one with the largest population
    is used.

    Parameters
    ----------
    city_name : str
        City name passed to ``gc.get_cities_by_name``.

    Returns
    -------
    tuple
        ``(lat, lon, country_code)`` of the most populous match;
        ``(None, None, "Unknown")`` when no city matches the name;
        ``(None, None, None)`` when the lookup itself raises an error.
    """
    try:
        matches = gc.get_cities_by_name(city_name)
        if len(matches) == 0:
            return (None, None, "Unknown")
        # Each match is a single-entry dict {geonameid: details}; unwrap the details.
        candidates = [next(iter(match.values())) for match in matches]
        best = max(candidates, key=lambda details: details['population'])
        return (best['latitude'], best['longitude'], best['countrycode'])
    except Exception:
        # Best-effort lookup: a malformed record or bad input yields an empty
        # result instead of aborting the whole dataframe pass. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so the cell can still be interrupted.
        return (None, None, None)

In [8]:
# Sanity check on a known city: expect a (lat, lon, country_code) tuple.
get_city_lat_long("Miami")

(25.77427, -80.19366, 'US')

In [11]:
# Add city location details to the dataframe.
# Each city is looked up once with Series.map (rather than four row-wise
# DataFrame.apply passes), and the resulting (lat, lon, country_code) tuples
# are unpacked into their own columns without a temporary column on `df`.
# Building the frame from a list also works when `df` has no rows.
city_details = pd.DataFrame(
    df['city'].map(get_city_lat_long).tolist(),
    index=df.index,
    columns=['lat', 'lon', 'country_code'],
)
for column in city_details.columns:
    df[column] = city_details[column]
del city_details, column

In [12]:
# Inspect the enriched frame. Things to look for in the output:
# - names with no match come back with empty lat/lon and country_code
#   'Unknown' (e.g. 'Geneve', 'Sao Paulo' -- presumably spelling/accent
#   variants of the stored names; TODO confirm);
# - ambiguous names resolve to the most populous city of that name, which is
#   not necessarily the one the headline means (e.g. 'Trinidad' -> BO).
df.head(40)

Unnamed: 0,headline,city,country,lat,lon,country_code
0,Zika Outbreak Hits Miami,Miami,,25.77427,-80.19366,US
1,Could Zika Reach New York City?,New York City,,40.71427,-74.00597,US
2,First Case of Zika in Miami Beach,Miami,,25.77427,-80.19366,US
3,"Mystery Virus Spreads in Recife, Brazil",Recife,Brazil,-8.05389,-34.88111,BR
4,Dallas man comes down with case of Zika,Dallas,,32.78306,-96.80667,US
5,Trinidad confirms first Zika case,Trinidad,,-14.83333,-64.9,BO
6,Zika Concerns are Spreading in Houston,Houston,,29.76328,-95.36327,US
7,Geneve Scientists Battle to Find Cure,Geneve,,,,Unknown
8,The CDC in Atlanta is Growing Worried,Atlanta,,33.749,-84.38798,US
9,Zika Infested Monkeys in Sao Paulo,Sao Paulo,,,,Unknown
