# Geocoding API calls for news locations

In [1]:
# Setup dependencies
import pandas as pd
import requests

# Import API key
from config import API_KEY

In [2]:
# Import cleaned news dataset
path = "static/data/locations_dates.csv"
df = pd.read_csv(path, converters={"keywords": eval})
df

Unnamed: 0,nid,glocation,pub_date,year,month,weekday
0,2,Louisiana,2015-01-01,2015,January,Thursday
1,3,United States,2015-01-01,2015,January,Thursday
2,5,Boston (Mass),2015-01-01,2015,January,Thursday
3,6,Killeen (Tex),2015-01-01,2015,January,Thursday
4,11,"East Village (Manhattan, NY)",2015-01-01,2015,January,Thursday
...,...,...,...,...,...,...
16283,50563,Denver (Colo),2015-11-13,2015,November,Friday
16284,50564,Acadia National Park (Me),2015-11-13,2015,November,Friday
16285,50570,San Francisco (Calif),2015-11-13,2015,November,Friday
16286,50571,New York State,2015-11-13,2015,November,Friday


In [3]:
# Create new dataframe with only unique glocation values
df_unique = df.drop_duplicates('glocation', keep='first')
df_unique = df_unique.reset_index(drop=True)

In [4]:
# Create columns to add data after API call
df_unique['country'] = " "
df_unique['state'] = " "
df_unique['latitude'] = " "
df_unique['longitude'] = " "
df_unique = df_unique[['nid', 'glocation', 'country', 'state', 'latitude', 'longitude', 'pub_date', 'year', 'month', 'weekday']]
df_unique

Unnamed: 0,nid,glocation,country,state,latitude,longitude,pub_date,year,month,weekday
0,2,Louisiana,,,,,2015-01-01,2015,January,Thursday
1,3,United States,,,,,2015-01-01,2015,January,Thursday
2,5,Boston (Mass),,,,,2015-01-01,2015,January,Thursday
3,6,Killeen (Tex),,,,,2015-01-01,2015,January,Thursday
4,11,"East Village (Manhattan, NY)",,,,,2015-01-01,2015,January,Thursday
...,...,...,...,...,...,...,...,...,...,...
1343,50437,Massena (NY),,,,,2015-11-12,2015,November,Thursday
1344,50471,Kearny (NJ),,,,,2015-11-12,2015,November,Thursday
1345,50511,Sharjah (United Arab Emirates),,,,,2015-11-12,2015,November,Thursday
1346,50546,Troy (NY),,,,,2015-11-13,2015,November,Friday


In [8]:
# API call from Google Geocoding API for glocations

for i in range(len(df_unique)):
    glocation = df['glocation'][i]
    r = requests.get("https://maps.googleapis.com/maps/api/geocode/json", params={
        "address": glocation,
        "key": API_KEY
    })

    lat = r.json()["results"][0]['geometry']['location']['lat']
    df_unique['latitude'][i] = lat
    lng = r.json()["results"][0]['geometry']['location']['lng']
    df_unique['longitude'][i] = lng

    address = r.json()["results"][0]["address_components"]
    for data in address:
        if "country" in data['types']:
            country = data['long_name']
            df_unique['country'][i] = country
        if "administrative_area_level_1" in data['types']:
            state = data['long_name']
            df_unique['state'][i] = state
        elif state == None:
            df_unique['state'][i] = state

    print("********************************************")
    print("Completed data retrieval for index = " + str(i))
    print("*********************************************")


print("********************************************")
print("ALL DATA RETRIEVAL NOW COMPLETE!!")
print("*********************************************")

********************************************
Completed data retrieval for index = 0
*********************************************
********************************************
Completed data retrieval for index = 1
*********************************************
********************************************
Completed data retrieval for index = 2
*********************************************
********************************************
Completed data retrieval for index = 3
*********************************************
********************************************
Completed data retrieval for index = 4
*********************************************
********************************************
Completed data retrieval for index = 5
*********************************************
********************************************
Completed data retrieval for index = 6
*********************************************
********************************************
Completed data retrieval for index = 7
******

In [9]:
df_unique

Unnamed: 0,nid,glocation,country,state,latitude,longitude,pub_date,year,month,weekday
0,2,Louisiana,United States,Louisiana,30.9843,-91.9623,2015-01-01,2015,January,Thursday
1,3,United States,United States,,37.0902,-95.7129,2015-01-01,2015,January,Thursday
2,5,Boston (Mass),United States,Massachusetts,42.3601,-71.0589,2015-01-01,2015,January,Thursday
3,6,Killeen (Tex),United States,Texas,31.1171,-97.7278,2015-01-01,2015,January,Thursday
4,11,"East Village (Manhattan, NY)",United States,New York,40.7265,-73.9815,2015-01-01,2015,January,Thursday
...,...,...,...,...,...,...,...,...,...,...
1343,50437,Massena (NY),United States,Florida,30.1766,-85.8055,2015-11-12,2015,November,Thursday
1344,50471,Kearny (NJ),United States,New Hampshire,43.1939,-71.5724,2015-11-12,2015,November,Thursday
1345,50511,Sharjah (United Arab Emirates),United States,,37.0902,-95.7129,2015-11-12,2015,November,Thursday
1346,50546,Troy (NY),United States,,37.0902,-95.7129,2015-11-13,2015,November,Friday


In [11]:
# Convert to CSV

df_unique.to_csv("static/data/geo_locations_dates.csv", index=False)

In [10]:
r = requests.get("https://maps.googleapis.com/maps/api/geocode/json", params={
    "address": "Acadia National Park (Me)",
    "key": API_KEY
})

lat = r.json()["results"][0]['geometry']['location']['lat']
lng = r.json()["results"][0]['geometry']['location']['lng']

address = r.json()["results"][0]["address_components"]
for data in address:
    if "country" in data['types']:
        country = data['long_name']
    if "administrative_area_level_1" in data['types']:
        state = data['long_name']
    else:
        state == None
print(country, state, lat, lng)

United States Maine 44.3385559 -68.2733346


In [6]:
# def get_geocode_cached(place_string):
#     if place_string in database:
#         return place_string's result column
#     else:
#         result = get_geocode(place_string)
#         load place_string and result into the database
#         return result