# Geocoding Addresses
## This notebook contains code to retrieve addresses from the articles dataframe and send them to Google's Geocoding service to receive lat/long coordinates for locating in a mapping service.
# Load articles data

In [None]:
# Make packages that live one directory above this notebook importable by
# putting that directory at the front of the import search path.
# See https://stackoverflow.com/questions/34478398
import os
import sys

module_path = os.path.abspath('..')
if module_path not in sys.path:
    # Mutate the existing list in place rather than rebinding sys.path, so
    # anything already holding a reference to the list sees the new entry.
    sys.path.insert(0, module_path)

In [None]:
from tagnews.utils import load_data as ld
import numpy as np
import pandas as pd
import pickle

In [None]:
# Load the articles data through tagnews.utils.load_data; `df` is the frame
# that every later cell reads from.
df = ld.load_data()

In [None]:
# Preview the first rows as a quick check that the load worked.
df.head()

# Count the articles that have at least one truthy value in the columns from `OEMC` onward.

In [None]:
# Slice every column from 'OEMC' through the last one, flag the rows where
# any of those columns is truthy, and count the flagged rows.
from_oemc = df.loc[:, 'OEMC':]
from_oemc.any(axis=1).sum()

# Count all the articles that have addresses transcribed from them.

In [None]:
# An article counts when its `locations` value is truthy (i.e. not empty).
df['locations'].apply(bool).sum()

In [None]:
# Keep only the `locations` entries that are non-empty; the result stays
# indexed by the original article labels.
has_locations = df['locations'].apply(bool)
addr_list = df['locations'][has_locations]

In [None]:
# Preview the first few non-empty location entries.
addr_list.head()

# Count the total number of addresses transcribed from the articles to be geocoded.

In [None]:
# Each entry holds the locations for one article; add up their lengths to
# get the total number of transcribed addresses.
addr_list.apply(len).sum()

In [None]:
# Count the addresses that still need geocoding, i.e. the location entries
# that do not carry a 'lat_long' key yet.
#
# Every location is checked on its own (matching the no-results scan further
# down) instead of letting the first location of an article decide for all of
# them. Iterating the Series directly also avoids Series.iteritems(), which
# was deprecated in pandas 1.5 and removed in pandas 2.0.
count = sum(
    1
    for locations in addr_list
    for location in locations
    if 'lat_long' not in location
)
count

# Run the following two cells if you are making changes to lat_long.py. They make the notebook automatically reload the module so that your changes take effect while testing.

In [None]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [None]:
# Mode 2: re-import modules before each cell runs, so edits to lat_long.py
# are picked up without restarting the kernel.
%autoreload 2

# Please also note that you will need to supply an API key from Google's Geocoding API site.
In the shell from which you launch this notebook, export your Google geocoding API key before starting Jupyter:
```
        export GOOGLE_GEOCODE_API_KEY=api...
```


In [None]:
# lat_long is the local lat_long.py module; it provides get_lat_long, used
# in the main geocoding cell below.
import lat_long as ll
# Read the key from the environment instead of hardcoding it in the notebook.
# Raises KeyError when GOOGLE_GEOCODE_API_KEY has not been exported.
api_key = os.environ["GOOGLE_GEOCODE_API_KEY"]

# Main program to gather available locations data that does not yet have lat/long coordinates. 
### Set `test = True` to run smaller batches to prevent reaching Google's query limits too quickly.

In [None]:
# Geocode the locations in `df` through lat_long.get_lat_long.
# test=True selects the smaller-batch mode mentioned in the heading above, to
# avoid hitting Google's query limits too quickly.
# NOTE(review): presumably this issues network requests to Google — confirm
# in lat_long.py before re-running repeatedly.
latlong_data = ll.get_lat_long(df, api_key, test=True)

In [None]:
# Inspect the first returned entry (by position) to check the result shape.
latlong_data.iloc[0]

In [None]:
# Save the geocoding results to a pickle file in the current working
# directory. Only unpickle files you trust when loading this back.
latlong_data.to_pickle('addr_geotag_list.pkl')

# Running the code block below shows that some of the queries didn't return results, for a variety of reasons.

In [None]:
# Gather every location entry that came back without a 'lat_long' key, then
# report how many there are and display them.
no_results = [
    location
    for locations in latlong_data
    for location in locations
    if 'lat_long' not in location.keys()
]
count = len(no_results)
print('Number of addresses that didn\'t recieve lat/log coords: {}.'.format(count))
no_results

# Code block below can be run to see how the query works. You will need to supply your own api_key.

In [None]:
# Build a geopy GoogleV3 geocoder for trying out individual queries.
from geopy.geocoders import GoogleV3
# Take the key from the environment, as the earlier setup cell does.
# Overwriting `api_key` with an empty string here left the geocoder without
# a key and broke any later re-run of the main geocoding cell.
api_key = os.environ["GOOGLE_GEOCODE_API_KEY"]
# timeout is in seconds.
g = GoogleV3(api_key=api_key, timeout=10)

In [None]:
# Pull one stored value: entry 0, its second location, then element [1][0]
# of that location's 'lat_long' value.
# NOTE(review): presumably this is the latitude — confirm against lat_long.py.
latlong_data[0][1]['lat_long'][1][0]

In [None]:
# Geocode one transcribed address by hand to see what the service returns.
# NOTE(review): addr_list is a filtered Series, so `[0]` relies on label 0
# having survived the filter and on that entry having at least 3 locations.
addr = addr_list[0][2]['cleaned text']
# The components filter restricts the lookup to the locality 'Chicago'.
location = g.geocode(addr, components={'locality':'Chicago'})
location

# Code to construct URL for displaying lat/longs in Google Static Map

I was not able to get this to work: the map renders, but without any markers. Abandoning this for now.

In [None]:
import mapper as m
import webbrowser

In [None]:
# Ask the local mapper module for a map URL covering the transcribed
# addresses, then open it in the default web browser.
# Pass the key configured earlier in the notebook instead of a hard-coded
# empty string, so a supplied key is actually used.
# NOTE(review): the key must also be enabled for the Static Maps API.
mapURL = m.mapper(addr_list, api_key=api_key)
webbrowser.open(mapURL)

In [None]:
# Check how long the generated URL is.
# NOTE(review): presumably compared against the Static Maps URL length
# limit — confirm the current limit in Google's documentation.
len(mapURL)

In [None]:
# Build a Google Static Maps URL by hand with a single marker and open it in
# the default web browser.
# The Static Maps documentation specifies the https:// form of the endpoint.
baseURL = 'https://maps.googleapis.com/maps/api/staticmap?center=Chicago,IL&zoom=12&size=640x640&scale=2'
# Coordinates of one geocoded location (entry 0, second location).
# NOTE(review): assumes lat_long[1] is a (latitude, longitude) pair —
# confirm against lat_long.py.
coords = latlong_data[0][1]['lat_long'][1]
# Marker styles, if wanted, go inside the same `markers` parameter, separated
# from each other and from the locations by '|' (URL-encoded as %7C), e.g.
# '&markers=size:mid%7Ccolor:red%7C<lat>,<long>'.
markers = '&markers={},{}'.format(coords[0], coords[1])
# Send the key configured earlier instead of an empty `key=` parameter.
mapURL = baseURL + markers + '&key=' + api_key
webbrowser.open(mapURL)

In [None]:
# Display the final URL so it can be inspected or pasted into a browser.
# Note that the displayed value includes whatever API key was appended.
mapURL