# Geocoding Addresses
## This notebook contains code to retrieve addresses from the articles dataframe and send them to Google's Geocoding service to receive lat/long coordinates for locating in a mapping service.

# Load articles data

In [None]:
# Put the parent of the current working directory on the import path so that
# packages located one level up in the file system can be imported.
# See https://stackoverflow.com/questions/34478398
import os
import sys

module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    # Prepend, so this directory is searched before the existing entries.
    sys.path = [module_path, *sys.path]

In [None]:
from tagnews.utils import load_data as ld
import numpy as np
import pandas as pd

In [None]:
# Load the articles through the project's tagnews loader. Later cells call
# df.head() and read df.locations, so df is presumably a pandas DataFrame with
# a `locations` column -- TODO confirm against tagnews.utils.load_data.
df = ld.load_data()

In [None]:
# Preview the top of the loaded data as a quick sanity check.
df.head()

## Count all the articles with addresses transcribed from the articles.

In [None]:
# Keep only the `locations` entries that contain at least one item and expose
# them as a plain array (one element per article).
addr_list = df.loc[df.locations.apply(len) > 0, 'locations'].values

In [None]:
# Number of articles that have at least one transcribed location.
len(addr_list)

## Count the total number of addresses transcribed from the articles to be geocoded.

In [None]:
# Count every transcribed location that still has to be geocoded, i.e. every
# location dict that has no 'lat_long' key yet.
# Each location is checked individually: looking only at the first location of
# an article would miscount articles in which some locations already have
# coordinates and others do not.
count = sum(
    1
    for locations in addr_list
    for location in locations
    if 'lat_long' not in location
)
print(count)

## Run the following 2 cells if you are making changes to lat_long.py. They make this notebook automatically reload those changes for testing.

In [None]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [None]:
# Mode 2: reload all modules before each cell is executed, so edits to
# lat_long.py are picked up without restarting the kernel.
%autoreload 2

## Please also note that you will need to supply an `api_key` obtained from Google's Geocoding API site.

In [None]:
import lat_long as ll

# Read the Google Geocoding API key from the GOOGLE_GEOCODING_API_KEY
# environment variable so the secret never has to be saved in the notebook.
# When the variable is not set this falls back to the empty-string placeholder;
# in that case supply your key before running the cells below.
# (`os` is imported in the first code cell of this notebook.)
api_key = os.environ.get('GOOGLE_GEOCODING_API_KEY', '')

## Main program to gather available locations data that does not yet have lat/long coordinates. 
### Set `test = True` to run smaller batches to prevent reaching Google's query limits too quickly.

In [None]:
# Geocode the locations in `df` with the project's lat_long helper.
# test=False requests the full run; switch it to True for smaller batches that
# are less likely to hit Google's query limits.
latlong_data = ll.get_lat_long(
    df,
    test=False,
    api_key=api_key,
)

In [None]:
# Spot-check a single entry of the geocoding result.
# NOTE(review): 312 looks like an arbitrary sample index -- confirm; this cell
# fails if the result has no entry at that position.
latlong_data[312]

## Running the code block below shows that some of the queries didn't return results, for a variety of reasons.

In [None]:
# Collect every location that came back from geocoding without a 'lat_long'
# key, i.e. every query that produced no coordinates.
no_results = [
    location
    for locations in latlong_data
    for location in locations
    if 'lat_long' not in location
]
# `count` is kept as its own name in case later cells read it.
count = len(no_results)
print("Number of addresses that didn't receive lat/long coords: {}.".format(count))
# Display the failed locations themselves for inspection.
no_results

## Code block below can be run to see how the query works. You will need to supply your own api_key.

In [None]:
from geopy.geocoders import GoogleV3

# Read the Google Geocoding API key from the GOOGLE_GEOCODING_API_KEY
# environment variable rather than pasting it into the notebook. When the
# variable is not set this falls back to the empty-string placeholder, so you
# will need to supply your own key for the queries below to work.
# (`os` is imported in the first code cell of this notebook.)
api_key = os.environ.get('GOOGLE_GEOCODING_API_KEY', '')

# Geocoder client used by the example query below; timeout=10 is passed
# through to geopy as the request timeout.
g = GoogleV3(api_key=api_key, timeout=10)

In [None]:
# Peek at the 'cleaned text' of one location: element 100 of addr_list, item 3
# within it. NOTE(review): the indices look like arbitrary samples -- confirm;
# this cell fails if that entry has fewer than four locations.
addr_list[100][3]['cleaned text']

In [None]:
# Example query: send the cleaned text of one sample location to the geocoder,
# passing Chicago as the locality component, then display the result.
addr = addr_list[0][2]['cleaned text']
location = g.geocode(
    addr,
    components={'locality': 'Chicago'},
)
location