# Step 1a: Get Coordinates for MRT Stations

## Import Libraries

In [1]:
import pandas as pd
import requests
import json as json
import time
import csv

from myFunctions import *
# Load the API keys from a local text file via getKeys (from myFunctions), so the
# credentials are not hardcoded in the notebook. The query cell below reads
# keys['GoogleMapsGeocoding'].
keys = getKeys('GoogleAPIKey.txt')

## User Inputs
I like to date my output files so that I don't accidentally overwrite files of my previous runs. For more automated systems, it should be quite simple to append today's date to the filename. A function will be needed to append a version number to the date if there is already a file with the same name in the directory. But for developmental purposes, it is better to have the user manually change this so that the user knows what he/she is doing. 

The data file is 'MRT.csv' which I modified from a list from Wikipedia. 

In [2]:
# Input: the station list (adapted from a Wikipedia list).
dataFile = 'MRT.csv'

# Date stamp shared by both output files. Change it by hand for every run so that
# the results of previous runs are not overwritten.
runDate = '20180307'
outputFile = 'MRT_address_latlong_' + runDate + '.csv'
outputFailureFile = 'MRT_address_latlong_failed_' + runDate + '.csv'

## Get Station Addresses to Query
MRT addresses are created and stored as a dictionary to be queried later. <br>
The data frame that is read from the data file is preserved to: 
- contain all required columns. 
- be indexed by Station Code. 

This is so that details of failed requests can be accessed from this data frame and saved to a failure file.  The failure file can thus be re-run with this same code later. 

In [5]:
# Load the station list. Stations marked 'Both' are queried as MRT stations.
mrt = pd.read_csv(dataFile)
mrt['LRT/MRT'] = mrt['LRT/MRT'].replace('Both', 'MRT')
print('Number of stations: ', len(mrt))

# One query string per station: 'Singapore <Station Name> <LRT/MRT>' with every
# space turned into '+'.
index = mrt['Station Code'].tolist()
address = [query.replace(' ', '+')
           for query in ('Singapore ' + mrt['Station Name'] + ' ' + mrt['LRT/MRT'])]

# Development shortcut: uncomment the next 2 lines to query only a handful of
# stations; keep them commented out for a full run.
#index = index[20:25]
#address = address[20:25]

# Station Code -> query string, used by the API query cell.
address = dict(zip(index, address))

# Keep only the columns needed to describe a failed request, indexed by
# Station Code, so failures can be looked up and written to the failure file.
mrt = mrt[['Station Code', 'Station Name', 'LRT/MRT']].set_index('Station Code')

Number of stations:  190


Since there are only 190 stations, this code can be run in a single run. There is no need to split the query. 

## API Query
The outermost ***with*** block guarantees that the output file is closed properly, so all data retrieved so far are written to the file even if the code is interrupted.   

The ***success*** and ***failure*** dictionaries store the coordinates/error for each address. This is to ease troubleshooting. In the event that these dictionaries take up too much memory, the ***success*** dictionary can be removed because its data are already written to file inside the ***with*** block. However, the ***failure*** dictionary needs to be kept, as its data are converted to a data frame and saved to a file at the end of the code. 

Because the naming of MRT stations in Google Maps is not standardized (especially for LRT stations and stations that are still under construction), the query may not always return a valid result. For each such failure, the code queries the API again using up to 2 other possible address formats. If a station still fails after all 3 formats, manual intervention is required: look up its coordinates on Google Maps in a browser. 


In [6]:
# Per-station bookkeeping, keyed by Station Code:
#   success -> (latitude, longitude) of the geocoded station
#   failure -> (Station Name, LRT/MRT, request status, returned status)
success, failure = dict(), dict()

# Geocoding endpoint; the parameter string of each request is appended to it.
url = 'https://maps.googleapis.com/maps/api/geocode/json?'

In [7]:
def _geocodeBatch(writer, queries, **tryGETOptions):
    """Geocode every station in ``queries`` and record each outcome.

    One request per station is sent through ``tryGET``. When the request
    status is 200 and the returned status is 'OK', the first result's
    coordinates are stored in ``success``, the station is dropped from
    ``failure`` and a row is written with ``writer``. Otherwise the station's
    details (looked up in ``mrt``) and both status values are stored in
    ``failure``.

    Parameters
    ----------
    writer : csv.writer
        Writer for the output file; receives one row per geocoded station.
    queries : dict
        Station Code -> query string to send as the ``address`` parameter.
    **tryGETOptions
        Extra keyword arguments forwarded unchanged to ``tryGET``
        (the retry attempts pass ``sleep=2``).
    """
    apiKey = keys['GoogleMapsGeocoding']  # loop-invariant lookup
    for code, query in queries.items():
        paramString = getParamString({'key': apiKey, 'address': query})
        # The leading 3 is presumably the maximum number of tries -- confirm
        # against tryGET in myFunctions.
        reqStatus, returnedStatus, data, tries = tryGET(3, url + paramString, **tryGETOptions)

        if reqStatus == 200 and returnedStatus == 'OK':
            location = data['results'][0]['geometry']['location']
            lat, long = location['lat'], location['lng']
            success[code] = (lat, long)
            failure.pop(code, None)  # no longer a failure once any format succeeds
            writer.writerow([code, query, lat, long, str(lat) + ',' + str(long)])
            print(code, ' Geocoding succeeded.')
        else:
            failure[code] = (mrt.loc[code, 'Station Name'], mrt.loc[code, 'LRT/MRT'], reqStatus, returnedStatus)
            print(code, 'Geocoding failed after ', tries, ' tries.')
            print(reqStatus, returnedStatus)


def _retryQueries(suffix):
    """Return Station Code -> alternative query string for every station still in ``failure``.

    The trailing network token ('+MRT' or '+LRT') of the original query is
    replaced by ``suffix``. ``address`` itself is left untouched so that this
    cell can be re-run without re-running the address-building cell.
    """
    return {code: address[code].rsplit('+', 1)[0] + suffix for code in failure}


with open(outputFile, 'w', newline='', encoding='utf-8') as output:
    writer = csv.writer(output, delimiter=',')
    # NOTE(review): 'Longtitude' is misspelled, but the header is kept as-is
    # because consumers of this file may rely on the exact column name.
    writer.writerow(['Station Code', 'Station Address', 'Latitude', 'Longtitude', 'latlong'])

    # Opening with 'w' has just emptied the output file, so empty the
    # bookkeeping dictionaries too. Otherwise a re-run of this cell would retry
    # stale failures and report counts that do not match the file.
    success.clear()
    failure.clear()

    # First Attempt: 'Singapore+<Station Name>+<MRT|LRT>'
    _geocodeBatch(writer, address)
    print('Number of success after first attempt: ', len(success))
    print('Number of failures after first attempt: ', len(failure))

    # Second Attempt: 'Singapore+<Station Name>+MRT/LRT+station'
    if len(failure) != 0:
        print('Retry geocoding with MRT/LRT station.')
        _geocodeBatch(writer, _retryQueries('+MRT/LRT+station'), sleep=2)
    print('Number of success after second attempt: ', len(success))
    print('Number of failures after second attempt: ', len(failure))

    # Third Attempt: 'Singapore+<Station Name>+Light+Rail+Station'
    if len(failure) != 0:
        print('Retry geocoding with Light Rail Station.')
        _geocodeBatch(writer, _retryQueries('+Light+Rail+Station'), sleep=2)
    print('Number of success: ', len(success))
    print('Number of failures: ', len(failure))

Try: 1. 
NS10  Geocoding succeeded.
Try: 1. 
EW9  Geocoding succeeded.
Try: 1. 
NS16  Geocoding succeeded.
Try: 1. 
SE3  Geocoding succeeded.
Try: 1. 
BP9  Geocoding succeeded.
Try: 1. 
CC12  Geocoding succeeded.
Try: 1. 
CE1 DT16  Geocoding succeeded.
Try: 1. 
TE29  Geocoding succeeded.
Try: 1. 
DT5  Geocoding succeeded.
Try: 1. 
EW5  Geocoding succeeded.
Try: 1. 
DT29  Geocoding succeeded.
Try: 1. 
DT30  Geocoding succeeded.
Try: 1. 
TE30  Geocoding succeeded.
Try: 1. 
DT21  Geocoding succeeded.
Try: 1. 
DT23  Geocoding succeeded.
Try: 1. 
NS17 CC15  Geocoding succeeded.
Try: 1. 
NE9  Geocoding succeeded.
Try: 1. 
EW27  Geocoding succeeded.
Try: 1. 
CC19 DT9  Geocoding succeeded.
Try: 1. 
NS18  Geocoding succeeded.
Try: 1. 
CC2  Geocoding succeeded.
Try: 1. 
TE7  Geocoding succeeded.
Try: 1. 
NE15  Geocoding succeeded.
Try: 1. 
EW12 DT14  Geocoding succeeded.
Try: 1. 
NS2  Geocoding succeeded.
Try: 1. 
CC18  Geocoding succeeded.
Try: 1. 
NS3  Geocoding succeeded.
Try: 1. 
DT1 BP6  Ge

## Save Failure Data

In [8]:
# Save the stations that could not be geocoded, together with their last
# request/returned status, so that they can be inspected or re-run later.
# Nothing is written when there are no failures.
if len(failure) != 0:
    # On the first run `failure` is still the dictionary filled by the query
    # cell. After the conversion below it is a data frame, so the guard lets
    # this cell be re-run without crashing on a second conversion.
    if isinstance(failure, dict):
        failure = pd.DataFrame.from_dict(failure, orient='index').reset_index(level=0)
        failure.columns = ['Station Code', 'Station Name', 'LRT/MRT', 'reqStatus', 'returnedStatus']
    failure.to_csv(outputFailureFile, index=False)