Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flush file after each write & write failed addresses to another output file #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
.Trashes
ehthumbs.db
Thumbs.db
.idea

input.csv
found*.csv
failed*.csv
output*.csv
157 changes: 94 additions & 63 deletions address_transformation.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,106 @@
import requests
import json
import csv
import time
from tqdm import *

# Settings
google_maps_api_key = '__REPLACE_GOOGLE_API_KEY_HERE__'
input_path = 'input.csv'
id_column = 0
addr_column = 1
reader_delimiter = ';'
reader_quoting = csv.QUOTE_NONE

# Endpoint of the Google Maps Geocoding web service (JSON output).
GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'

# Header rows of the two result files.
FOUND_HEADER = ['ID', 'Street', 'Street (Short)', 'Number', 'Post code',
                'City', 'State', 'State (Short)', 'Country']
FAILED_HEADER = ['ID', 'Failure Reason', 'Address']


def addresses_from_csv(path=None, idColumn=None, addrColumn=None,
                       delimiter=None, quoting=None):
    """Read ``[id, address]`` pairs from a delimited text file.

    The first line is always treated as a header and skipped; a warning is
    printed when its first cell is not ``id``.

    Args:
        path: Input file; defaults to ``input_path``.
        idColumn: Zero-based index of the id column; defaults to
            ``id_column``.
        addrColumn: Zero-based index of the address column; defaults to
            ``addr_column``.
        delimiter: Field separator; defaults to ``reader_delimiter``.
        quoting: ``csv`` quoting mode; defaults to ``reader_quoting``.

    Returns:
        A list with one entry per non-blank data row. Well-formed rows yield
        ``[id, address]``. Rows too short to hold both columns yield a
        one-element list (``[id]``, or ``['']`` when the id is missing too)
        so the caller can report them instead of the read crashing.
        An empty file yields ``[]``.
    """
    path = input_path if path is None else path
    idColumn = id_column if idColumn is None else idColumn
    addrColumn = addr_column if addrColumn is None else addrColumn
    delimiter = reader_delimiter if delimiter is None else delimiter
    quoting = reader_quoting if quoting is None else quoting

    addresses = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=delimiter, quoting=quoting)

        # Check and ignore first line; an empty file has no header at all.
        first_row = next(reader, None)
        if first_row is None:
            return addresses
        if not first_row or first_row[0] != 'id':
            print('Warning: First line is ignored, it should have value of "id;address"')

        needed = max(idColumn, addrColumn)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to geocode.
                continue
            if len(row) > needed:
                addresses.append([row[idColumn], row[addrColumn]])
            else:
                # Malformed row: keep only the id so it can be reported.
                addresses.append([row[idColumn]] if len(row) > idColumn else [''])

    return addresses


def _address_fields(components):
    """Flatten geocoder ``address_components`` into the found-file columns.

    Each component is assigned by the first matching type, checked in this
    order: street_number, route, country, administrative_area_level_1,
    postal_code, locality, postal_town. Fields with no matching component
    stay ``''``.

    Returns:
        ``[street, street_short, number, postal_code, city, state,
        state_short, country]``.
    """
    number = street = street_short = postal_code = ''
    city = state = state_short = country = ''

    for component in components:
        types = component['types']
        if 'street_number' in types:
            number = component['long_name']
        elif 'route' in types:
            street = component['long_name']
            street_short = component['short_name']
        elif 'country' in types:
            country = component['long_name']
        elif 'administrative_area_level_1' in types:
            state = component['long_name']
            state_short = component['short_name']
        elif 'postal_code' in types:
            postal_code = component['long_name']
        elif 'locality' in types or 'postal_town' in types:
            city = component['long_name']

    return [street, street_short, number, postal_code,
            city, state, state_short, country]


def main():
    """Geocode every address from the input file and write two result files.

    Successful lookups go to ``found_<timestamp>.csv``; rows that are
    malformed, return a non-OK status, or raise during the lookup go to
    ``failed_<timestamp>.csv``. Each file is flushed after every row so that
    partial results survive an interrupted run.
    """
    # Get all addresses from CSV
    addresses = addresses_from_csv(path=input_path, idColumn=id_column,
                                   addrColumn=addr_column)

    time_str = time.strftime('%Y%m%d-%H%M%S')
    fails_count = 0
    success_count = 0
    total_addresses = len(addresses)

    # Context managers guarantee both files are closed, even on Ctrl-C.
    with open('found_' + time_str + '.csv', 'w', newline='\n', encoding='utf-8') as file_found, \
            open('failed_' + time_str + '.csv', 'w', newline='\n', encoding='utf-8') as file_failed:
        writer_found = csv.writer(file_found, delimiter=';', quoting=csv.QUOTE_ALL,
                                  quotechar='"', escapechar='\\')
        writer_failed = csv.writer(file_failed, delimiter=';', quoting=csv.QUOTE_ALL,
                                   quotechar='"', escapechar='\\')

        # Set headers
        writer_found.writerow(FOUND_HEADER)
        writer_failed.writerow(FAILED_HEADER)

        for query in tqdm(addresses):
            if len(query) != 2:
                reason = 'Expected two columns but found ' + str(len(query))
                writer_failed.writerow([query[0], reason, ''])
                file_failed.flush()
                print('Skipping ' + query[0] + ' ' + reason)
                fails_count += 1
                # Without this the row would also be sent to the API and
                # counted as a second failure.
                continue

            try:
                # Passing the address via params percent-encodes it, so
                # characters such as '&' or '#' cannot corrupt the query.
                # NOTE(review): the Geocoding API documents "language", not
                # "lang" - kept as in the original; confirm before changing.
                response = requests.get(
                    GEOCODE_URL,
                    params={'address': query[1], 'lang': 'en',
                            'key': google_maps_api_key},
                    timeout=15,
                )
                data = response.json()
                status = data['status']
                found_row = None
                if status == 'OK':
                    components = data['results'][0]['address_components']
                    found_row = [query[0]] + _address_fields(components)
            except Exception as exc:
                # Per-row boundary: one bad lookup must not abort the batch.
                # Exception (not a bare except) still lets Ctrl-C through.
                writer_failed.writerow([query[0], 'Exception', query[1]])
                file_failed.flush()
                print('Exception while trying to get #' + query[0] + ': ' + repr(exc))
                fails_count += 1
                continue

            if found_row is None:
                if status == 'ZERO_RESULTS':
                    reason = 'ZERO_RESULT'
                    print('No result found for #' + query[0])
                else:
                    # e.g. OVER_QUERY_LIMIT / REQUEST_DENIED: record the
                    # real status instead of a generic exception.
                    reason = status
                    print('Geocoding failed for #' + query[0] + ': ' + str(status))
                writer_failed.writerow([query[0], reason, query[1]])
                file_failed.flush()
                print(response.url)
                print(data)
                fails_count += 1
            else:
                writer_found.writerow(found_row)
                file_found.flush()
                success_count += 1

    print('############')
    print('# Found: ' + str(success_count))
    print('# Failed: ' + str(fails_count))
    print('# Total: ' + str(total_addresses))
    print('############')
    print('Done')


if __name__ == '__main__':
    main()