Skip to content

Commit

Permalink
Added geocoding script
Browse files Browse the repository at this point in the history
  • Loading branch information
davidfischer committed May 4, 2013
1 parent 11761fe commit 5c23e53
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ venv

# Ignore github archive directory
githubarchive
*.csv
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: githubarchive tests loaddb jsonify
.PHONY: githubarchive tests loaddb jsonify geocode


all: tests
Expand All @@ -25,5 +25,11 @@ jsonify:
@echo "www/data/events.json written"


geocode:
sqlite3 github-events.db < queries/locations.sql > locations.csv
python gdc2/geocoder.py locations.csv
@echo "www/data/locations.json written"


tests:
nosetests
83 changes: 83 additions & 0 deletions gdc2/geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import json
import sys
import time

import requests
import unicodecsv


NOMINATIM_URL = 'http://nominatim.openstreetmap.org/search'
TIMEOUT = 30 # seconds

# Copyright (c) OpenStreetMap Contributors
def nominatim(location):
    """
    Query the Nominatim search endpoint for a free-form location string.

    Sends a single GET request to NOMINATIM_URL asking for at most one
    result in JSON format with address details, waiting up to TIMEOUT
    seconds for a response.

    Returns the decoded JSON body of the response, or None when the request
    raises, the response status is not OK, or the body cannot be decoded.
    This should be called no more than once per sec
    """

    query = {
        'q': location,
        'format': 'json',
        'limit': '1',
        'addressdetails': '1',
    }
    user_agent = {'User-Agent': 'gdc2 geocoder -- REPLACE WITH YOUR EMAIL'}

    try:
        response = requests.get(
            NOMINATIM_URL,
            params=query,
            headers=user_agent,
            timeout=TIMEOUT
        )
    except Exception:
        # Best effort: any failure while making the request means "no result".
        return None

    if not response.ok:
        return None

    try:
        return response.json()
    except Exception:
        # An undecodable body is treated the same as a failed lookup.
        return None

def geocode(location):
    """
    Look up a location string using the Nominatim backend.

    Passes `location` to nominatim() and hands back its result unchanged.
    """

    result = nominatim(location)
    return result


if __name__ == '__main__':
    # Command-line entry point.
    #
    # Reads the CSV named on the command line, normalizes each value of its
    # 'actor_attributes_location' column, geocodes every location that is not
    # already a key in OUTFILE, and writes the merged mapping
    # {location: geocode result or None} back to OUTFILE as JSON.
    if len(sys.argv) < 2:
        sys.stderr.write('USAGE: python gdc2/geocoder.py locations.csv\n')
        sys.exit(1)

    OUTFILE = 'locations.json'

    # Reuse results from a previous run. A missing or unreadable file simply
    # means we start from an empty mapping.
    try:
        with open(OUTFILE, 'r') as f:
            outs = json.loads(f.read())
        geocoded = sum(1 for value in outs.values() if value is not None)
        print("Found %s with %d / %d geocoded locations" % (OUTFILE, geocoded, len(outs)))
    except Exception:
        outs = {}

    # unicodecsv decodes bytes itself, so the file is opened in binary mode.
    locations = []
    with open(sys.argv[1], 'rb') as f:
        reader = unicodecsv.DictReader(f)
        for row in reader:
            line = row['actor_attributes_location']
            # Treat commas and periods as whitespace, then collapse runs of
            # whitespace into single spaces.
            locations.append(' '.join(line.replace(',', ' ').replace('.', ' ').split()))

    # Keep each not-yet-seen location once, in first-seen order, so the count
    # below is accurate and no location is requested more than once per run.
    # Locations already present in outs (even with a None result) are skipped.
    seen = set(outs)
    newlocations = []
    for location in locations:
        if location not in seen:
            seen.add(location)
            newlocations.append(location)

    print("Geocoding %d new locations" % len(newlocations))
    try:
        for i, location in enumerate(newlocations):
            outs[location] = geocode(location)
            # Throttle to at most one request per second.
            time.sleep(1)

            if i % 10 == 0:
                print(" - %d / %d" % (i, len(newlocations)))
    finally:
        # Always persist what has been collected so far, so an interrupted or
        # failed run does not throw away completed lookups.
        with open(OUTFILE, 'w') as f:
            f.write(json.dumps(outs, indent=1))

0 comments on commit 5c23e53

Please sign in to comment.