Skip to content

Commit

Permalink
add initial politician data from govtrack
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronsw committed Apr 13, 2008
1 parent 2ce8b0a commit 8dfd496
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 7 deletions.
6 changes: 5 additions & 1 deletion import/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ DBCLIENT=psql

all: .database

.database: ../schema.sql $(PARSE)/states/index.json $(PARSE)/districts/index.json $(PARSE)/districts/almanac.json $(PARSE)/districts/shapes.json
.database: ../schema.sql load/json.py $(PARSE)/states/index.json $(PARSE)/districts/index.json $(PARSE)/districts/almanac.json $(PARSE)/districts/shapes.json $(PARSE)/politicians/index.json $(PARSE)/politicians/govtrack.json
$(DBCLIENT) watchdog_dev < ../schema.sql
python load/json.py
touch .database
Expand All @@ -24,3 +24,7 @@ $(PARSE)/districts/shapes.json: load/shapes.py

$(PARSE)/politicians/index.json: parse/manual/politicians.json
cp parse/manual/politicians.json $(PARSE)/politicians/index.json

$(PARSE)/politicians/govtrack.json: load/govtrack.py parse/govtrack.py
PYTHONPATH=. python load/govtrack.py > $(PARSE)/politicians/govtrack.json.tmp
mv $(PARSE)/politicians/govtrack.json.tmp $(PARSE)/politicians/govtrack.json
2 changes: 1 addition & 1 deletion import/load/almanac.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():

dist = web.lstrips(web.rstrips(fn.split('/')[-1], '.htm'), 'rep_')

d = scrapenj.scrape1(fn)
d = almanac.scrape1(fn)
if 'demographics' in d:
demo = d['demographics']
district.cook_index = cleanint(demo['Cook Partisan Voting Index'])
Expand Down
44 changes: 44 additions & 0 deletions import/load/govtrack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
load data from govtrack.us
from: data/crawl/govtrack/people.xml
to: data/parse/politicians/govtrack.json
"""

import simplejson
from parse import govtrack

# Load our own politician index (politician id -> record) and invert it into
# a district -> politician id lookup, which callback() uses to match
# GovTrack people to our ids.
index_file = open('../data/parse/politicians/index.json')
try:
    reps = simplejson.load(index_file)
finally:
    # Close explicitly rather than leaking the handle until interpreter exit.
    index_file.close()

# NOTE: if two index entries share a district, the one iterated last wins.
dist2rep = {}
for repid, rep in reps.iteritems():
    dist2rep[rep['district']] = repid

# Key on the parsed GovTrack person -> key used in the record we emit.
mapping = dict([
    # fields that are renamed on the way out
    ('id', 'govtrackid'),
    ('osid', 'opensecretsid'),
    ('represents', 'district'),
    ('url', 'officeurl'),
    # fields that keep their GovTrack name
    ('bioguideid', 'bioguideid'),
    ('birthday', 'birthday'),
    ('firstname', 'firstname'),
    ('gender', 'gender'),
    ('lastname', 'lastname'),
    ('middlename', 'middlename'),
    ('party', 'party'),
    ('religion', 'religion'),
])

# politician id -> translated GovTrack record, filled in by callback()
out = {}


def callback(pol):
    """Record `pol` in `out` if it matches a politician from our index.

    `pol` is the mapping-like object handed over by the GovTrack parser.
    Its fields are renamed according to `mapping`. The record is kept only
    when the person's `represents` value is a known district and their
    lower-cased, underscore-joined last name occurs in the politician id
    registered for that district.
    """
    record = dict(
        (target, pol[source])
        for source, target in mapping.iteritems()
        if source in pol
    )

    district = pol.get('represents')
    if not district or district not in dist2rep:
        return

    polid = dist2rep[district]
    surname = pol.lastname.lower().replace(' ', '_')
    if surname in polid:
        out[polid] = record

if __name__ == "__main__":
    # Run the GovTrack parser with our callback collecting matches into
    # `out`, then write the result to stdout as indented, key-sorted JSON.
    govtrack.main(callback)
    serialized = simplejson.dumps(out, indent=2, sort_keys=True)
    print(serialized)
11 changes: 11 additions & 0 deletions import/load/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,16 @@ def load():
districts = simplejson.load(file(DATA_DIR + '/districts/%s.json' % fn))
for name, district in districts.iteritems():
db.update('district', where='name = $name', vars=locals(), **unidecode(district))

politicians = simplejson.load(file(DATA_DIR + '/politicians/index.json'))
for polid, pol in politicians.iteritems():
db.insert('politician', seqname=False, id=polid, **unidecode(pol))

for fn in ['govtrack']:
print 'loading', fn
politicians = simplejson.load(file(DATA_DIR + '/politicians/%s.json' % fn))
for polid, pol in politicians.iteritems():
db.update('politician', where='id = $polid', vars=locals(), **unidecode(pol))


if __name__ == "__main__": load()
2 changes: 1 addition & 1 deletion import/parse/almanac_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import scrapenj, re, cgitb
import almanac, re, cgitb
cgitb.enable(format='text')

def ok(a, b): assert a == b, (a, b)
Expand Down
41 changes: 41 additions & 0 deletions import/parse/govtrack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
parse data from govtrack.us
from: data/crawl/govtrack/people.xml
"""

import web
from xml.sax import make_parser, handler

class PeopleXML(handler.ContentHandler):
    """SAX content handler that reports each <person> element to a callback.

    On <person>, the element's attributes are copied into a `web.storage`
    held in `self.current`. A `represents` field is derived from it:
    "<state>-<district zero-padded to 2 digits>" when a district is present,
    or just the state otherwise (in which case the title must be 'Sen.').
    A nested <current-committee-assignment> sets `active = True` on the
    current person. On </person>, the callback receives the storage.
    """

    def __init__(self, callback):
        # Initialise the base handler explicitly; the original skipped this.
        handler.ContentHandler.__init__(self)
        self.callback = callback
        self.current = None  # the <person> being read, or None between people

    def startElement(self, name, attrs):
        if name == 'person':
            self.current = web.storage(attrs)
            if self.current.get('district'):
                self.current.represents = self.current.state + '-' + self.current.district.zfill(2)
            elif self.current.get('state'):
                self.current.represents = self.current.state
                # Sanity check on the data: a seat with a state but no
                # district is expected to be a Senate seat.
                assert self.current.title == 'Sen.', \
                    'state without district but title is %r' % (self.current.title,)
        elif name == 'current-committee-assignment' and self.current is not None:
            # Ignore assignments seen outside a <person> rather than failing
            # with an AttributeError on None.
            self.current.active = True

    def endElement(self, name):
        if name == 'person':
            self.callback(self.current)
            self.current = None

def callback(pol):
    """Default callback: print `represents` for each person flagged active."""
    if not pol.get('active', False):
        return
    print(pol.represents)

def main(callback, path='../data/crawl/govtrack/people.xml'):
    """Parse a GovTrack people file, calling `callback` once per <person>.

    `callback` receives the object built by PeopleXML for each person.
    `path` is the XML file to read; it defaults to the crawl location
    relative to the working directory, so existing callers are unaffected.
    """
    parser = make_parser()
    parser.setContentHandler(PeopleXML(callback))
    parser.parse(path)


if __name__ == "__main__":
    main(callback)
23 changes: 21 additions & 2 deletions schema.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
DROP TABLE district;
DROP TABLE state;
DROP TABLE state, district, politician CASCADE;

CREATE TABLE state (
-- index.json
Expand Down Expand Up @@ -29,3 +28,23 @@ CREATE TABLE district (
-- shapes.json
outline text -- geojson
);

-- One row per politician. The loader inserts rows from
-- politicians/index.json and then updates them, matched on id, with the
-- fields from politicians/govtrack.json.
CREATE TABLE politician (
-- index.json
id varchar(256) primary key,
district varchar(10) references district, -- joined against district.name by the web app
wikipedia varchar(256),

-- govtrack.json
bioguideid varchar(256),
opensecretsid varchar(256),
govtrackid varchar(256),
gender varchar(1),
birthday date,
firstname varchar(256),
middlename varchar(256),
lastname varchar(256),
officeurl varchar(256),
party varchar(256),
religion varchar(256)
);
4 changes: 3 additions & 1 deletion templates/district.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

$var head:
<script src="http://maps.google.com/maps?file=api&amp;v=2&amp;key=ABQIAAAA32O3YO3sztAoMSWYheUtTBQYu38vSyEOJo4J0P8Mwlrd5U_lzhQ2ZoxaOYKY-qh7R36VeBWvCtHU5g" type="text/javascript"></script>
<script src="http://www.govtrack.us/scripts/gmap-wms.js"></script>
<script src="/static/distmap.js"></script>
<script type="text/javascript">distmap('googlemap', $:d.outline)</script>

Expand Down Expand Up @@ -34,4 +33,7 @@
</tr>
</table>

$if d.pol_id:
<p>Represented by <a href="/p/$d.pol_id">$d.pol_firstname $d.pol_lastname</a>.</p>

<div class="sources">District information from <cite><a href="http://nationaljournal.com/pubs/almanac/2008/">The Almanac of American Politics 2008</a></cite>. Maps from <a href="http://www.govtrack.us/">GovTrack</a>.</div>
19 changes: 19 additions & 0 deletions templates/politician.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
$def with (pol)

$var head:
<script src="http://maps.google.com/maps?file=api&amp;v=2&amp;key=ABQIAAAA32O3YO3sztAoMSWYheUtTBQYu38vSyEOJo4J0P8Mwlrd5U_lzhQ2ZoxaOYKY-qh7R36VeBWvCtHU5g" type="text/javascript"></script>
<script src="/static/distmap.js"></script>
<script type="text/javascript">distmap('googlemap', $:pol.district_outline)</script>

$var title: $pol.firstname $pol.middlename $pol.lastname

<div id="googlemap" style="width: 150px; height: 150px; margin-bottom: 2em; float: right"></div>

<p>
represents <a href="/us/$pol.district">$pol.district</a><br />
$if pol.birthday: b. $pol.birthday<br />
$pol.party, $pol.religion<br />
<a href="$pol.officeurl">Official website</a>
</p>

<div class="sources">Politician information from <a href="$pol.wikipedia">Wikipedia</a> and <a href="http://www.govtrack.us/congress/person.xpd?id=$pol.govtrackid">GovTrack</a>. Campaign finance information from <a href="http://opensecrets.org/politicians/summary.asp?CID=$pol.opensecretsid">Open Secrets</a>. Voting record from the <a href="http://projects.washingtonpost.com/congress/members/$pol.bioguideid.lower()">Washington Post</a>.</div>
11 changes: 10 additions & 1 deletion webapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
'/us/([a-z][a-z]-\d+)', 'district',
'/us/by/(.*)/distribution.png', 'sparkdist',
'/us/by/(.*)', 'dproperty',
'/p/(.*)', 'politician',
'/about(/?)', 'about',
'/about/feedback', 'feedback',
'/blog', 'reblog',
Expand Down Expand Up @@ -68,12 +69,20 @@ class district:
def GET(self, district):
try:
district = district.upper()
d = db.select(['district', 'state'], what='district.*, state.name as state_name', where='district.name = $district AND district.state = state.code', vars=locals())[0]
d = db.select(['district', 'state', 'politician'], what='district.*, state.name as state_name, politician.firstname as pol_firstname, politician.lastname as pol_lastname, politician.id as pol_id', where='district.name = $district AND district.state = state.code AND politician.district = district.name', vars=locals())[0]
except IndexError:
raise web.notfound

return render.district(d)

class politician:
    """Handler for /p/<polid>: renders a single politician's page."""

    def GET(self, polid):
        # Politician URLs are lowercase; redirect any other casing.
        if polid != polid.lower():
            raise web.seeother('/p/' + polid.lower())

        try:
            p = db.select(['politician', 'district'], what="politician.*, district.outline as district_outline", where='id=$polid AND district.name = politician.district', vars=locals())[0]
        except IndexError:
            # No matching row: answer 404 like the district handler does,
            # instead of letting the IndexError surface as a server error.
            raise web.notfound
        return render.politician(p)

r_safeproperty = re.compile('^[a-z0-9_]+$')

class dproperty:
Expand Down

0 comments on commit 8dfd496

Please sign in to comment.