Permalink
Browse files

load in bill and vote info from govtrack; first stab at maplight

  • Loading branch information...
1 parent 671ea12 commit 7e303e124c89018160c0fe7468999d6c1db7c5c5 @aaronsw committed May 7, 2008
Showing with 475 additions and 0 deletions.
  1. +56 −0 import/load/bills.py
  2. +9 −0 import/load/maplight.py
  3. +27 −0 import/load/tools.py
  4. +22 −0 schema.sql
  5. +361 −0 vendor/xmltramp.py
View
@@ -0,0 +1,56 @@
+"""
+load bill data
+
+from: data/crawl/govtrack/us/*/{bills,rolls}
+"""
+from __future__ import with_statement
+import os, sys, glob
+import xmltramp, web
+from tools import db, govtrackp
+
+def bill2dict(bill):
+ d = {}
+ d['id'] = 'us/%s/%s%s' % (bill('session'), bill('type'), bill('number'))
+ d['session'] = bill('session')
+ d['type'] = bill('type')
+ d['number'] = bill('number')
+ d['introduced'] = bill.introduced('datetime')
+ d['title'] = [unicode(x) for x in bill.titles['title':]
+ if x('type') == 'official'][0]
+ d['sponsor'] = govtrackp(bill.sponsor().get('id'))
+ d['summary'] = unicode(bill.summary)
+ return d
+
+def fixvote(s):
+ return {'0': None, '+': 1, '-': -1, 'P': 0}[s]
+
+with db.transaction():
+ db.delete('vote', '1=1')
+ db.delete('bill', '1=1')
+ for fn in glob.glob('../data/crawl/govtrack/us/*/bills/*.xml'):
+ bill = xmltramp.load(fn)
+ d = bill2dict(bill)
+ db.insert('bill', seqname=False, **d)
+ print '\r', d['id'],
+ sys.stdout.flush()
+
+ done = []
+ for vote in bill.actions['vote':]:
+ if not vote().get('roll'): continue
+ if vote('where') in done: continue # don't count veto overrides
+ done.append(vote('where'))
+
+ votedoc = '%s/rolls/%s%s-%s.xml' % (
+ d['session'],
+ vote('where'),
+ vote('datetime')[:4],
+ vote('roll'))
+ vote = xmltramp.load('../data/crawl/govtrack/us/' + votedoc)
+ for voter in vote['voter':]:
+ rep = govtrackp(voter('id'))
+ if rep:
+ db.insert('vote', seqname=False,
+ politician_id=rep, bill_id=d['id'], vote=fixvote(voter('vote')))
+
+if __name__ == "__main__":
+ pass
View
@@ -0,0 +1,9 @@
+"""
+load maplight info
+
+from: data/crawl/maplight/
+"""
+
+import csv
+
+# Wrap the MapLight bill-research export in a CSV reader.  Nothing consumes
+# `c` yet; this script is only a first stab.
+c = csv.reader(file('../data/crawl/maplight/map_export_bill_research.csv'))
View
@@ -0,0 +1,27 @@
+"""
+common tools for load scripts
+"""
+import os
+import web
+# Shared database handle for the load scripts.  The engine name comes from
+# the DATABASE_ENGINE environment variable (default 'postgres'); the database
+# name is fixed to 'watchdog_dev'.
+db = web.database(dbn=os.environ.get('DATABASE_ENGINE', 'postgres'), db='watchdog_dev')
+
+# govtrack id -> watchdog politician id; filled lazily by govtrackp().
+_govtrackcache = {}
+
+def govtrackp(govtrack_id):
+ """
+ Return the watchdog ID for a person's `govtrack_id`.
+
+ >>> govtrackp('400114')
+ 'michael_f._doyle'
+ >>> print govtrackp('aosijdoisad') # ID we don't have
+ None
+ """
+ if not _govtrackcache:
+ for pol in db.select('politician', what='id, govtrackid'):
+ _govtrackcache[pol.govtrackid] = str(pol.id)
+
+ return _govtrackcache.get(govtrack_id)
+
+# Running this module directly executes the doctests above; they call
+# govtrackp(), so they query the politician table through `db`.
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
View
@@ -4,6 +4,8 @@ DROP TABLE IF EXISTS politician CASCADE;
DROP TABLE IF EXISTS interest_group_ratings CASCADE;
DROP TABLE IF EXISTS interest_group_rating CASCADE;
DROP TABLE IF EXISTS interest_group CASCADE;
+-- bill and vote are dropped here and re-created below
+DROP TABLE IF EXISTS bill CASCADE;
+DROP TABLE IF EXISTS vote CASCADE;
CREATE TABLE state (
-- index.json
@@ -87,8 +89,28 @@ CREATE TABLE interest_group_rating ( -- interest group scores for politicians
rating int -- typically 0-100
);
+CREATE TABLE bill ( -- one row per govtrack bill, filled by import/load/bills.py
+ id varchar(256) primary key, -- 'us/<session>/<type><number>'
+ session int,
+ type varchar(5),
+ number int,
+ introduced date,
+ title text, -- the bill's first title of type 'official'
+ sponsor varchar(256) references politician, -- NULL when the sponsor's govtrack id has no politician row
+ summary text
+);
+
+CREATE TABLE vote ( -- one row per politician per bill, filled by import/load/bills.py
+ bill_id varchar(256) references bill,
+ politician_id varchar(256) references politician,
+ vote int2, -- loader stores 1 for '+', -1 for '-', 0 for 'P', NULL for '0'
+ primary key (bill_id, politician_id)
+);
+
GRANT ALL on state TO watchdog;
GRANT ALL on district TO watchdog;
GRANT ALL on politician TO watchdog;
GRANT ALL on interest_group_rating TO watchdog;
GRANT ALL on interest_group TO watchdog;
+-- give the watchdog role access to the new bill and vote tables
+GRANT ALL on bill TO watchdog;
+GRANT ALL on vote TO watchdog;
Oops, something went wrong.

0 comments on commit 7e303e1

Please sign in to comment.