Skip to content

Commit

Permalink
import rolls, allow staggered import of bills
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronsw committed Oct 1, 2008
1 parent f826c4b commit 3be04aa
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 86 deletions.
7 changes: 1 addition & 6 deletions import/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,8 @@ database: $(LOADERS)
$(PYTHON) load/earmarks.py
touch $@

$(LOAD)/bill.tsv $(LOAD)/vote.tsv: load/bills.py
.bills: load/bills.py
$(PYTHON) load/bills.py
touch $@

.bills: $(LOAD)/bill.tsv $(LOAD)/vote.tsv .schema
(echo "BEGIN; DELETE FROM interest_group_bill_support; DELETE FROM vote; DELETE FROM bill;"; cat $(LOAD)/bill.tsv; cat $(LOAD)/vote.tsv; echo "COMMIT;") | $(DBCLIENT) $(WATCHDOG_TABLE)
touch $@

.interests: load/maplight.py .bills
$(PYTHON) load/maplight.py
Expand Down
146 changes: 84 additions & 62 deletions import/load/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,33 @@
from: data/crawl/govtrack/us/110/{bills,rolls}
"""
from __future__ import with_statement
import os, sys, glob
import os, sys, glob, anydbm
from psycopg2 import IntegrityError #@@level-breaker
import xmltramp
import web
from tools import db, govtrackp

DATA_DIR = '../data/'
DATA_DIR = '../data'
GOVTRACK_CRAWL = DATA_DIR+'/crawl/govtrack'

class NotDone(Exception):
    """Raised by a loader to signal that a file could not be fully imported.

    makemarkdone() catches it and leaves the file unrecorded, so the file
    is attempted again on the next run.
    """


def makemarkdone(done):
    """Build a decorator that skips files already loaded at their current mtime.

    done -- a mutable mapping from file name to the stringified mtime the
        file had when it was last loaded successfully (main() passes an
        anydbm database).

    The returned decorator wraps a loader `func(fn, ...)`. The wrapped
    loader calls `func` only when `fn` is absent from `done` or its mtime
    differs from the recorded one, and records the new mtime afterwards.
    If `func` raises NotDone the exception is swallowed and nothing is
    recorded; any other exception propagates, also without recording.
    The wrapped loader always returns None.
    """
    def markdone(func):
        def internal(fn, *a, **kw):
            # Kept as a string so it can be stored in a dbm-style mapping.
            mtime = str(os.stat(fn).st_mtime)
            if fn in done and done[fn] == mtime:
                return
            try:
                # Forward the extra arguments (e.g. loadbill's maplightid);
                # they used to be accepted here but silently dropped.
                func(fn, *a, **kw)
            except NotDone:
                # Leave fn unrecorded so the next run retries it.
                return
            done[fn] = mtime
        return internal
    return markdone

def fixvote(s):
    """Translate a vote code from a GovTrack roll file into a number.

    '+' -> 1, '-' -> -1, 'P' -> 0 and '0' -> None.
    Any other code raises KeyError.
    """
    codes = {'+': 1, '-': -1, 'P': 0, '0': None}
    return codes[s]

def bill2dict(bill):
d = web.storage()
d.id = 'us/%s/%s%s' % (bill('session'), bill('type'), bill('number'))
Expand All @@ -34,77 +53,80 @@ def bill2dict(bill):
d.sponsor_id = govtrackp(bill.sponsor().get('id'))
return d

def fixvote(s):
return {'0': None, '+': 1, '-': -1, 'P': 0}[s]

vote_list = {}
bill_list =[]
def loadbill(fn, maplightid=None, batch_mode=False):
def loadbill(fn, maplightid=None):
bill = xmltramp.load(fn)
d = bill2dict(bill)
if maplightid: d['maplightid'] = maplightid
else: d['maplightid'] = None
if not batch_mode: db.insert('bill', seqname=False, **d)
print >>sys.stderr,'\r %-25s' % d['id'],
sys.stderr.flush()
d.maplightid = maplightid

try:
bill_id = d.id
db.insert('bill', seqname=False, **d)
except IntegrityError:
bill_id = d.pop('id')
db.update('bill', where="id=" + web.sqlquote(bill_id), **d)

done = []
d['yeas']=d['neas']=0
positions = {}
for vote in bill.actions['vote':]:
if not vote().get('roll'): continue
if vote('where') in done: continue # don't count veto overrides
done.append(vote('where'))

votedoc = '%s/rolls/%s%s-%s.xml' % (
d['session'],
vote('where'),
vote('datetime')[:4],
vote('roll'))
vote = xmltramp.load(GOVTRACK_CRAWL+'/us/' + votedoc)
yeas = 0
neas = 0
rolldoc = '/us/%s/rolls/%s%s-%s.xml' % (
d.session, vote('where'), vote('datetime')[:4], vote('roll'))
roll = xmltramp.load(GOVTRACK_CRAWL + rolldoc)
for voter in roll['voter':]:
positions[govtrackp(voter('id'))] = fixvote(voter('vote'))

if None in positions: del positions[None]
with db.transaction():
db.delete('position', where='bill_id=$bill_id', vars=locals())
for p, v in positions.iteritems():
db.insert('position', seqname=False,
bill_id=bill_id, politician_id=p, vote=v)


def loadroll(fn):
roll = web.storage()
roll.id = fn.split('/')[-1].split('.')[0]
vote = xmltramp.load(fn)
if vote['bill':]:
b = vote.bill
roll.bill_id = 'us/%s/%s%s' % (b('session'), b('type'), b('number'))
else:
roll.bill_id = None
roll.type = str(vote.type)
roll.question = str(vote.question)
roll.required = str(vote.required)
roll.result = str(vote.result)

try:
db.insert('roll', seqname=False, **roll)
except IntegrityError:
if not db.update('roll', where="id=" + web.sqlquote(roll.id), bill_id=roll.bill_id):
print "\nMissing bill:", roll.bill_id
raise NotDone

with db.transaction():
db.delete('vote', where="roll_id=$roll.id", vars=locals())
for voter in vote['voter':]:
if fixvote(voter('vote')) == 1:
yeas += 1
elif fixvote(voter('vote')) == -1:
neas += 1
rep = govtrackp(voter('id'))
if rep:
if batch_mode:
vote_list['%(bill_id)s\t%(politician_id)s'% {'bill_id':d['id'], 'politician_id':rep}]={'bill_id':d['id'], 'politician_id':rep, 'vote':fixvote(voter('vote'))}
else:
if not db.select('vote',where="bill_id=$d['id'] AND politician_id=$rep", vars=locals()):
db.insert('vote', seqname=False, politician_id=rep, bill_id=d['id'], vote=fixvote(voter('vote')))
else:
print
print "Updating:", votedoc, rep, d['id'], fixvote(voter('vote'))
db.update('vote', where="bill_id=$d['id'] AND politician_id=$rep", vote=fixvote(voter('vote')),vars=locals())

if not batch_mode: db.update('bill', where="id = $d['id']", yeas=yeas, neas=neas, vars=locals())
d['yeas'] = yeas
d['neas'] = neas
if batch_mode: bill_list.append(d)
db.insert('vote', seqname=False,
politician_id=rep, roll_id=roll.id, vote=fixvote(voter('vote')))
else:
pass #@@!--check again after load_everyone
# print "\nMissing rep: %s" % voter('id')


################################################################################
def main():
from bulk_loader import bulk_loader_db
for c,fn in enumerate(sorted(glob.glob(GOVTRACK_CRAWL+'/us/*/bills/*.xml'))):
loadbill(fn, batch_mode=True)


db = bulk_loader_db(os.environ.get('WATCHDOG_TABLE', 'watchdog_dev'))
bill_cols = ['id', 'session', 'type', 'number', 'introduced', 'title', 'sponsor_id', 'summary', 'maplightid', 'yeas', 'neas']
db.open_table('bill', bill_cols, delete_first=True, filename=DATA_DIR+'load/bill.tsv')
vote_col = ['bill_id', 'politician_id', 'vote']
db.open_table('vote', vote_col, delete_first=True, filename=DATA_DIR+'load/vote.tsv')
for bill in bill_list:
db.insert('bill',**bill)
for vote in vote_list.values():
db.insert('vote',**vote)

done = anydbm.open('.bills', 'c')
markdone = makemarkdone(done)

for fn in sorted(glob.glob(GOVTRACK_CRAWL+'/us/*/bills/*.xml')):
print >>sys.stderr,'\r %-25s' % fn,; sys.stderr.flush()
markdone(loadbill)(fn)

for fn in sorted(glob.glob(GOVTRACK_CRAWL+'/us/*/rolls/*.xml')):
print >>sys.stderr,'\r %-25s' % fn,; sys.stderr.flush()
markdone(loadroll)(fn)
print >>sys.stderr, '\r' + ' '*72

if __name__ == "__main__":
main()


2 changes: 1 addition & 1 deletion import/load/fec.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def load_fec_cans():
)

def load_fec_efilings():
for f in fec_cvs.parse_efilings():
for f in fec_csv.parse_efilings():
for s in f['schedules']:
if s['type'] == 'contribution':
politician_id = None
Expand Down
6 changes: 3 additions & 3 deletions import/load/maplight.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def generate_similarities():
"""
Generate similarity information for each (interest group, politician) pair and store in DB
"""
result = db.query('select igbp.group_id, vote.politician_id, igbp.support, vote.vote'
' from interest_group_bill_support igbp, vote'
' where igbp.bill_id = vote.bill_id')
result = db.query('select igbp.group_id, position.politician_id, igbp.support, position.vote'
' from interest_group_bill_support igbp, position'
' where igbp.bill_id = position.bill_id')
sim = {}
total = {}

Expand Down
78 changes: 71 additions & 7 deletions schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,28 @@ def _uri_(self):
sponsor = sql.Reference(Politician) #@@rename to sponsor_id
summary = sql.String()
maplightid = sql.String(10)

# computed from vote
yeas = sql.Number()
neas = sql.Number()

interest_group_support = sql.Backreference('Interest_group_bill_support', 'bill', order='support desc')

@property
def name(self):
    """Return the display name: type abbreviation, a space, then the number.

    For example type 'h' and number 1 give 'H.R. 1'. Raises KeyError when
    self.type is not one of the eight codes listed below.
    """
    prefixes = dict([
        ('h', 'H.R.'),
        ('s', 'S.'),
        ('hj', 'H.J.Res.'),
        ('sj', 'S.J.Res.'),
        ('hc', 'H.Con.Res.'),
        ('sc', 'S.Con.Res.'),
        ('hr', 'H.Res.'),
        ('sr', 'S.Res.'),
    ])
    prefix = prefixes[self.type]
    return ' '.join((prefix, str(self.number)))

@property
def votes_by_party(self):
"""Get the votes of the political parties for a bill."""
result = db.select(['politician p, vote v'],
result = db.select(['politician p, position v'],
what="v.vote, count(v.vote), p.party",
where="v.politician_id = p.id and v.bill_id = $self.id "
"AND v.vote is not null",
Expand All @@ -218,7 +229,7 @@ def votes_by_party(self):
def votes_by_caucus(self):
caucuses = simplejson.load(file('import/load/manual/caucuses.json'))
members = sum([x['members'] for x in caucuses], [])
result = db.select(['vote'],
result = db.select(['position'],
where=web.sqlors('politician_id=', members) +
'AND bill_id=' + web.sqlquote(self.id),
vars=locals()
Expand All @@ -236,7 +247,60 @@ def votes_by_caucus(self):
cdict[v] += 1
return d

class Vote (sql.Table):
class Roll(sql.Table):
    """A single roll-call vote, optionally linked to the bill it concerned."""
    id = sql.String(primary=True)
    type = sql.String()
    question = sql.String()
    required = sql.String()
    result = sql.String()
    bill = sql.Reference(Bill)

    #@@@@@ DON'T REPEAT YOURSELF
    @property
    def votes_by_party(self):
        """Get the votes of the political parties for this roll call.

        Returns a dict of {party: {vote: count}}. Rows whose vote is NULL
        are excluded by the query.
        """
        result = db.select(['politician p, vote v'],
            what="v.vote, count(v.vote), p.party",
            where="v.politician_id = p.id and v.roll_id = $self.id "
                  "AND v.vote is not null",
            group="p.party, v.vote",
            vars = locals()
            ).list()

        d = {}
        for r in result:
            d.setdefault(r.party, {})
            d[r.party][r.vote] = r.count
        return d

    @property
    def votes_by_caucus(self):
        """Tally this roll call's votes per caucus.

        Caucus membership is read from import/load/manual/caucuses.json.
        Returns None when no caucus member has a vote row for this roll;
        otherwise a dict of {caucus name: {vote: count}}, where members
        with no vote row are counted under the key None.
        """
        # Close the handle explicitly so it is not leaked on every access.
        f = open('import/load/manual/caucuses.json')
        try:
            caucuses = simplejson.load(f)
        finally:
            f.close()
        members = sum([x['members'] for x in caucuses], [])
        # The leading space matters: with an empty member list web.sqlors
        # yields "1=2", and "1=2AND ..." is not reliably parsed as SQL.
        result = db.select(['vote'],
            where=web.sqlors('politician_id=', members) +
                  ' AND roll_id=' + web.sqlquote(self.id),
            vars=locals()
            ).list()

        if not result: return None

        votemap = dict((r.politician_id, r.vote) for r in result)
        d = {}
        for c in caucuses:
            cdict = d[c['name']] = {}
            for m in c['members']:
                v = votemap.get(m)
                cdict.setdefault(v, 0)
                cdict[v] += 1
        return d

# One politician's recorded vote on one roll call (rows are inserted by
# loadroll in import/load/bills.py).
class Vote(sql.Table):
# Both references are flagged primary=True.
roll = sql.Reference(Roll, primary=True)
politician = sql.Reference(Politician, primary=True)
# Loaded from bills.py's fixvote(): 1, -1, 0, or NULL.
vote = sql.Int2()

# One politician's position on one bill (rows are inserted by loadbill in
# import/load/bills.py from the bill's roll-call votes).
class Position(sql.Table):
# Both references are flagged primary=True.
bill = sql.Reference(Bill, primary=True)
politician = sql.Reference(Politician, primary=True)
# Loaded from bills.py's fixvote(): 1, -1, or 0 (loadbill drops entries
# whose politician id is None, not entries whose vote is None).
vote = sql.Int2()
Expand Down
2 changes: 1 addition & 1 deletion templates/bill.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$def with (bill)

$var title: $bill.type.upper(). $bill.number
$var title: $bill.name

<p><strong>$bill.title</strong></p>

Expand Down
5 changes: 2 additions & 3 deletions templates/politician_group.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
$var title: Votes

<table>
<tr><th>Bill</th><th>Pol</th><th>Group</th><th>Full vote</th></tr>
<tr><th>Bill</th><th>Pol</th><th>Group</th></tr>

$ support_decode = {1: 'Support', 0: 'Abstain', -1: 'Oppose', None: 'Unknown'}

Expand All @@ -14,6 +14,5 @@
>$support_decode[vote.vote]</span></td>
<td><span class="$support_decode[vote.support].lower()"
>$support_decode[vote.support]</span></td>
<td>$vote.yeas-$vote.neas</td>
</tr>
</table>
</table>
20 changes: 17 additions & 3 deletions webapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
r'/p/(.*?)/(\d+)', 'politician_group',
r'/p/(.*?)%s?' % options, 'politician',
r'/b/(.*?)%s?' % options, 'bill',
r'/r/us/(.*?)%s?' % options, 'roll',
r'/c', petition.app,
r'/u', users.app,
r'/writerep', writerep.app,
Expand Down Expand Up @@ -181,6 +182,19 @@ def bill_list(format, page=0, limit=50):

return render.bill_list(bills, limit)

class roll:
    """Request handler for the /r/us/... URLs: one roll call and its votes."""

    def GET(self, roll_id, format=None):
        """Render the roll call identified by roll_id.

        Raises web.notfound when no Roll row has that id. If
        apipublish.publish() produces output for the requested format,
        that output is returned; otherwise the roll template is rendered
        with the roll and its Vote rows.
        """
        try:
            matches = schema.Roll.where(id=roll_id)
            record = matches[0]
            votes = schema.Vote.where(roll_id=record.id)
        except IndexError:
            raise web.notfound

        published = apipublish.publish([record], format)
        if published:
            return published
        return render.roll(record, votes)

class bill:
def GET(self, bill_id, format=None):
if bill_id == "" or bill_id == "index":
Expand Down Expand Up @@ -250,9 +264,9 @@ def GET(self, politician_id):

class politician_group:
def GET(self, politician_id, group_id):
votes = db.select(['vote', 'interest_group_bill_support', 'bill'],
where="interest_group_bill_support.bill_id = vote.bill_id AND "
"vote.bill_id = bill.id AND "
votes = db.select(['position', 'interest_group_bill_support', 'bill'],
where="interest_group_bill_support.bill_id = position.bill_id AND "
"position.bill_id = bill.id AND "
"politician_id = $politician_id AND group_id = $group_id",
order='vote = support desc',
vars=locals())
Expand Down

0 comments on commit 3be04aa

Please sign in to comment.