first version of a simple bill id filter
h4ck3rm1k3 committed Aug 18, 2012
1 parent ae84657 commit 78fc6a9
Showing 3 changed files with 54 additions and 18 deletions.
51 changes: 40 additions & 11 deletions billy/bin/update.py
@@ -1,4 +1,9 @@
#!/usr/bin/env python
from billy.utils import configure_logging, term_for_session
configure_logging("startup") # we need this before the command line args are read in
import logging
_log = logging.getLogger('billy')

import bson.binary
from bson.binary import ALL_UUID_SUBTYPES
from bson.binary import OLD_UUID_SUBTYPE
@@ -7,7 +12,7 @@
import sys
import json
import glob
import logging

import inspect
import argparse
import traceback
@@ -19,7 +24,6 @@
# code snippet, to be included in 'sitecustomize.py'
import sys


from pymongo.errors import OperationFailure

def info(type, value, tb):
@@ -47,10 +51,10 @@ def info(type, value, tb):
from billy.conf import settings, base_arg_parser
from billy.scrape import (ScrapeError, JSONDateEncoder, get_scraper,
check_sessions)
from billy.utils import configure_logging, term_for_session

from billy.scrape.validator import DatetimeValidator

_log = logging.getLogger('billy')


def _clear_scraped_data(output_dir, scraper_type=''):
# make or clear directory for this type
@@ -118,13 +122,18 @@ def _run_scraper(scraper_type, options, metadata):
for time in times:
scraper.validate_term(time, scraper.latest_only)

# apply the bill id filter when one was requested on the command line
if options.billid is not False:
scraper.set_filter_bill_id(options.billid)


# run scraper against year/session/term
for time in times:
# old style
if _is_old_scrape(scraper.scrape):
for chamber in options.chambers:
scraper.scrape(chamber, time)
else:
scraper.scrape(time, chambers=options.chambers)

if scraper_type == 'events' and len(options.chambers) == 2:
@@ -173,7 +182,7 @@ def _do_imports(abbrev, args):
dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
dist['boundary_id'] = dist['boundary_id'] % dist
dist['num_seats'] = int(dist['num_seats'])
_log.debug(dist)
# _log.debug(dist)
db.districts.save(dist, safe=True)
else:
_log.warning("%s not found, continuing without "
@@ -242,21 +251,31 @@ def main(old_scrape_compat=False):
for arg in ('upper', 'lower'):
what.add_argument('--' + arg, action='append_const',
dest='chambers', const=arg)


for arg in ('bills', 'legislators', 'committees', 'votes', 'events'):
what.add_argument('--' + arg, action='append_const', dest='types',
const=arg)

for arg in ('scrape', 'import', 'report'):
parser.add_argument('--' + arg, dest='actions',
action="append_const", const=arg,
help='only run %s step' % arg)


# special modes for debugging
scrape.add_argument('--nonstrict', action='store_false', dest='strict',
default=True, help="don't fail immediately when"
" encountering validation warning")

scrape.add_argument('--fastmode', help="scrape in fast mode",
action="store_true", default=False)


scrape.add_argument('--billid', help="scrape only a single bill",
action="store", default=False)


# scrapelib overrides
scrape.add_argument('-r', '--rpm', action='store', type=int,
dest='SCRAPELIB_RPM')
@@ -339,9 +358,13 @@ def main(old_scrape_compat=False):
terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
','.join(args.sessions), ','.join(args.terms))
_log.info(plan)

scrape_data = {}

if args.billid is False:
_log.debug("No billid filter.")
else:
_log.debug("Searching for billid: %s" % args.billid)

if 'scrape' in args.actions:
_clear_scraped_data(args.output_dir)

@@ -387,17 +410,23 @@ def main(old_scrape_compat=False):
exec_start = dt.datetime.utcnow()

# scraper order matters
order = ('legislators', 'committees', 'votes', 'bills', 'events')
if args.billid is False:
order = ('legislators', 'committees', 'votes', 'bills', 'events')
else:
_log.debug("bill id filter set; only processing bills")
order = ('bills',)  # only run the bill scraper when filtering by bill id

_traceback = None
try:
for stype in order:
_log.debug("consider to process %s" % stype )
if stype in args.types:

_log.debug("going to process %s" % stype )
scraper_results= _run_scraper(stype, args, metadata)

_log.debug(scraper_results)

run_record += scraper_results
else:
_log.debug("skipping %s" % stype )

except Exception as e:
_traceback = _, _, exc_traceback = sys.exc_info()
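Taken together, the update.py changes thread an optional --billid value from the command line into the scraper before the run starts and, when a filter is present, narrow the scraper order to bills only. The following is a minimal, self-contained sketch of that wiring under the diff's own convention that False means "no filter"; FakeScraper and run() are hypothetical stand-ins for illustration, not billy code.

    import argparse

    class FakeScraper(object):
        # hypothetical stand-in mirroring the new filter API on billy's Scraper
        filter_bill_id = False

        def set_filter_bill_id(self, billid):
            self.filter_bill_id = billid

    def run(argv=None):
        parser = argparse.ArgumentParser()
        parser.add_argument('--billid', help="scrape only a single bill",
                            action="store", default=False)
        args = parser.parse_args(argv)

        scraper = FakeScraper()
        # as in update.py: only install the filter when --billid was given
        if args.billid is not False:
            scraper.set_filter_bill_id(args.billid)

        # as in update.py: with a bill id filter, only the bill scraper runs
        if args.billid is False:
            order = ('legislators', 'committees', 'votes', 'bills', 'events')
        else:
            order = ('bills',)
        return scraper, order

    if __name__ == '__main__':
        scraper, order = run(['--billid', 'HB 1'])
        print("%s %s" % (scraper.filter_bill_id, order))  # HB 1 ('bills',)

Keeping the default at False (rather than None) matches the explicit `is False` checks the diff uses throughout update.py.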
15 changes: 14 additions & 1 deletion billy/scrape/__init__.py
@@ -76,6 +76,19 @@ class Scraper(scrapelib.Scraper):

latest_only = False


"""
Filter only this bill id
"""
def set_filter_bill_id (self, billid):
# _log.debug("old self.filter_bill_id was :%s" % self.filter_bill_id)
self.filter_bill_id = billid
# _log.debug("net self.filter_bill_id= %s" % self.filter_bill_id)

def get_filter_bill_id (self):
# _log.debug("self.filter_bill_id %s" % self.filter_bill_id)
return self.filter_bill_id

def __init__(self, metadata, output_dir=None, strict_validation=None,
fastmode=False, **kwargs):
"""
@@ -98,7 +111,7 @@ def __init__(self, metadata, output_dir=None, strict_validation=None,
kwargs['cache_write_only'] = False

super(Scraper, self).__init__(**kwargs)

self.filter_bill_id = False
self.metadata = metadata
self.output_dir = output_dir

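This commit only adds the filter_bill_id default plus the setter and getter; the diff does not show where a bill scraper actually consults the filter. A plausible consumer might look like the sketch below — DummyBillScraper and its scrape() signature are illustrative assumptions, not an existing billy or openstates API.

    class DummyBillScraper(object):
        # hypothetical scraper carrying the same filter attribute and accessors
        filter_bill_id = False

        def set_filter_bill_id(self, billid):
            self.filter_bill_id = billid

        def get_filter_bill_id(self):
            return self.filter_bill_id

        def scrape(self, session, bill_ids):
            wanted = self.get_filter_bill_id()
            scraped = []
            for bill_id in bill_ids:
                # skip everything except the requested bill when a filter is set
                if wanted is not False and bill_id != wanted:
                    continue
                scraped.append(bill_id)
            return scraped

    s = DummyBillScraper()
    s.set_filter_bill_id('HB 1')
    print(s.scrape('2012', ['HB 1', 'HB 2', 'SB 3']))  # ['HB 1']

With no filter set (the False default), the loop scrapes every bill, so existing scrapers would keep their current behavior.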
6 changes: 0 additions & 6 deletions billy/utils.py
@@ -77,12 +77,6 @@ def extract_fields(d, fields, delimiter='|'):

def configure_logging(module=None):

#logging.basicConfig(level=logging.DEBUG)
#h = logging.StreamHandler()
#f = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s %(lineno)d %(message)s")
#h.setFormatter(f)
#x.addHandler(h)

if module:
format = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s " + module + " %(funcName)s %(lineno)d %(message)s")
else:
