first version of a simple bill id filter

commit 78fc6a926d382acde0c85242a288dd9ecc01c6b1 (1 parent: ae84657)
@h4ck3rm1k3 authored
Showing with 54 additions and 18 deletions.
  1. +40 −11 billy/bin/update.py
  2. +14 −1 billy/scrape/__init__.py
  3. +0 −6 billy/utils.py
51 billy/bin/update.py
@@ -1,4 +1,9 @@
#!/usr/bin/env python
+from billy.utils import configure_logging, term_for_session
+configure_logging("startup")  # needed before the command-line args are parsed
+import logging
+_log = logging.getLogger('billy')
+
import bson.binary
from bson.binary import ALL_UUID_SUBTYPES
from bson.binary import OLD_UUID_SUBTYPE
@@ -7,7 +12,7 @@
import sys
import json
import glob
-import logging
+
import inspect
import argparse
import traceback
@@ -19,7 +24,6 @@
# code snippet, to be included in 'sitecustomize.py'
import sys
-
from pymongo.errors import OperationFailure
def info(type, value, tb):
@@ -47,10 +51,10 @@ def info(type, value, tb):
from billy.conf import settings, base_arg_parser
from billy.scrape import (ScrapeError, JSONDateEncoder, get_scraper,
                          check_sessions)
-from billy.utils import configure_logging, term_for_session
+
from billy.scrape.validator import DatetimeValidator
-_log = logging.getLogger('billy')
+
def _clear_scraped_data(output_dir, scraper_type=''):
# make or clear directory for this type
@@ -118,13 +122,18 @@ def _run_scraper(scraper_type, options, metadata):
    for time in times:
        scraper.validate_term(time, scraper.latest_only)
+    # apply the optional single-bill filter before running the scraper
+    if options.billid is not False:
+        scraper.set_filter_bill_id(options.billid)
+
    # run scraper against year/session/term
    for time in times:
        # old style
        if _is_old_scrape(scraper.scrape):
            for chamber in options.chambers:
                scraper.scrape(chamber, time)
-        else:
+        else:
            scraper.scrape(time, chambers=options.chambers)
    if scraper_type == 'events' and len(options.chambers) == 2:
@@ -173,7 +182,7 @@ def _do_imports(abbrev, args):
dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
dist['boundary_id'] = dist['boundary_id'] % dist
dist['num_seats'] = int(dist['num_seats'])
- _log.debug(dist)
+# _log.debug(dist)
db.districts.save(dist, safe=True)
else:
_log.warning("%s not found, continuing without "
@@ -242,21 +251,31 @@ def main(old_scrape_compat=False):
    for arg in ('upper', 'lower'):
        what.add_argument('--' + arg, action='append_const',
                          dest='chambers', const=arg)
+
    for arg in ('bills', 'legislators', 'committees', 'votes', 'events'):
        what.add_argument('--' + arg, action='append_const', dest='types',
                          const=arg)
+
    for arg in ('scrape', 'import', 'report'):
        parser.add_argument('--' + arg, dest='actions',
                            action="append_const", const=arg,
                            help='only run %s step' % arg)
+
    # special modes for debugging
    scrape.add_argument('--nonstrict', action='store_false', dest='strict',
                        default=True, help="don't fail immediately when"
                        " encountering validation warning")
+
    scrape.add_argument('--fastmode', help="scrape in fast mode",
                        action="store_true", default=False)
+
+    scrape.add_argument('--billid', help="scrape only a single bill",
+                        action="store", default=False)
+
    # scrapelib overrides
    scrape.add_argument('-r', '--rpm', action='store', type=int,
                        dest='SCRAPELIB_RPM')
@@ -339,9 +358,13 @@ def main(old_scrape_compat=False):
terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
','.join(args.sessions), ','.join(args.terms))
_log.info(plan)
-
scrape_data = {}
+ if args.billid is False :
+ _log.debug("No billid filter.")
+ else:
+ _log.debug("Search for billid: %s" % args.billid)
+
if 'scrape' in args.actions:
_clear_scraped_data(args.output_dir)
@@ -387,17 +410,23 @@ def main(old_scrape_compat=False):
    exec_start = dt.datetime.utcnow()
    # scraper order matters
-    order = ('legislators', 'committees', 'votes', 'bills', 'events')
+    if args.billid is False:
+        order = ('legislators', 'committees', 'votes', 'bills', 'events')
+    else:
+        _log.debug("going to process bills")
+        order = ('bills',)  # only process the bills
+
    _traceback = None
    try:
        for stype in order:
+            _log.debug("considering %s" % stype)
            if stype in args.types:
-
+                _log.debug("processing %s" % stype)
                scraper_results = _run_scraper(stype, args, metadata)
-                _log.debug(scraper_results)
-
                run_record += scraper_results
+            else:
+                _log.debug("skipping %s" % stype)
    except Exception as e:
        _traceback = _, _, exc_traceback = sys.exc_info()
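
Taken together, the update.py changes add a --billid option that defaults to False, hand any supplied value to the scraper via set_filter_bill_id(), and collapse the scrape order to bills only when a filter is present. A minimal, self-contained sketch of that control flow (the 'HB 2' value is only an illustrative example, not part of the commit):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--billid', help="scrape only a single bill",
                        action="store", default=False)
    args = parser.parse_args(['--billid', 'HB 2'])   # example value

    # same decision update.py now makes: a filter restricts the run to bills
    if args.billid is False:
        order = ('legislators', 'committees', 'votes', 'bills', 'events')
    else:
        order = ('bills',)

    print(order)   # -> ('bills',)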
15 billy/scrape/__init__.py
@@ -76,6 +76,19 @@ class Scraper(scrapelib.Scraper):
    latest_only = False
+
+    def set_filter_bill_id(self, billid):
+        """Filter only this bill id."""
+        self.filter_bill_id = billid
+
+    def get_filter_bill_id(self):
+        """Return the bill id filter (False when no filter is set)."""
+        return self.filter_bill_id
+
    def __init__(self, metadata, output_dir=None, strict_validation=None,
                 fastmode=False, **kwargs):
        """
@@ -98,7 +111,7 @@ def __init__(self, metadata, output_dir=None, strict_validation=None,
        kwargs['cache_write_only'] = False
        super(Scraper, self).__init__(**kwargs)
-
+        self.filter_bill_id = False
        self.metadata = metadata
        self.output_dir = output_dir
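
The commit stores the filter on the Scraper base class but does not show any scraper consuming it yet. A hypothetical sketch of how a bill scraper could honour the filter (ExampleBillScraper, list_bill_ids and scrape_bill are illustrative names, not billy APIs):

    from billy.scrape import Scraper

    class ExampleBillScraper(Scraper):
        def scrape(self, session, chambers):
            wanted = self.get_filter_bill_id()
            for bill_id in self.list_bill_ids(session):     # hypothetical helper
                if wanted is not False and bill_id != wanted:
                    continue                                 # skip bills outside the filter
                self.scrape_bill(session, bill_id)           # hypothetical helper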
6 billy/utils.py
@@ -77,12 +77,6 @@ def extract_fields(d, fields, delimiter='|'):
def configure_logging(module=None):
-#logging.basicConfig(level=logging.DEBUG)
-#h = logging.StreamHandler()
-#f = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s %(lineno)d %(message)s")
-#h.setFormatter(f)
-#x.addHandler(h)
-
    if module:
        format = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s " + module + " %(funcName)s %(lineno)d %(message)s")
    else:
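
The configure_logging("startup") call added at the top of update.py relies on this function. Its full body is not shown in the diff; based on the format string above, it presumably amounts to something like the following sketch (the basicConfig call and the log level are assumptions, not confirmed by the diff):

    import logging

    def configure_logging(module=None):
        if module:
            fmt = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s "
                   + module + " %(funcName)s %(lineno)d %(message)s")
        else:
            fmt = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s "
                   "%(funcName)s %(lineno)d %(message)s")
        logging.basicConfig(level=logging.INFO, format=fmt)   # level is an assumption

    configure_logging("startup")
    logging.getLogger('billy').info("logging configured")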