first version of a simple bill id filter

commit 78fc6a926d382acde0c85242a288dd9ecc01c6b1 (parent: ae84657)
James Michael DuPont authored on August 18, 2012
billy/bin/update.py (51 changes)

@@ -1,4 +1,9 @@
 #!/usr/bin/env python
+from billy.utils import configure_logging, term_for_session
+configure_logging("startup") # we need this before the command line args are read in
+import logging
+_log = logging.getLogger('billy')
+
 import bson.binary
 from bson.binary import ALL_UUID_SUBTYPES
 from bson.binary import OLD_UUID_SUBTYPE
@@ -7,7 +12,7 @@
 import sys
 import json
 import glob
-import logging
+
 import inspect
 import argparse
 import traceback
@@ -19,7 +24,6 @@
 # code snippet, to be included in 'sitecustomize.py'
 import sys
 
-
 from pymongo.errors import OperationFailure
 
 def info(type, value, tb):
@@ -47,10 +51,10 @@ def info(type, value, tb):
 from billy.conf import settings, base_arg_parser
 from billy.scrape import (ScrapeError, JSONDateEncoder, get_scraper,
                           check_sessions)
-from billy.utils import configure_logging, term_for_session
+
 from billy.scrape.validator import DatetimeValidator
 
-_log = logging.getLogger('billy')
+
 
 def _clear_scraped_data(output_dir, scraper_type=''):
     # make or clear directory for this type
@@ -118,13 +122,18 @@ def _run_scraper(scraper_type, options, metadata):
         for time in times:
             scraper.validate_term(time, scraper.latest_only)
 
+    #
+    if (not(options.billid is False)):
+        scraper.set_filter_bill_id(options.billid)
+
+
     # run scraper against year/session/term
     for time in times:
         # old style
         if _is_old_scrape(scraper.scrape):
            for chamber in options.chambers:
                 scraper.scrape(chamber, time)
-        else:
+        else:
             scraper.scrape(time, chambers=options.chambers)
 
         if scraper_type == 'events' and len(options.chambers) == 2:
@@ -173,7 +182,7 @@ def _do_imports(abbrev, args):
             dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
             dist['boundary_id'] = dist['boundary_id'] % dist
             dist['num_seats'] = int(dist['num_seats'])
-            _log.debug(dist)
+#            _log.debug(dist)
             db.districts.save(dist, safe=True)
     else:
         _log.warning("%s not found, continuing without "
@@ -242,21 +251,31 @@ def main(old_scrape_compat=False):
         for arg in ('upper', 'lower'):
             what.add_argument('--' + arg, action='append_const',
                               dest='chambers', const=arg)
+
+
         for arg in ('bills', 'legislators', 'committees', 'votes', 'events'):
             what.add_argument('--' + arg, action='append_const', dest='types',
                               const=arg)
+
         for arg in ('scrape', 'import', 'report'):
             parser.add_argument('--' + arg, dest='actions',
                                 action="append_const", const=arg,
                                 help='only run %s step' % arg)
 
+
         # special modes for debugging
         scrape.add_argument('--nonstrict', action='store_false', dest='strict',
                             default=True, help="don't fail immediately when"
                             " encountering validation warning")
+
         scrape.add_argument('--fastmode', help="scrape in fast mode",
                             action="store_true", default=False)
 
+
+        scrape.add_argument('--billid', help="scrape only a single bill",
+                            action="store", default=False)
+
+
         # scrapelib overrides
         scrape.add_argument('-r', '--rpm', action='store', type=int,
                             dest='SCRAPELIB_RPM')
@@ -339,9 +358,13 @@ def main(old_scrape_compat=False):
     terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
                    ','.join(args.sessions), ','.join(args.terms))
         _log.info(plan)
-
         scrape_data = {}
 
+        if args.billid is False :
+            _log.debug("No billid filter.")
+        else:
+            _log.debug("Search for billid: %s" % args.billid)
+
         if 'scrape' in args.actions:
             _clear_scraped_data(args.output_dir)
 
@@ -387,17 +410,23 @@ def main(old_scrape_compat=False):
             exec_start = dt.datetime.utcnow()
 
             # scraper order matters
-            order = ('legislators', 'committees', 'votes', 'bills', 'events')
+            if args.billid is False :
+                order = ('legislators', 'committees', 'votes', 'bills', 'events')
+            else:
+                _log.debug("going to process bills")
+                order = ('bills',) # only process the bills
+
             _traceback = None
             try:
                 for stype in order:
+                    _log.debug("consider to process %s" % stype )
                     if stype in args.types:
-
+                        _log.debug("going to process %s" % stype )
                         scraper_results= _run_scraper(stype, args, metadata)
 
-                        _log.debug(scraper_results)
-
                         run_record += scraper_results
+                    else:
+                        _log.debug("skipping %s" % stype )
 
             except Exception as e:
                 _traceback = _, _, exc_traceback = sys.exc_info()
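
Read end to end, the update.py changes thread the new option through in three steps: argparse stores --billid with default=False as a sentinel, _run_scraper hands any supplied value to the scraper via set_filter_bill_id(), and main() narrows the scraper order to ('bills',) whenever a filter is active. A minimal sketch of that sentinel-and-narrow pattern (illustrative only, not billy's actual code):

    import argparse

    parser = argparse.ArgumentParser()
    # default=False is the sentinel for "no filter requested";
    # any string given on the command line is the bill id to keep
    parser.add_argument('--billid', action='store', default=False)

    args = parser.parse_args(['--billid', 'HB 19'])

    # mirror of the order-narrowing logic in main(): with a filter
    # active, only the bills scraper is worth running
    if args.billid is False:
        order = ('legislators', 'committees', 'votes', 'bills', 'events')
    else:
        order = ('bills',)

    assert args.billid == 'HB 19' and order == ('bills',)

The explicit identity tests matter here: "args.billid is False" distinguishes "flag never given" from any supplied value, so the diff's "if (not(options.billid is False)):" is just a less idiomatic spelling of "if options.billid is not False:".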
billy/scrape/__init__.py (15 changes)

@@ -76,6 +76,19 @@ class Scraper(scrapelib.Scraper):
 
     latest_only = False
 
+
+    """
+    Filter only this bill id
+    """
+    def set_filter_bill_id (self, billid):
+#        _log.debug("old self.filter_bill_id was :%s" % self.filter_bill_id)
+        self.filter_bill_id = billid
+#        _log.debug("net self.filter_bill_id= %s" % self.filter_bill_id)
+
+    def get_filter_bill_id (self):
+#        _log.debug("self.filter_bill_id %s" % self.filter_bill_id)
+        return self.filter_bill_id
+
     def __init__(self, metadata, output_dir=None, strict_validation=None,
                  fastmode=False, **kwargs):
         """
@@ -98,7 +111,7 @@ def __init__(self, metadata, output_dir=None, strict_validation=None,
             kwargs['cache_write_only'] = False
 
         super(Scraper, self).__init__(**kwargs)
-
+        self.filter_bill_id = False
         self.metadata = metadata
         self.output_dir = output_dir
 
billy/utils.py (6 changes)

@@ -77,12 +77,6 @@ def extract_fields(d, fields, delimiter='|'):
 
 def configure_logging(module=None):
 
-#logging.basicConfig(level=logging.DEBUG)
-#h = logging.StreamHandler()
-#f = logging.Formatter("%(levelname)s %(asctime)s %(funcName)s %(lineno)d %(message)s")
-#h.setFormatter(f)
-#x.addHandler(h)
-
     if module:
         format = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s " + module + " %(funcName)s %(lineno)d %(message)s")
     else:
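
The utils.py hunk only deletes dead, commented-out handler setup; what survives builds a format string that embeds the module tag ("startup" in the configure_logging("startup") call added to update.py above). A rough sketch of installing such a format (assuming plain logging.basicConfig, which may not match the rest of configure_logging):

    import logging

    module = "startup"  # matches configure_logging("startup") in update.py
    fmt = ("BILLY:%(pathname)s %(asctime)s %(name)s %(levelname)s "
           + module + " %(funcName)s %(lineno)d %(message)s")
    logging.basicConfig(level=logging.DEBUG, format=fmt)

    logging.getLogger('billy').debug("billy logging configured")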
