Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Spruced up the output. Reintroduced futures. Now ignores broken CSV files.

  • Loading branch information...
commit a6240df289eeb662150106c2e2919c457af6d2d2 1 parent b2edbd3
Drew Vogel authored
Showing with 20 additions and 10 deletions.
  1. +6 −2 analyzer.py
  2. +14 −8 main.py
View
8 analyzer.py
@@ -41,7 +41,7 @@ def analyze_records(reader, fiscal_year, datefield, fields):
digits = dict(((month, dict.fromkeys(fields, []))
for month in fy_months))
- for record in reader:
+ for (line_number, record) in enumerate(reader, 2):
dtstr = record[datefield]
if dtstr is None or dtstr.strip() == '':
print >>sys.stderr, "Skipping record with blank date field."
@@ -49,7 +49,11 @@ def analyze_records(reader, fiscal_year, datefield, fields):
dt = parsedate(record[datefield], settings.DATE_FORMATS)
dt1 = date(dt.year, dt.month, 1)
if dt1 not in fy_months:
- print "Skipping %s because it's not in fy_months" % repr(dt1)
+ fy_begin = min(fy_months)
+ fy_end = max(fy_months)
+ print "Skipping %s-%s because it's not in %s-%s - %s-%s" % (dt1.year, dt1.month,
+ fy_begin.year, fy_begin.month,
+ fy_end.year, fy_end.month)
continue
for field in fields:
View
22 main.py
@@ -1,6 +1,8 @@
+import _csv
import os
import sys
import json
+from futures import ThreadPoolExecutor
from functools import partial
from datetime import datetime, date
from itertools import islice, imap
@@ -39,12 +41,15 @@ def download_and_analyze(fiscal_year, agency, spending_type):
if isinstance(dl_result, DownloadFileFailure):
return (False, dl_result)
print >>sys.stdout, "Got file %s" % filename
-
- analyses = analyze_file(destpath, fiscal_year,
- settings.ANALYSIS_DATEFIELDS[spending_type],
- settings.ANALYSIS_FIELDS[spending_type])
- save_analyses(db, fiscal_year, agency, spending_type, analyses)
- return (True, analyses)
+
+ try:
+ analyses = analyze_file(destpath, fiscal_year,
+ settings.ANALYSIS_DATEFIELDS[spending_type],
+ settings.ANALYSIS_FIELDS[spending_type])
+ save_analyses(db, fiscal_year, agency, spending_type, analyses)
+ return (True, analyses)
+ except _csv.Error, e:
+ return (False, e)
return (True, None)
@@ -61,9 +66,10 @@ def save_analyses(db, fiscal_year, agency, spending_type, analyses):
monthly_analyses.save(analysis)
def main():
+ timewarp = ThreadPoolExecutor(2)
for fiscal_year in settings.FISCAL_YEARS:
- results = imap(lambda combs: apply(download_and_analyze, combs),
- usaspending.file_param_combs(fiscal_year))
+ results = timewarp.map(lambda combs: apply(download_and_analyze, combs),
+ usaspending.file_param_combs(fiscal_year))
for result in results:
success = result[0]
if success:
Please sign in to comment.
Something went wrong with that request. Please try again.