Spruced up the output. Reintroduced futures. Ignored broken CSV files.

dvogel · Dec 4, 2011 · a6240df
1 parent b2edbd3
commit a6240df
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 10 deletions.
diff --git a/analyzer.py b/analyzer.py
@@ -41,15 +41,19 @@ def analyze_records(reader, fiscal_year, datefield, fields):
     digits = dict(((month, dict.fromkeys(fields, []))
                    for month in fy_months))
 
-    for record in reader:
+    for (line_number, record) in enumerate(reader, 2):
         dtstr = record[datefield]
         if dtstr is None or dtstr.strip() == '':
             print >>sys.stderr, "Skipping record with blank date field."
             continue
         dt = parsedate(record[datefield], settings.DATE_FORMATS)
         dt1 = date(dt.year, dt.month, 1)
         if dt1 not in fy_months:
-            print "Skipping %s because it's not in fy_months" % repr(dt1)
+            fy_begin = min(fy_months)
+            fy_end = max(fy_months)
+            print "Skipping %s-%s because it's not in %s-%s - %s-%s" % (dt1.year, dt1.month,
+                                                                        fy_begin.year, fy_begin.month,
+                                                                        fy_end.year, fy_end.month)
             continue
 
         for field in fields:

diff --git a/main.py b/main.py
@@ -1,6 +1,8 @@
+import _csv
 import os
 import sys
 import json
+from futures import ThreadPoolExecutor
 from functools import partial
 from datetime import datetime, date
 from itertools import islice, imap
@@ -39,12 +41,15 @@ def download_and_analyze(fiscal_year, agency, spending_type):
         if isinstance(dl_result, DownloadFileFailure):
             return (False, dl_result)
         print >>sys.stdout, "Got file %s" % filename
-
+
-        analyses = analyze_file(destpath, fiscal_year, 
+        try:
-                                settings.ANALYSIS_DATEFIELDS[spending_type],
+            analyses = analyze_file(destpath, fiscal_year, 
-                                settings.ANALYSIS_FIELDS[spending_type])
+                                    settings.ANALYSIS_DATEFIELDS[spending_type],
-        save_analyses(db, fiscal_year, agency, spending_type, analyses)
+                                    settings.ANALYSIS_FIELDS[spending_type])
-        return (True, analyses)
+            save_analyses(db, fiscal_year, agency, spending_type, analyses)
+            return (True, analyses)
+        except _csv.Error, e:
+            return (False, e)
 
     return (True, None)
 
@@ -61,9 +66,10 @@ def save_analyses(db, fiscal_year, agency, spending_type, analyses):
             monthly_analyses.save(analysis)
 
 def main():
+    timewarp = ThreadPoolExecutor(2)
     for fiscal_year in settings.FISCAL_YEARS:
-        results = imap(lambda combs: apply(download_and_analyze, combs),
+        results = timewarp.map(lambda combs: apply(download_and_analyze, combs),
-                       usaspending.file_param_combs(fiscal_year))
+                               usaspending.file_param_combs(fiscal_year))
         for result in results:
             success = result[0]
             if success: