Skip to content

Commit

Permalink
Spruced up the output. Reintroduced futures. Ignore broken CSV files.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Drew Vogel committed Dec 4, 2011
1 parent b2edbd3 commit a6240df
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
8 changes: 6 additions & 2 deletions analyzer.py
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -41,15 +41,19 @@ def analyze_records(reader, fiscal_year, datefield, fields):
digits = dict(((month, dict.fromkeys(fields, [])) digits = dict(((month, dict.fromkeys(fields, []))
for month in fy_months)) for month in fy_months))


for record in reader: for (line_number, record) in enumerate(reader, 2):
dtstr = record[datefield] dtstr = record[datefield]
if dtstr is None or dtstr.strip() == '': if dtstr is None or dtstr.strip() == '':
print >>sys.stderr, "Skipping record with blank date field." print >>sys.stderr, "Skipping record with blank date field."
continue continue
dt = parsedate(record[datefield], settings.DATE_FORMATS) dt = parsedate(record[datefield], settings.DATE_FORMATS)
dt1 = date(dt.year, dt.month, 1) dt1 = date(dt.year, dt.month, 1)
if dt1 not in fy_months: if dt1 not in fy_months:
print "Skipping %s because it's not in fy_months" % repr(dt1) fy_begin = min(fy_months)
fy_end = max(fy_months)
print "Skipping %s-%s because it's not in %s-%s - %s-%s" % (dt1.year, dt1.month,
fy_begin.year, fy_begin.month,
fy_end.year, fy_end.month)
continue continue


for field in fields: for field in fields:
Expand Down
22 changes: 14 additions & 8 deletions main.py
Original file line number | Original file line | Diff line number | Diff line change
@@ -1,6 +1,8 @@
import _csv
import os import os
import sys import sys
import json import json
from futures import ThreadPoolExecutor
from functools import partial from functools import partial
from datetime import datetime, date from datetime import datetime, date
from itertools import islice, imap from itertools import islice, imap
Expand Down Expand Up @@ -39,12 +41,15 @@ def download_and_analyze(fiscal_year, agency, spending_type):
if isinstance(dl_result, DownloadFileFailure): if isinstance(dl_result, DownloadFileFailure):
return (False, dl_result) return (False, dl_result)
print >>sys.stdout, "Got file %s" % filename print >>sys.stdout, "Got file %s" % filename


analyses = analyze_file(destpath, fiscal_year, try:
settings.ANALYSIS_DATEFIELDS[spending_type], analyses = analyze_file(destpath, fiscal_year,
settings.ANALYSIS_FIELDS[spending_type]) settings.ANALYSIS_DATEFIELDS[spending_type],
save_analyses(db, fiscal_year, agency, spending_type, analyses) settings.ANALYSIS_FIELDS[spending_type])
return (True, analyses) save_analyses(db, fiscal_year, agency, spending_type, analyses)
return (True, analyses)
except _csv.Error, e:
return (False, e)


return (True, None) return (True, None)


Expand All @@ -61,9 +66,10 @@ def save_analyses(db, fiscal_year, agency, spending_type, analyses):
monthly_analyses.save(analysis) monthly_analyses.save(analysis)


def main(): def main():
timewarp = ThreadPoolExecutor(2)
for fiscal_year in settings.FISCAL_YEARS: for fiscal_year in settings.FISCAL_YEARS:
results = imap(lambda combs: apply(download_and_analyze, combs), results = timewarp.map(lambda combs: apply(download_and_analyze, combs),
usaspending.file_param_combs(fiscal_year)) usaspending.file_param_combs(fiscal_year))
for result in results: for result in results:
success = result[0] success = result[0]
if success: if success:
Expand Down

0 comments on commit a6240df

Please sign in to comment.