Skip to content
This repository has been archived by the owner on May 26, 2021. It is now read-only.

Commit

Permalink
Parse created_at consistently
Browse files Browse the repository at this point in the history
  • Loading branch information
georgiana-b committed Jul 26, 2016
1 parent bf74111 commit 669a927
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
4 changes: 2 additions & 2 deletions data_quality/tasks/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def run(self, pipeline):
if self.assess_timeliness:
extractor = RelevancePeriodExtractor(self.config)
extractor.run()
+ self.lookup = self.get_lookup()

with compat.UnicodeAppender(self.result_file, quoting=csv.QUOTE_MINIMAL) as result_file:
source = self.get_source(pipeline.data_source)
Expand Down Expand Up @@ -183,9 +184,8 @@ def get_publication_delay(self, source):
raise ValueError(('Cannot assess timeliness: Bad value for "created_at"'
' in source with id {0}.').format(source['id']))
dates['period_end'] = dates['period_end'] or dates['period_start']
- self.timeliness_period = int(self.timeliness_period) * 30
  timely_until = dates['period_end'] + \
- timedelta(days=self.timeliness_period)
+ timedelta(days=(self.timeliness_period * 30))
if dates['period_start'] <= dates['publication_date'] <= timely_until:
delay = 0
else:
Expand Down
11 changes: 6 additions & 5 deletions data_quality/tasks/extract_relevance_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,14 @@ def run(self):

for source in sources:
if source['period_id'] is None:
- self.timeliness_strategy = ['created_at']
- period_start, period_end = self.identify_period(source)
+ creation_date = utilities.date_from_string(source['created_at'])
+ dates = [creation_date, creation_date]
else:
period_start, period_end = source['period_id']
- periods = [period_start.date(), period_end.date()]
- periods = [period.strftime('%d-%m-%Y') for period in periods]
- source['period_id'] = '/'.join(periods)
+ dates = [period_start.date(), period_end.date()]
+ dates = [date.strftime('%d-%m-%Y') if isinstance(date, datetime.date)
+ else '' for date in dates]
+ source['period_id'] = '/'.join(dates)
self.update_sources_period(sources)

def extract_period_from_sources(self):
Expand Down

0 comments on commit 669a927

Please sign in to comment.