Skip to content

Commit

Permalink
Merge 189865f into d0d21b5
Browse files (browse the repository at this point in the history)
  • Loading branch information
snopoke committed Jun 22, 2021
2 parents d0d21b5 + 189865f commit 7e68b09
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 24 deletions.
26 changes: 16 additions & 10 deletions commcare_export/commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def iterate(self, resource, paginator, params=None, checkpoint_manager=None):
def iterate_resource(resource=resource, params=params):
more_to_fetch = True
last_batch_ids = set()
total_count = None
total_count = UNKNOWN_COUNT
fetched = 0
repeat_counter = 0
last_params = None
Expand All @@ -141,35 +141,41 @@ def iterate_resource(resource=resource, params=params):

batch = self.get(resource, params)
last_params = copy.copy(params)
if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count:
total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else UNKNOWN_COUNT
batch_meta = batch['meta']
if total_count == UNKNOWN_COUNT or fetched >= total_count:
if batch_meta.get('total_count'):
total_count = int(batch_meta['total_count'])
else:
total_count = UNKNOWN_COUNT
fetched = 0

fetched += len(batch['objects'])
batch_objects = batch['objects']
fetched += len(batch_objects)
logger.debug('Received %s of %s', fetched, total_count)
if not batch['objects']:
if not batch_objects:
more_to_fetch = False
else:
got_new_data = False
for obj in batch['objects']:
for obj in batch_objects:
if obj['id'] not in last_batch_ids:
yield obj
got_new_data = True

if batch['meta']['next']:
last_batch_ids = {obj['id'] for obj in batch['objects']}
if batch_meta.get('next'):
last_batch_ids = {obj['id'] for obj in batch_objects}
params = paginator.next_page_params_from_batch(batch)
if not params:
more_to_fetch = False
else:
more_to_fetch = False

limit = batch['meta'].get('limit')
limit = batch_meta.get('limit')
if more_to_fetch:
# Handle the case where API is 'non-counting' and repeats the last batch
repeated_last_page_of_non_counting_resource = (
not got_new_data
and total_count == UNKNOWN_COUNT
and (limit and len(batch['objects']) < limit)
and (limit and len(batch_objects) < limit)
)
more_to_fetch = not repeated_last_page_of_non_counting_resource

Expand Down
28 changes: 14 additions & 14 deletions tests/test_commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,28 @@ def _get_results(self, params):
}


class FakMessageLogSession(FakeSession):
# For message logs, the last batch returns the same results in a loop, because
# we use a non-counting paginator in tastypie that can't know if it's "finished".
# We gracefully treat this as success when all of the following hold:
# - total_count is absent
# - the number of returned rows is fewer than the limit
# - the contents of the batch are the same as those of the previous batch
class FakeMessageLogSession(FakeSession):
def _get_results(self, params):
obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'}
obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'}
obj_3 = {'id': 3, 'foo': 3, 'date': '2017-01-01T15:38:22Z'}
if not params:
return {
'meta': {'next': '?offset=2', 'offset': 0, 'limit': 2, 'total_count': None},
'meta': {'next': '?cursor=xyz', 'limit': 2},
'objects': [obj_1, obj_2]
}
else:
since_query_param = DATE_PARAMS['date'].start_param
assert params[since_query_param] == '2017-01-01T15:37:22'
return {
'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': None},
'objects': [obj_2]
}
since = params[since_query_param]
if since == '2017-01-01T15:37:22':
return {
'meta': {'next': '?cursor=xyz', 'limit': 2},
'objects': [obj_3]
}
if since == '2017-01-01T15:38:22':
return {'meta': {'next': None, 'limit': 2}, 'objects': []}

raise Exception(since)


class FakeDateFormSession(FakeSession):
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_repeat_limit(self):
self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2])

def test_message_log(self):
self._test_iterate(FakMessageLogSession(), get_paginator('messaging-event', 2), 2, [1, 2])
self._test_iterate(FakeMessageLogSession(), get_paginator('messaging-event', 2), 3, [1, 2, 3])


class TestDatePaginator(unittest.TestCase):
Expand Down

0 comments on commit 7e68b09

Please sign in to comment.