Skip to content

Commit

Permalink
Merge 136cd06 into b36b686
Browse files Browse the repository at this point in the history
  • Loading branch information
solleks committed Jun 12, 2020
2 parents b36b686 + 136cd06 commit 7a74552
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
7 changes: 6 additions & 1 deletion commcare_export/cli.py
Expand Up @@ -17,7 +17,7 @@
from commcare_export.checkpoint import CheckpointManagerProvider
from commcare_export.misc import default_to_json
from commcare_export.utils import get_checkpoint_manager
from commcare_export.commcare_hq_client import CommCareHqClient, LATEST_KNOWN_VERSION
from commcare_export.commcare_hq_client import CommCareHqClient, LATEST_KNOWN_VERSION, ResourceRepeatException
from commcare_export.commcare_minilinq import CommCareHqEnv
from commcare_export.env import BuiltInEnv, JsonPathEnv, EmitterEnv
from commcare_export.exceptions import LongFieldsException, DataExportException, MissingQueryFileException
Expand Down Expand Up @@ -263,6 +263,11 @@ def evaluate_query(env, query):
return EXIT_STATUS_ERROR
else:
raise
except ResourceRepeatException as e:
print('Stopping because the export is stuck')
print(e.message)
print('Try increasing --batch-size to overcome the error')
return EXIT_STATUS_ERROR
except KeyboardInterrupt:
print('\nExport aborted', file=sys.stderr)
return EXIT_STATUS_ERROR
Expand Down
20 changes: 19 additions & 1 deletion commcare_export/commcare_hq_client.py
Expand Up @@ -24,7 +24,7 @@
logger = logging.getLogger(__name__)

LATEST_KNOWN_VERSION='0.5'

RESOURCE_REPEAT_LIMIT=10

def on_backoff(details):
_log_backoff(details, 'Waiting for retry.')
Expand All @@ -49,6 +49,14 @@ def is_client_error(ex):
return False


class ResourceRepeatException(Exception):
    """Error whose human-readable text is kept on ``self.message``.

    ``str()`` of the exception returns that same text unchanged.
    """

    def __init__(self, message):
        # Initialise the base class explicitly, then expose the text as an
        # attribute so handlers can read ``e.message`` directly.
        super(ResourceRepeatException, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message


class CommCareHqClient(object):
"""
A connection to CommCareHQ for a particular version, project, and user.
Expand Down Expand Up @@ -118,9 +126,19 @@ def iterate_resource(resource=resource, params=params):
last_batch_ids = set()
total_count = None
fetched = 0
repeat_counter = 0
last_params = None

while more_to_fetch:
if params == last_params:
repeat_counter += 1
else:
repeat_counter = 0
if repeat_counter >= RESOURCE_REPEAT_LIMIT:
raise ResourceRepeatException("Requested resource '{}' {} times with same parameters".format(resource, repeat_counter))

batch = self.get(resource, params)
last_params = params
if not total_count or total_count == 'unknown' or fetched >= total_count:
total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else 'unknown'
fetched = 0
Expand Down
30 changes: 29 additions & 1 deletion tests/test_commcare_hq_client.py
Expand Up @@ -8,8 +8,10 @@

import requests

import pytest

from commcare_export.checkpoint import CheckpointManagerWithSince
from commcare_export.commcare_hq_client import CommCareHqClient
from commcare_export.commcare_hq_client import CommCareHqClient, ResourceRepeatException
from commcare_export.commcare_minilinq import SimplePaginator, DatePaginator, resource_since_params, get_paginator


Expand Down Expand Up @@ -53,6 +55,27 @@ def _get_results(self, params):
}


class FakeRepeatedDateCaseSession(FakeSession):
    """Fake session that answers every request with the same two cases.

    Models the situation where at least as many cases share a single
    server_date_modified as fit in one batch (batch size 2), so the client
    keeps requesting the same set of cases in a loop.
    """

    def _get_results(self, params):
        if params:
            # Any request that carries parameters must use the shared
            # modification date as its "since" value.
            start_param = resource_since_params['case'].start_param
            assert params[start_param] == '2017-01-01T15:36:22'

        # The payload is the same whether or not parameters were supplied.
        shared_date = '2017-01-01T15:36:22Z'
        return {
            'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4},
            'objects': [
                {'id': case_id, 'foo': case_id, 'server_date_modified': shared_date}
                for case_id in (1, 2)
            ],
        }


class FakeDateFormSession(FakeSession):
def _get_results(self, params):
since1 = '2017-01-01T15:36:22'
Expand Down Expand Up @@ -102,6 +125,11 @@ def test_iterate_date(self):
self._test_iterate(FakeDateFormSession(), get_paginator('form'), 3, [1, 2, 3])
self._test_iterate(FakeDateCaseSession(), get_paginator('case'), 2, [1, 2])

def test_repeat_limit(self):
    """Iterating a session that keeps returning the same batch must raise."""
    expected_message = "Requested resource '/fake/uri' 10 times with same parameters"
    with pytest.raises(ResourceRepeatException, match=expected_message):
        stuck_session = FakeRepeatedDateCaseSession()
        case_paginator = get_paginator('case', 2)
        self._test_iterate(stuck_session, case_paginator, 2, [1, 2])


class TestDatePaginator(unittest.TestCase):

Expand Down

0 comments on commit 7a74552

Please sign in to comment.