diff --git a/AUTHORS.rst b/AUTHORS.rst
index 6302aac..6d91c91 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -10,4 +10,4 @@ Initial Work
 Contributors
 ------------
 
-Soon..
+* Joakim Uddholm
diff --git a/scrapyd_api/client.py b/scrapyd_api/client.py
index 9d99886..a214c2b 100644
--- a/scrapyd_api/client.py
+++ b/scrapyd_api/client.py
@@ -22,6 +22,10 @@ def _handle_response(self, response):
                 response.status_code,
                 response.text))
 
+    def _handle_json_response(self, response):
+        """
+        Parses the JSON body of a response received from Scrapyd.
+        """
         try:
             json = response.json()
         except ValueError:
@@ -34,5 +38,14 @@ def _handle_response(self, response):
             raise ScrapydResponseError(json['message'])
 
     def request(self, *args, **kwargs):
+        """
+        Accepts an optional `not_json` keyword argument; when true, the
+        raw response text is returned instead of the parsed JSON body.
+        """
+        not_json = kwargs.pop('not_json', False)
         response = super(Client, self).request(*args, **kwargs)
-        return self._handle_response(response)
+        self._handle_response(response)
+        if not_json:
+            return response.text
+        else:
+            return self._handle_json_response(response)
diff --git a/scrapyd_api/constants.py b/scrapyd_api/constants.py
index 2df33c5..2dcfbc4 100644
--- a/scrapyd_api/constants.py
+++ b/scrapyd_api/constants.py
@@ -9,6 +9,7 @@
 LIST_SPIDERS_ENDPOINT = 'list_spiders'
 LIST_VERSIONS_ENDPOINT = 'list_versions'
 SCHEDULE_ENDPOINT = 'schedule'
+LOG_ENDPOINT = 'fetch_log'
 
 DEFAULT_ENDPOINTS = {
     ADD_VERSION_ENDPOINT: '/addversion.json',
@@ -20,6 +21,7 @@
     LIST_SPIDERS_ENDPOINT: '/listspiders.json',
     LIST_VERSIONS_ENDPOINT: '/listversions.json',
     SCHEDULE_ENDPOINT: '/schedule.json',
+    LOG_ENDPOINT: '/logs/{project}/{spider}/{job_id}.log',
 }
 
 FINISHED = 'finished'
diff --git a/scrapyd_api/wrapper.py b/scrapyd_api/wrapper.py
index 23afde5..e497c7e 100644
--- a/scrapyd_api/wrapper.py
+++ b/scrapyd_api/wrapper.py
@@ -45,7 +45,7 @@ def __init__(self, target='http://localhost:6800', auth=None,
         self.endpoints = deepcopy(constants.DEFAULT_ENDPOINTS)
         self.endpoints.update(endpoints)
 
-    def _build_url(self, endpoint):
+    def _build_url(self, endpoint, **kwargs):
         """
         Builds the absolute URL using the target and desired endpoint.
         """
@@ -55,6 +55,7 @@ def _build_url(self, endpoint):
             msg = 'Unknown endpoint `{0}`'
             raise ValueError(msg.format(endpoint))
         absolute_url = urljoin(self.target, path)
+        absolute_url = absolute_url.format(**kwargs)
         return absolute_url
 
     def add_version(self, project, version, egg):
@@ -181,3 +182,14 @@ def schedule(self, project, spider, settings=None, **kwargs):
         data['setting'] = setting_params
         json = self.client.post(url, data=data)
         return json['jobid']
+
+    def fetch_log(self, project, spider, job_id):
+        """
+        Fetches the log for a specific job. Maps to Scrapyd's log endpoint,
+        which is not documented as part of the API but is available when
+        logging is enabled.
+        """
+        url = self._build_url(
+            constants.LOG_ENDPOINT, project=project,
+            spider=spider, job_id=job_id)
+        return self.client.get(url, not_json=True)
diff --git a/tests/test_client.py b/tests/test_client.py
index ad0bc59..4f42883 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -72,6 +72,9 @@ def test_handle_http_error_response():
     with pytest.raises(ScrapydResponseError) as excinfo:
         non_authed_client.get(URL)
     assert '500 error' in str(excinfo.value)
+    with pytest.raises(ScrapydResponseError) as excinfo:
+        non_authed_client.get(URL, not_json=True)
+    assert '500 error' in str(excinfo.value)
 
 
 @responses.activate
@@ -85,6 +88,7 @@ def test_non_or_invalid_json_response_errors():
     with pytest.raises(ScrapydResponseError) as excinfo:
         non_authed_client.get(URL)
     assert 'invalid JSON' in str(excinfo.value)
+    non_authed_client.get(URL, not_json=True)
 
 
 @responses.activate
diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 8ec0787..213342c 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -82,6 +82,18 @@ def test_build_url_with_non_existant_endpoint_errors():
     api._build_url('does-not-exist')
 
 
+def test_build_url_with_kwargs():
+    """
+    Tests that _build_url interpolates keyword arguments into the URL.
+    """
+    custom_endpoints = {
+        'TEST': '/{var1}/{var2}?somevar={var3}'
+    }
+    api = ScrapydAPI('http://localhost', endpoints=custom_endpoints)
+    url = api._build_url('TEST', var1='hello', var2='world', var3='!')
+    assert url == 'http://localhost/hello/world?somevar=!'
+
+
 def test_add_version():
     """
     Test the method which handles adding a new version of a project.
@@ -295,3 +307,17 @@ def test_schedule():
                                   'DOWNLOAD_DELAY=2']
     assert 'spider' in data_kw
     assert data_kw['spider'] == SPIDER
+
+
+def test_fetch_logs():
+    """
+    Test the method which fetches the log for a specific job id.
+    """
+    mock_client = MagicMock()
+    mock_client.get.return_value = "example log"
+    api = ScrapydAPI(HOST_URL, client=mock_client)
+    rtn = api.fetch_log(PROJECT, SPIDER, JOB)
+    args, kwargs = mock_client.get.call_args
+    assert rtn == mock_client.get.return_value
+    assert args[0] == 'http://localhost/logs/{0}/{1}/{2}.log'.format(
+        PROJECT, SPIDER, JOB)