diff --git a/datacommons/__init__.py b/datacommons/__init__.py index f9bdfd41..2f5892ff 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -22,7 +22,7 @@ ############################################################################## # Data Commons SPARQL query support -from datacommons.query import query +from datacommons.sparql import query # Data Commons Python API from datacommons.core import get_property_labels, get_property_values, get_triples diff --git a/datacommons/query.py b/datacommons/query.py deleted file mode 100644 index 94b8ab20..00000000 --- a/datacommons/query.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API Query Module. - -Implements functions for sending graph queries to the Data Commons Graph. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datacommons.utils import _API_ROOT, _API_ENDPOINTS, _ENV_VAR_API_KEY - -import json -import os -import six.moves.urllib.error -import six.moves.urllib.request - -# ----------------------------- WRAPPER FUNCTIONS ----------------------------- - - -def query(query_string, select=None): - """ Returns the results of executing a SPARQL query on the Data Commons graph. - - Args: - query_string (:obj:`str`): The SPARQL query string. 
- select (:obj:`func` accepting a row in the query result): A function that - selects rows to be returned by :code:`query`. This function accepts a row - in the results of executing :code:`query_string` and return True if and - only if the row is to be returned by :code:`query`. The row passed in as - an argument is represented as a :obj:`dict` that maps a query variable in - :code:`query_string` to its value in the given row. - - Returns: - A table, represented as a :obj:`list` of rows, resulting from executing the - given SPARQL query. Each row is a :obj:`dict` mapping query variable to its - value in the row. If `select` is not `None`, then a row is included in the - returned :obj:`list` if and only if `select` returns :obj:`True` for that - row. - - Raises: - ValueError: If the payload returned by the Data Commons REST API is - malformed. - - Examples: - We would like to query for the name associated with three states identified - by their dcids - `California `_, - `Kentucky `_, and - `Maryland `_. - - >>> query_str = ''' - ... SELECT ?name ?dcid - ... WHERE { - ... ?a typeOf Place . - ... ?a name ?name . - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ... ?a dcid ?dcid - ... } - ... ''' - >>> result = query(query_str) - >>> for r in result: - ... print(r) - {"?name": "Maryland", "?dcid": "geoId/24"} - {"?name": "Kentucky", "?dcid": "geoId/21"} - {"?name": "California", "?dcid": "geoId/06"} - - Optionally, we can specify which rows are returned by setting :code:`select` - like so. The following returns all rows where the name is "Maryland". - - >>> selector = lambda row: row['?name'] == 'Maryland' - >>> result = query(query_str, select=selector) - >>> for r in result: - ... 
print(r) - {"?name": "Maryland", "?dcid": "geoId/24"} - """ - - req_url = _API_ROOT + _API_ENDPOINTS['query'] - headers = { - 'Content-Type': 'application/json' - } - if os.environ.get(_ENV_VAR_API_KEY): - headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] - - req = six.moves.urllib.request.Request( - req_url, - data=json.dumps({'sparql': query_string}).encode("utf-8"), - headers=headers) - - try: - res = six.moves.urllib.request.urlopen(req) - except six.moves.urllib.error.HTTPError as e: - raise ValueError('Response error {}:\n{}'.format(e.code, e.read())) - - # Verify then store the results. - res_json = json.loads(res.read()) - - # Iterate through the query results - header = res_json.get('header') - if header is None: - raise ValueError('Ill-formatted response: does not contain a header.') - result_rows = [] - for row in res_json.get('rows', []): - # Construct the map from query variable to cell value. - row_map = {} - for idx, cell in enumerate(row.get('cells', [])): - if idx > len(header): - raise ValueError( - 'Query error: unexpected cell {}'.format(cell)) - if 'value' not in cell: - raise ValueError( - 'Query error: cell missing value {}'.format(cell)) - cell_var = header[idx] - row_map[cell_var] = cell['value'] - # Add the row to the result rows if it is selected - if select is None or select(row_map): - result_rows.append(row_map) - return result_rows diff --git a/datacommons/sparql.py b/datacommons/sparql.py new file mode 100644 index 00000000..1215b027 --- /dev/null +++ b/datacommons/sparql.py @@ -0,0 +1,97 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Data Commons Python API Query Module.
+
+Implements functions for sending graph queries to the Data Commons Graph.
+"""
+
+from datacommons.requests import _post
+
+
+def query(query_string, select=None):
+    """ Returns the results of executing a SPARQL query on the Data Commons graph.
+
+    Args:
+      query_string (:obj:`str`): The SPARQL query string.
+      select (:obj:`func` accepting a row of the query result): A function that
+        selects rows to be returned by :code:`query`. This function accepts a row
+        in the results of executing :code:`query_string` and returns True if and
+        only if the row is to be returned by :code:`query`. The row passed in as
+        an argument is represented as a :obj:`dict` that maps a query variable in
+        :code:`query_string` to its value in the given row.
+
+    Returns:
+      A table, represented as a :obj:`list` of rows, resulting from executing the
+      given SPARQL query. Each row is a :obj:`dict` mapping query variable to its
+      value in the row. If `select` is not `None`, then a row is included in the
+      returned :obj:`list` if and only if `select` returns :obj:`True` for that
+      row.
+
+    Raises:
+      ValueError: If the payload returned by the Data Commons REST API is
+        malformed.
+
+    Examples:
+      We would like to query for the name associated with three states identified
+      by their dcids
+      `California `_,
+      `Kentucky `_, and
+      `Maryland `_.
+
+      >>> query_str = '''
+      ... SELECT ?name ?dcid
+      ... WHERE {
+      ...   ?a typeOf Place .
+      ...   ?a name ?name .
+      ...   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
+      ...   ?a dcid ?dcid
+      ... }
+      ... '''
+      >>> result = query(query_str)
+      >>> for r in result:
+      ...   print(r)
+      {"?name": "Maryland", "?dcid": "geoId/24"}
+      {"?name": "Kentucky", "?dcid": "geoId/21"}
+      {"?name": "California", "?dcid": "geoId/06"}
+
+      Optionally, we can specify which rows are returned by setting :code:`select`
+      like so. The following returns all rows where the name is "Maryland".
+
+      >>> selector = lambda row: row['?name'] == 'Maryland'
+      >>> result = query(query_str, select=selector)
+      >>> for r in result:
+      ...   print(r)
+      {"?name": "Maryland", "?dcid": "geoId/24"}
+    """
+    resp = _post('/query', {'sparql': query_string})
+    # Iterate through the query results
+    header = resp.get('header')
+    if header is None:
+        raise ValueError('Ill-formatted response: does not contain a header.')
+    result_rows = []
+    for row in resp.get('rows', []):
+        # Construct the map from query variable to cell value.
+        row_map = {}
+        for idx, cell in enumerate(row.get('cells', [])):
+            if idx >= len(header):  # cell has no matching header column
+                raise ValueError('Query error: unexpected cell {}'.format(cell))
+            if 'value' not in cell:
+                raise ValueError(
+                    'Query error: cell missing value {}'.format(cell))
+            cell_var = header[idx]
+            row_map[cell_var] = cell['value']
+        # Add the row to the result rows if it is selected
+        if select is None or select(row_map):
+            result_rows.append(row_map)
+    return result_rows
diff --git a/datacommons/test/node_test.py b/datacommons/test/node_test.py
index d7f20a01..8e0005b0 100644
--- a/datacommons/test/node_test.py
+++ b/datacommons/test/node_test.py
@@ -13,14 +13,14 @@
 # limitations under the License.
import unittest -from unittest import mock +from unittest.mock import patch import datacommons class TestProperties(unittest.TestCase): - @mock.patch("datacommons.node._post") + @patch("datacommons.node._post") def test_with_data(self, _post): def side_effect(path, data): @@ -46,7 +46,7 @@ def side_effect(path, data): class TestPropertyValues(unittest.TestCase): - @mock.patch("datacommons.node._post") + @patch("datacommons.node._post") def test_with_data(self, _post): def side_effect(path, data): @@ -70,7 +70,7 @@ def side_effect(path, data): response = datacommons.property_values(["geoId/06"], "name") assert response == {"geoId/06": ["California"]} - @mock.patch("datacommons.node._post") + @patch("datacommons.node._post") def test_multiple_values(self, _post): def side_effect(path, data): @@ -100,7 +100,7 @@ def side_effect(path, data): class TestTriples(unittest.TestCase): - @mock.patch("datacommons.node._post") + @patch("datacommons.node._post") def test_with_data(self, _post): def side_effect(path, data): diff --git a/datacommons/test/query_test.py b/datacommons/test/query_test.py deleted file mode 100644 index 75b12a66..00000000 --- a/datacommons/test/query_test.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Data Commons Python API unit tests. - -Unit tests for the SPARQL query wrapper. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -import datacommons as dc -import datacommons.utils as utils - -import json -import unittest -import six.moves.urllib as urllib - - -def request_mock(*args, **kwargs): - """ A mock urlopen call sent in the urllib package. """ - # Create the mock response object. - class MockResponse: - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - # The accepted query. - accepted_query = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ?a dcid ?dcid -} -''') - - accepted_query2 = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/DNE") . - ?a dcid ?dcid -} -''') - req = args[0] - data = json.loads(req.data) - - if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['query']: - if data['sparql'] == accepted_query: - return MockResponse(json.dumps({ - 'header': [ - '?name', - '?dcid' - ], - 'rows': [ - { - 'cells': [ - { - 'value': 'California' - }, - { - 'value': 'geoId/06' - } - ] - }, - { - 'cells': [ - { - 'value': 'Kentucky' - }, - { - 'value': 'geoId/21' - } - ] - }, - { - 'cells': [ - { - 'value': 'Maryland' - }, - { - 'value': 'geoId/24' - } - ] - } - ] - })) - elif data['sparql'] == accepted_query2: - return MockResponse(json.dumps({ - 'header': [ - '?name', - '?dcid' - ], - })) - - # Otherwise, return an empty response and a 404. - return urllib.error.HTTPError(None, 404, None, None, None) - - -class TestQuery(unittest.TestCase): - """ Unit tests for the Query object. """ - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_rows(self, urlopen): - """ Sending a valid query returns the correct response. 
""" - # Create the SPARQL query - query_string = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ?a dcid ?dcid -} -''') - selector = lambda row: row['?name'] != 'California' - - # Issue the query - results = dc.query(query_string) - selected_results = dc.query(query_string, select=selector) - - # Execute the query and iterate through the results. - for idx, row in enumerate(results): - if idx == 0: - self.assertDictEqual(row, {'?name': 'California', '?dcid': 'geoId/06'}) - if idx == 1: - self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) - if idx == 2: - self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) - - # Verify that the select function works. - for idx, row in enumerate(selected_results): - if idx == 0: - self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) - if idx == 1: - self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_no_rows(self, urlopen): - """ Handles row-less response. """ - # Create a SPARQL query - query_string = (''' -SELECT ?name ?dcid -WHERE { - ?a typeOf Place . - ?a name ?name . - ?a dcid ("geoId/DNE") . - ?a dcid ?dcid -} -''') - # Issue the query - self.assertEqual(dc.query(query_string), []) - -if __name__ == '__main__': - unittest.main() diff --git a/datacommons/test/set_api_key_test.py b/datacommons/test/set_api_key_test.py index 151f8ee9..9e0b002f 100644 --- a/datacommons/test/set_api_key_test.py +++ b/datacommons/test/set_api_key_test.py @@ -15,96 +15,20 @@ Unit tests setting the API Key. 
""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from unittest.mock import patch -except ImportError: - from mock import patch -import datacommons as dc -import datacommons.utils as utils - -import os -import json import unittest -import six.moves.urllib as urllib - -_TEST_API_KEY = 'TEST-API-KEY' - -_SPARQL_NO_KEY = 'query_no_key' -_SPARQL_W_KEY = 'query_w_key' - -_SEND_REQ_NO_KEY = 'https://send_request_no_key.com' -_SEND_REQ_W_KEY = 'https://send_request_w_key.com' - +import datacommons.key as key -def request_mock(*args, **kwargs): - """ A mock urlopen call sent in the urllib package. """ - # Create the mock response object. - class MockResponse: - def __init__(self, json_data): - self.json_data = json_data - - def read(self): - return self.json_data - - req = args[0] - - if req.get_full_url() == _SEND_REQ_NO_KEY or json.loads(req.data) == {'sparql': _SPARQL_NO_KEY}: - assert 'X-api-key' not in req.headers - else: - assert req.get_header('X-api-key') == _TEST_API_KEY - - if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['query']: - # Return a dummy response that will parse into [] by query() - return MockResponse(json.dumps({ - 'header': [ - '?name', - '?dcid' - ], - })) - else: - # Return a dummy response that will parse into {} by _send_request() - return MockResponse(json.dumps({'payload': json.dumps({})})) +_KEY = "test-api-key" class TestApiKey(unittest.TestCase): - """Unit test for setting or not setting the API Key.""" - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_query_no_api_key(self, urlopen): - if os.getenv(utils._ENV_VAR_API_KEY): - del os.environ[utils._ENV_VAR_API_KEY] - # Issue a dummy SPARQL query that tells the mock to not expect a key - self.assertEqual(dc.query(_SPARQL_NO_KEY), []) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_send_request_no_api_key(self, urlopen): - if 
os.getenv(utils._ENV_VAR_API_KEY): - del os.environ[utils._ENV_VAR_API_KEY] - # Issue a dummy url that tells the mock to not expect a key - self.assertEqual(utils._send_request(_SEND_REQ_NO_KEY, {'foo': ['bar']}), {}) - - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_query_w_api_key(self, urlopen): - """ Handles row-less response. """ - # Set the API key - dc.set_api_key('make_sure_I_am_replaced') - dc.set_api_key(_TEST_API_KEY) - # Issue a dummy SPARQL query that tells the mock to expect a key - self.assertEqual(dc.query(_SPARQL_W_KEY), []) + """Unit test for setting or not setting the API Key.""" - @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) - def test_send_request_w_api_key(self, urlopen): - """ Handles row-less response. """ - # Set the API key - dc.set_api_key(_TEST_API_KEY) - # Issue a dummy url that tells the mock to expect a key - self.assertEqual(utils._send_request(_SEND_REQ_W_KEY), {}) + def test_set_api_key(self): + key.set_api_key(_KEY) + self.assertEqual(key.get_api_key(), _KEY) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/datacommons/test/sparql_test.py b/datacommons/test/sparql_test.py new file mode 100644 index 00000000..74ba7243 --- /dev/null +++ b/datacommons/test/sparql_test.py @@ -0,0 +1,131 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Data Commons Python API unit tests. + +Unit tests for the SPARQL query wrapper. 
+"""
+
+import unittest
+from unittest.mock import patch
+
+import datacommons
+
+_QUERY1 = ('''
+SELECT ?name ?dcid
+WHERE {
+  ?a typeOf Place .
+  ?a name ?name .
+  ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
+  ?a dcid ?dcid
+}
+''')
+
+_QUERY2 = ('''
+SELECT ?name ?dcid
+WHERE {
+  ?a typeOf Place .
+  ?a name ?name .
+  ?a dcid ("geoId/DNE") .
+  ?a dcid ?dcid
+}
+''')
+
+
+def _post_mock(path, data):
+    """ A mock function for _post. """
+    if path == "/query" and data['sparql'] == _QUERY1:
+        return {
+            'header': ['?name', '?dcid'],
+            'rows': [{
+                'cells': [{
+                    'value': 'California'
+                }, {
+                    'value': 'geoId/06'
+                }]
+            }, {
+                'cells': [{
+                    'value': 'Kentucky'
+                }, {
+                    'value': 'geoId/21'
+                }]
+            }, {
+                'cells': [{
+                    'value': 'Maryland'
+                }, {
+                    'value': 'geoId/24'
+                }]
+            }]
+        }
+    if path == "/query" and data['sparql'] == _QUERY2:
+        return {
+            'header': ['?name', '?dcid'],
+        }
+
+    # Otherwise, fail loudly: the code under test sent an unexpected request.
+    raise Exception('mock exception')
+
+
+class TestQuery(unittest.TestCase):
+    """ Unit tests for the Query object. """
+
+    @patch('datacommons.sparql._post')
+    def test_rows(self, _post):
+        """ Sending a valid query returns the correct response. """
+        _post.side_effect = _post_mock
+        # Selector that keeps every row except California
+        selector = lambda row: row['?name'] != 'California'
+        # Issue the query
+        results = datacommons.query(_QUERY1)
+        selected_results = datacommons.query(_QUERY1, select=selector)
+        # Execute the query and iterate through the results.
+        for idx, row in enumerate(results):
+            if idx == 0:
+                self.assertDictEqual(row, {
+                    '?name': 'California',
+                    '?dcid': 'geoId/06'
+                })
+            if idx == 1:
+                self.assertDictEqual(row, {
+                    '?name': 'Kentucky',
+                    '?dcid': 'geoId/21'
+                })
+            if idx == 2:
+                self.assertDictEqual(row, {
+                    '?name': 'Maryland',
+                    '?dcid': 'geoId/24'
+                })
+
+        # Verify that the select function works.
+        for idx, row in enumerate(selected_results):
+            if idx == 0:
+                self.assertDictEqual(row, {
+                    '?name': 'Kentucky',
+                    '?dcid': 'geoId/21'
+                })
+            if idx == 1:
+                self.assertDictEqual(row, {
+                    '?name': 'Maryland',
+                    '?dcid': 'geoId/24'
+                })
+
+    @patch('datacommons.sparql._post')
+    def test_no_rows(self, _post):
+        """ Handles row-less response. """
+        _post.side_effect = _post_mock
+        # Issue the query
+        self.assertEqual(datacommons.query(_QUERY2), [])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
index daf71300..0cf17689 100644
--- a/datacommons_pandas/__init__.py
+++ b/datacommons_pandas/__init__.py
@@ -21,7 +21,7 @@
 # TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149
 # ##############################################@################################
 # Data Commons SPARQL query support
-from datacommons_pandas.query import query
+from datacommons_pandas.sparql import query
 
 # Data Commons Python API
 from datacommons_pandas.core import get_property_labels, get_property_values, get_triples
diff --git a/datacommons_pandas/sparql.py b/datacommons_pandas/sparql.py
new file mode 120000
index 00000000..f0d921b2
--- /dev/null
+++ b/datacommons_pandas/sparql.py
@@ -0,0 +1 @@
+../datacommons/sparql.py
\ No newline at end of file