From a8aec19c1357d8ee5aa9f552d1b640e18811398c Mon Sep 17 00:00:00 2001 From: Antares Chen Date: Mon, 26 Aug 2019 15:59:41 -0400 Subject: [PATCH 1/4] Implemented get_place_obs --- datacommons/__init__.py | 2 +- datacommons/populations.py | 139 +++++++++++++++++- datacommons/test/populations_test.py | 87 ++++++++++- datacommons/utils.py | 3 +- .../_autosummary/datacommons.populations.rst | 1 + .../datacommons.places.get_pop_obs.rst | 6 - .../datacommons.populations.get_place_obs.rst | 6 + .../datacommons.populations.get_pop_obs.rst | 6 + 8 files changed, 231 insertions(+), 19 deletions(-) delete mode 100644 docs/source/_autosummary/datacommons_places/datacommons.places.get_pop_obs.rst create mode 100644 docs/source/_autosummary/datacommons_populations/datacommons.populations.get_place_obs.rst create mode 100644 docs/source/_autosummary/datacommons_populations/datacommons.populations.get_pop_obs.rst diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 1370203a..2d366b7c 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -18,7 +18,7 @@ # Data Commons Python Client API from datacommons.core import get_property_labels, get_property_values, get_triples from datacommons.places import get_places_in -from datacommons.populations import get_populations, get_observations, get_pop_obs +from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs # Other utilities from .utils import set_api_key, clean_frame, flatten_frame diff --git a/datacommons/populations.py b/datacommons/populations.py index 8e29be7e..15fa2e2a 100644 --- a/datacommons/populations.py +++ b/datacommons/populations.py @@ -338,13 +338,140 @@ def get_pop_obs(dcid): Each :obj:`Observation` is represented by a :code:`dict` that have the keys: - - :code:`measuredProp` - - :code:`observationDate` - - :code:`observationPeriod` (optional) - - :code:`measurementMethod` (optional) + - :code:`measuredProp`: The property measured by the 
:obj:`Observation`. + - :code:`observationDate`: The date when the :obj:`Observation` was made. + - :code:`observationPeriod` (optional): The period over which the + :obj:`Observation` was made. + - :code:`measurementMethod` (optional): A field providing additional + information on how the :obj:`Observation` was collected. - one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`, - :code:`minValue`, :code:`medianValue` + :code:`minValue`, :code:`medianValue`: Fields that denote values measured + by the :obj:`Observation`. """ url = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid={}'.format(dcid) - return utils._send_request(url, compress=True, post=False) \ No newline at end of file + return utils._send_request(url, compress=True, post=False) + +def get_place_obs(place_type, population_type, constraining_properties={}): + """ Returns all :obj:`StatisticalPopulation`'s and :obj:`Observation`'s for \ + all places of the given :code:`place_type`. + + Args: + place_type (:obj:`str`): The type of places to query + :obj:`StatisticalPopulation`'s and :obj:`Observation`'s for. + population_type (:obj:`str`): The population type of the + :obj:`StatisticalPopulation` + constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional): + A map from constraining property to the value that the + :obj:`StatisticalPopulation` should be constrained by. + + Returns: + Given a :code:`Place` type (e.g. :obj:`State`, :obj:`County`, :obj:`City`), + a :code:`population_type` (e.g. :obj:`Person`), and optionally a set of + constraining properties defining the :obj:`StatisticalPopulation`, this + function returns *all* :obj:`StatisticalPopulation`'s and + :obj:`Observation`'s for all places of the given type. See examples for more + details on how the format of the return value is structured. + + Raises: + ValueError: If the payload returned by the Data Commons REST API is + malformed. 
+ + Examples: + We would like to get all :obj:`StatisticalPopulation` and + :obj:`Observations` for all places of type :obj:`City` where the populations + have a population type of :obj:`Person` is specified by the following + constraining properties. + + - Persons should have `age `_ + with value `Years5To17 `_ + - Persons should have `placeOfBirth `_ + with value BornInOtherStateInTheUnitedStates. + + >>> props = { + >>> 'age': 'Years5To17', + >>> 'placeOfBirth': 'BornInOtherStateInTheUnitedStates' + >>> } + >>> get_place_obs('City', 'Person', constraining_properties=props) + [ + 'name': 'Marcus Hook borough', + 'place': 'geoId/4247344', + 'populations': { + 'dc/p/pq6frs32sfvk': { + 'observations': [ + { + 'id': 'dc/o/0005qml1el8qh', + 'marginOfError': 39, + 'measuredProp': 'count', + 'measuredValue': 67, + 'measurementMethod': 'CenusACS5yrSurvey', + 'observationDate': '2014', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + { + 'id': 'dc/o/wvskpk5vyjkhb', + 'marginOfError': 33, + 'measuredProp': 'count', + 'measuredValue': 58, + 'measurementMethod': 'CenusACS5yrSurvey', + 'observationDate': '2015', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + { + 'id': 'dc/o/3h44trf3vyrm3', + 'marginOfError': 36, + 'measuredProp': 'count', + 'measuredValue': 42, + 'measurementMethod': 'CenusACS5yrSurvey', + 'observationDate': '2011', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + # More observations... + ], + 'provenanceId': 'dc/3j71hj1' + }, + # Entries for more cities... + ] + + The value returned by :code:`get_place_obs` is a :obj:`list` of + :obj:`dict`'s. Each dictionary corresponds to :obj:`StatisticalPopulation`'s + matching the given :code:`population_type` and + :code:`constraining_properties` for a single place of the given + :code:`place_type`. The dictionary contains the following keys. + + - :code:`name`: The name of the place being described. + - :code:`place`: The dcid associated with the place being described. 
+ - :code:`populations`: A :obj:`dict` mapping :code:`StatisticalPopulation` + dcids to a :obj:`dict` with a list of :code:`observations` and the + :code:`provenanceId` identifying the source that defined the + :code:`StatisticalPopulation`. + + Each :obj:`Observation` is represented by a :obj:`dict` with the following + keys. + + - :code:`id`: The :code:`dcid` identifying the :obj:`Observation`. + - :code:`provenanceId`: The dcid identifying the source that defined this + :obj:`Observation`. + - :code:`type`: The type associated with the :obj:`Observation`. + - :code:`measuredProp`: The property measured by the :obj:`Observation`. + - :code:`observationDate`: The date when the :obj:`Observation` was made. + - :code:`observationPeriod` (optional): The period over which the + :obj:`Observation` was made. + - :code:`measurementMethod` (optional): A field identifying how the + :obj:`Observation` was made + - one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`, + :code:`minValue`, :code:`medianValue`: Fields denoting values measured by + the :obj:`Observation`. + """ + # Create the json payload and send it to the REST API. + pv = [{'property': k, 'value': v} for k, v in constraining_properties.items()] + url = utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs'] + payload = utils._send_request(url, req_json={ + 'place_type': place_type, + 'population_type': population_type, + 'pvs': pv, + }, compress=True) + return payload['places'] diff --git a/datacommons/test/populations_test.py b/datacommons/test/populations_test.py index d332e8b8..2717e891 100644 --- a/datacommons/test/populations_test.py +++ b/datacommons/test/populations_test.py @@ -94,6 +94,8 @@ def json(self): # provided to the method. 
res_json = json.dumps([]) return MockResponse({'payload': res_json}, 200) + + # Mock responses for post requests to get_observations if args[0] == utils._API_ROOT + utils._API_ENDPOINTS['get_observations']\ and req['measured_property'] == 'count'\ and req['stats_type'] == 'measuredValue'\ @@ -132,6 +134,40 @@ def json(self): res_json = json.dumps([]) return MockResponse({'payload': res_json}, 200) + # Mock responses for post requests to get_place_obs + if args[0] == utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs']\ + and req['place_type'] == 'City'\ + and req['population_type'] == 'Person'\ + and req['pvs'] == constrained_props: + res_json = json.dumps({ + 'places': [ + { + 'name': 'Marcus Hook borough', + 'place': 'geoId/4247344', + 'populations': { + 'dc/p/pq6frs32sfvk': { + 'observations': [ + { + 'id': 'dc/o/0005qml1el8qh', + 'marginOfError': 39, + 'measuredProp': 'count', + 'measuredValue': 67, + 'measurementMethod': 'CenusACS5yrSurvey', + 'observationDate': '2014', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + } + ], + 'provenanceId': 'dc/3j71hj1' + } + } + } + ] + }) + return MockResponse({ + 'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8'))) + }, 200) + # Otherwise, return an empty response and a 404. return MockResponse({}, 404) @@ -189,7 +225,9 @@ def json(self): } } }) - return MockResponse({'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8')))}, 200) + return MockResponse({ + 'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8'))) + }, 200) # Otherwise, return an empty response and a 404. return MockResponse({}, 404) @@ -416,7 +454,7 @@ def test_series_no_dcids(self, post_mock): assert_series_equal(actual, expected) class TestGetPopObs(unittest.TestCase): - """ Unit stests for get_pop_Obs. """ + """ Unit tests for get_pop_obs. 
""" @mock.patch('requests.get', side_effect=get_request_mock) def test_valid_dcid(self, get_mock): @@ -424,9 +462,9 @@ def test_valid_dcid(self, get_mock): # Set the API key dc.set_api_key('TEST-API-KEY') - # Call get_places_in - popobs = dc.get_pop_obs('geoId/06085') - self.assertDictEqual(popobs, { + # Call get_pop_obs + pop_obs = dc.get_pop_obs('geoId/06085') + self.assertDictEqual(pop_obs, { 'name': 'Mountain View', 'placeType': 'City', 'populations': { @@ -460,6 +498,45 @@ def test_valid_dcid(self, get_mock): } }) +class TestGetPlaceObs(unittest.TestCase): + """ Unit tests for get_place_obs. """ + + @mock.patch('requests.post', side_effect=post_request_mock) + def test_valid(self, post_mock): + """ Calling get_place_obs with valid parameters returns a valid result. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_place_obs + pvs = { + 'placeOfBirth': 'BornInOtherStateInTheUnitedStates', + 'age': 'Years5To17' + } + place_obs = dc.get_place_obs('City', 'Person', constraining_properties=pvs) + self.assertListEqual(place_obs, [ + { + 'name': 'Marcus Hook borough', + 'place': 'geoId/4247344', + 'populations': { + 'dc/p/pq6frs32sfvk': { + 'observations': [ + { + 'id': 'dc/o/0005qml1el8qh', + 'marginOfError': 39, + 'measuredProp': 'count', + 'measuredValue': 67, + 'measurementMethod': 'CenusACS5yrSurvey', + 'observationDate': '2014', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + } + ], + 'provenanceId': 'dc/3j71hj1' + } + } + } + ]) + if __name__ == '__main__': unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py index 28159fde..cc8acd28 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -45,7 +45,8 @@ 'get_places_in': '/node/places-in', 'get_populations': '/node/populations', 'get_observations': '/node/observations', - 'get_pop_obs': '/bulk/pop-obs' + 'get_pop_obs': '/bulk/pop-obs', + 'get_place_obs': '/bulk/place-obs', } # The default value to limit to diff --git 
a/docs/source/_autosummary/datacommons.populations.rst b/docs/source/_autosummary/datacommons.populations.rst index 370185e7..97774221 100644 --- a/docs/source/_autosummary/datacommons.populations.rst +++ b/docs/source/_autosummary/datacommons.populations.rst @@ -11,3 +11,4 @@ datacommons.populations get_observations get_populations get_pop_obs + get_place_obs diff --git a/docs/source/_autosummary/datacommons_places/datacommons.places.get_pop_obs.rst b/docs/source/_autosummary/datacommons_places/datacommons.places.get_pop_obs.rst deleted file mode 100644 index 2bf4ebde..00000000 --- a/docs/source/_autosummary/datacommons_places/datacommons.places.get_pop_obs.rst +++ /dev/null @@ -1,6 +0,0 @@ -datacommons.places.get\_pop\_obs -================================ - -.. currentmodule:: datacommons.places - -.. autofunction:: get_pop_obs \ No newline at end of file diff --git a/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_place_obs.rst b/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_place_obs.rst new file mode 100644 index 00000000..166a7fe2 --- /dev/null +++ b/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_place_obs.rst @@ -0,0 +1,6 @@ +datacommons.populations.get\_place\_obs +======================================== + +.. currentmodule:: datacommons.populations + +.. autofunction:: get_place_obs diff --git a/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_pop_obs.rst b/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_pop_obs.rst new file mode 100644 index 00000000..dd018989 --- /dev/null +++ b/docs/source/_autosummary/datacommons_populations/datacommons.populations.get_pop_obs.rst @@ -0,0 +1,6 @@ +datacommons.populations.get\_pop\_obs +======================================== + +.. currentmodule:: datacommons.populations + +.. 
autofunction:: get_pop_obs From b273f12c1915a4bef882337b77f01562b64a3d0b Mon Sep 17 00:00:00 2001 From: Antares Chen Date: Mon, 26 Aug 2019 17:29:24 -0400 Subject: [PATCH 2/4] Reimplemented query as a function instead of a class. --- datacommons/__init__.py | 2 +- datacommons/populations.py | 6 +- datacommons/query.py | 202 +++++++----------- datacommons/test/query_test.py | 11 +- .../source/_autosummary/datacommons.query.rst | 4 +- .../datacommons.query.Query.rst | 23 -- .../datacommons.query.query.rst | 6 + 7 files changed, 100 insertions(+), 154 deletions(-) delete mode 100644 docs/source/_autosummary/datacommons_query/datacommons.query.Query.rst create mode 100644 docs/source/_autosummary/datacommons_query/datacommons.query.query.rst diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 2d366b7c..72a819d5 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. # Data Commons SPARQL query support -from datacommons.query import Query +from datacommons.query import query # Data Commons Python Client API from datacommons.core import get_property_labels, get_property_values, get_triples diff --git a/datacommons/populations.py b/datacommons/populations.py index 15fa2e2a..fa2e1361 100644 --- a/datacommons/populations.py +++ b/datacommons/populations.py @@ -389,9 +389,9 @@ def get_place_obs(place_type, population_type, constraining_properties={}): with value BornInOtherStateInTheUnitedStates. >>> props = { - >>> 'age': 'Years5To17', - >>> 'placeOfBirth': 'BornInOtherStateInTheUnitedStates' - >>> } + ... 'age': 'Years5To17', + ... 'placeOfBirth': 'BornInOtherStateInTheUnitedStates' + ... } >>> get_place_obs('City', 'Person', constraining_properties=props) [ 'name': 'Marcus Hook borough', diff --git a/datacommons/query.py b/datacommons/query.py index 29480d23..6754c265 100644 --- a/datacommons/query.py +++ b/datacommons/query.py @@ -13,8 +13,8 @@ # limitations under the License. 
""" Data Commons Python Client API Query Module. -Implements a wrapper object for sending SPARQL queries to the Data Commons -knowledge graph. +Implements functions for sending graph queries to the Data Commons knowledge +graph. """ from __future__ import absolute_import @@ -26,132 +26,92 @@ import os import requests -# ----------------------------------------------------------------------------- -# Query Class -# ----------------------------------------------------------------------------- +# ----------------------------- WRAPPER FUNCTIONS ----------------------------- -class Query(object): - """ A wrapper object that performs a SPARQL query on the Data Commons graph. +def query(query_string, select=None): + """ Returns the results of executing a SPARQL query on the Data Commons graph. Args: - **kwargs: Valid keyword arguments include the following. At least one - valid argument must be provided. - - - `sparql` (:obj:`str`): The SPARQL query string. + query_string (:obj:`str`): The SPARQL query string. + select (:obj:`func` accepting a `row` in the query result): A function + that returns true if and only if a row in the query results should be + kept. The argument for this function is a row returned by :code:`query`. + More precisely, it is a :obj:`dict` from query variable to its value in a + given row. + + Yields: + Rows from executing the query where each row is a :obj:`dict` mapping + query variable to its value in the row. If `select` is not `None`, then + the row is returned if and only if `select` returns :obj:`True`. Raises: - ValueError: If an invalid keyword argument is provided. + ValueError: If the payload returned by the Data Commons REST API is + malformed. - Example: - To construct a :obj:`Query` object, do the following. + Examples: + We would like to query for the name associated with three states identified + by their dcids + `California `_, + `Kentucky `_, and + `Maryland `_. >>> query_str = ''' - ...SELECT ?name ?dcid - ...WHERE { - ... 
?a typeOf Place . - ... ?a name ?name . - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ... ?a dcid ?dcid - ...} - ...''' - >>> query = dc.Query(sparql=query_str) + ... SELECT ?name ?dcid + ... WHERE { + ... ?a typeOf Place . + ... ?a name ?name . + ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . + ... ?a dcid ?dcid + ... } + ... ''' + >>> result = query(query_str) + >>> for r in result: + ... print(r) + {"?name": "Maryland", "?dcid": "geoId/24"} + {"?name": "Kentucky", "?dcid": "geoId/21"} + {"?name": "California", "?dcid": "geoId/06"} + + Optionally, we can specify which rows are returned by setting :code:`select` + like so. The following returns all rows where the name is "Maryland". + + >>> selector = lambda row: row['?name'] == 'Maryland' + >>> result = query(query_str, select=selector) + >>> for r in result: + ... print(r) + {"?name": "Maryland", "?dcid": "geoId/24"} """ - - # Valid query languages - _SPARQL_LANG = 'sparql' - _VALID_LANG = [_SPARQL_LANG] - - def __init__(self, **kwargs): - """ Initializes a SPARQL query targeting the Data Commons graph. """ - if self._SPARQL_LANG in kwargs: - self._query = kwargs[self._SPARQL_LANG] - self._language = self._SPARQL_LANG - self._result = None - else: - lang_str = ', '.join(self._VALID_LANG) - raise ValueError( - 'Must provide one of the following languages: {}'.format(lang_str)) - - def rows(self, select=None): - """ Returns the result of executing the query as an iterator over all rows. - - Args: - select (:obj:`func` accepting a `row` in the query result): A function - that returns true if and only if a row in the query results should be - kept. The argument for this function is a :obj:`dict` from query - variable to its value in a given row. - - Yields: - Rows from executing the query where each row is a :obj:`dict` mapping - query variable to its value in the row. If `select` is not `None`, then - the row is returned if and only if `select` returns :obj:`True`. 
- - Example: - The following query asks for names of three states: - `California `_, - `Kentucky `_, and - `Maryland `_. - - >>> query_str = ''' - ... SELECT ?name ?dcid - ... WHERE { - ... ?a typeOf Place . - ... ?a name ?name . - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . - ... ?a dcid ?dcid - ... } - ... ''' - >>> query = dc.Query(sparql=query_str) - >>> for r in query.rows(): - ... print(r) - {"?name": "Maryland", "?dcid": "geoId/24"} - {"?name": "Kentucky", "?dcid": "geoId/21"} - {"?name": "California", "?dcid": "geoId/06"} - """ - # Execute the query if the results are empty. - if not self._result: - self._execute() - - # Iterate through the query results - header = self._result['header'] - for row in self._result['rows']: - # Construct the map from query variable to cell value. - row_map = {} - for idx, cell in enumerate(row['cells']): - if idx > len(header): - raise RuntimeError( - 'Query error: unexpected cell {}'.format(cell)) - if 'value' not in cell: - raise RuntimeError( - 'Query error: cell missing value {}'.format(cell)) - cell_var = header[idx] - row_map[cell_var] = cell['value'] - - # Yield the row if it is selected - if select is None or select(row_map): - yield row_map - - def _execute(self): - """ Execute the query. - - Raises: - RuntimeError: on query failure (see error hint). - """ - # Get the API Key and set the headers - if not os.environ.get(_ENV_VAR_API_KEY, None): - raise ValueError( - 'Request error: Must set an API key before using the API!') - headers = {'x-api-key': os.environ[_ENV_VAR_API_KEY]} - - # Create the query request. - if self._language == self._SPARQL_LANG: - payload = {'sparql': self._query} - url = _API_ROOT + _API_ENDPOINTS['query'] - res = requests.post(url, json=payload, headers=headers) - - # Verify then store the results. 
- res_json = res.json() - if 'message' in res_json: - raise RuntimeError('Query error: {}'.format(res_json['message'])) - self._result = res.json() + # Get the API Key and perform the POST request. + if not os.environ.get(_ENV_VAR_API_KEY, None): + raise ValueError( + 'Request error: Must set an API key before using the API!') + url = _API_ROOT + _API_ENDPOINTS['query'] + res = requests.post(url, json={'sparql': query_string}, headers={ + 'x-api-key': os.environ[_ENV_VAR_API_KEY] + }) + + # Verify then store the results. + if res.status_code != 200: + raise ValueError( + 'Response error: An HTTP {} code was returned by the mixer. Printing ' + 'response\n\n{}'.format(res.status_code , res.text)) + res_json = res.json() + + # Iterate through the query results + header = res_json['header'] + for row in res_json['rows']: + # Construct the map from query variable to cell value. + row_map = {} + for idx, cell in enumerate(row['cells']): + if idx > len(header): + raise ValueError( + 'Query error: unexpected cell {}'.format(cell)) + if 'value' not in cell: + raise ValueError( + 'Query error: cell missing value {}'.format(cell)) + cell_var = header[idx] + row_map[cell_var] = cell['value'] + + # Yield the row if it is selected + if select is None or select(row_map): + yield row_map diff --git a/datacommons/test/query_test.py b/datacommons/test/query_test.py index 486a3af1..782af70f 100644 --- a/datacommons/test/query_test.py +++ b/datacommons/test/query_test.py @@ -121,10 +121,14 @@ def test_rows(self, post_mock): ?a dcid ?dcid } ''') - query = dc.Query(sparql=query_string) + selector = lambda row: row['?name'] != 'California' + + # Issue the query + results = dc.query(query_string) + selected_results = dc.query(query_string, select=selector) # Execute the query and iterate through the results. 
- for idx, row in enumerate(query.rows()): + for idx, row in enumerate(results): if idx == 0: self.assertDictEqual(row, {'?name': 'California', '?dcid': 'geoId/06'}) if idx == 1: @@ -133,8 +137,7 @@ def test_rows(self, post_mock): self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) # Verify that the select function works. - selector = lambda row: row['?name'] != 'California' - for idx, row in enumerate(query.rows(select=selector)): + for idx, row in enumerate(selected_results): if idx == 0: self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) if idx == 1: diff --git a/docs/source/_autosummary/datacommons.query.rst b/docs/source/_autosummary/datacommons.query.rst index a89ec368..58478d98 100644 --- a/docs/source/_autosummary/datacommons.query.rst +++ b/docs/source/_autosummary/datacommons.query.rst @@ -3,9 +3,9 @@ datacommons.query .. automodule:: datacommons.query - .. rubric:: Classes + .. rubric:: Functions .. autosummary:: :toctree: datacommons_query - Query + query diff --git a/docs/source/_autosummary/datacommons_query/datacommons.query.Query.rst b/docs/source/_autosummary/datacommons_query/datacommons.query.Query.rst deleted file mode 100644 index 2b4b9ff0..00000000 --- a/docs/source/_autosummary/datacommons_query/datacommons.query.Query.rst +++ /dev/null @@ -1,23 +0,0 @@ -datacommons.query.Query -======================= - -.. currentmodule:: datacommons.query - -.. autoclass:: Query - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Query.__init__ - ~Query.rows - - - - - - \ No newline at end of file diff --git a/docs/source/_autosummary/datacommons_query/datacommons.query.query.rst b/docs/source/_autosummary/datacommons_query/datacommons.query.query.rst new file mode 100644 index 00000000..5b48b22e --- /dev/null +++ b/docs/source/_autosummary/datacommons_query/datacommons.query.query.rst @@ -0,0 +1,6 @@ +datacommons.query.query +========================================= + +.. 
currentmodule:: datacommons.query + +.. autofunction:: query From 0ed071aff825169666027f3c7330dcb9a6ba2de3 Mon Sep 17 00:00:00 2001 From: Antares Chen Date: Tue, 27 Aug 2019 13:39:18 -0400 Subject: [PATCH 3/4] query now returns a list. Amended docstrings. --- datacommons/populations.py | 21 ++++++++++---------- datacommons/query.py | 29 ++++++++++++++++------------ datacommons/test/populations_test.py | 4 ++-- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/datacommons/populations.py b/datacommons/populations.py index fa2e1361..7517ae89 100644 --- a/datacommons/populations.py +++ b/datacommons/populations.py @@ -344,10 +344,10 @@ def get_pop_obs(dcid): :obj:`Observation` was made. - :code:`measurementMethod` (optional): A field providing additional information on how the :obj:`Observation` was collected. - - one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`, - :code:`minValue`, :code:`medianValue`: Fields that denote values measured - by the :obj:`Observation`. - + - Additional fields that denote values measured by the :obj:`Observation`. + These may include the following: :code:`measuredValue`, :code:`meanValue`, + :code:`medianValue`, :code:`maxValue`, :code:`minValue`, :code:`sumValue`, + :code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and others. 
""" url = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid={}'.format(dcid) return utils._send_request(url, compress=True, post=False) @@ -404,7 +404,7 @@ def get_place_obs(place_type, population_type, constraining_properties={}): 'marginOfError': 39, 'measuredProp': 'count', 'measuredValue': 67, - 'measurementMethod': 'CenusACS5yrSurvey', + 'measurementMethod': 'CensusACS5yrSurvey', 'observationDate': '2014', 'provenanceId': 'dc/3j71hj1', 'type': 'Observation' @@ -414,7 +414,7 @@ def get_place_obs(place_type, population_type, constraining_properties={}): 'marginOfError': 33, 'measuredProp': 'count', 'measuredValue': 58, - 'measurementMethod': 'CenusACS5yrSurvey', + 'measurementMethod': 'CensusACS5yrSurvey', 'observationDate': '2015', 'provenanceId': 'dc/3j71hj1', 'type': 'Observation' @@ -424,7 +424,7 @@ def get_place_obs(place_type, population_type, constraining_properties={}): 'marginOfError': 36, 'measuredProp': 'count', 'measuredValue': 42, - 'measurementMethod': 'CenusACS5yrSurvey', + 'measurementMethod': 'CensusACS5yrSurvey', 'observationDate': '2011', 'provenanceId': 'dc/3j71hj1', 'type': 'Observation' @@ -462,9 +462,10 @@ def get_place_obs(place_type, population_type, constraining_properties={}): :obj:`Observation` was made. - :code:`measurementMethod` (optional): A field identifying how the :obj:`Observation` was made - - one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`, - :code:`minValue`, :code:`medianValue`: Fields denoting values measured by - the :obj:`Observation`. + - Additional fields that denote values measured by the :obj:`Observation`. + These may include the following: :code:`measuredValue`, :code:`meanValue`, + :code:`medianValue`, :code:`maxValue`, :code:`minValue`, :code:`sumValue`, + :code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and others. """ # Create the json payload and send it to the REST API. 
pv = [{'property': k, 'value': v} for k, v in constraining_properties.items()] diff --git a/datacommons/query.py b/datacommons/query.py index 6754c265..e9b0b26c 100644 --- a/datacommons/query.py +++ b/datacommons/query.py @@ -34,16 +34,19 @@ def query(query_string, select=None): Args: query_string (:obj:`str`): The SPARQL query string. - select (:obj:`func` accepting a `row` in the query result): A function - that returns true if and only if a row in the query results should be - kept. The argument for this function is a row returned by :code:`query`. - More precisely, it is a :obj:`dict` from query variable to its value in a - given row. - - Yields: - Rows from executing the query where each row is a :obj:`dict` mapping - query variable to its value in the row. If `select` is not `None`, then - the row is returned if and only if `select` returns :obj:`True`. + select (:obj:`func` accepting a row in the query result): A function that + selects rows to be returned by :code:`query`. This function accepts a row + in the results of executing :code:`query_string` and return True if and + only if the row is to be returned by :code:`query`. The row passed in as + an argument is represented as a :obj:`dict` that maps a query variable in + :code:`query_string` to its value in the given row. + + Returns: + A table, represented as a :obj:`list` of rows, resulting from executing the + given SPARQL query. Each row is a :obj:`dict` mapping query variable to its + value in the row. If `select` is not `None`, then a row is included in the + returned :obj:`list` if and only if `select` returns :obj:`True` for that + row. Raises: ValueError: If the payload returned by the Data Commons REST API is @@ -99,6 +102,7 @@ def query(query_string, select=None): # Iterate through the query results header = res_json['header'] + result_rows = [] for row in res_json['rows']: # Construct the map from query variable to cell value. 
row_map = {} @@ -112,6 +116,7 @@ def query(query_string, select=None): cell_var = header[idx] row_map[cell_var] = cell['value'] - # Yield the row if it is selected + # Add the row to the result rows if it is selected if select is None or select(row_map): - yield row_map + result_rows.append(row_map) + return result_rows diff --git a/datacommons/test/populations_test.py b/datacommons/test/populations_test.py index 2717e891..4adcc107 100644 --- a/datacommons/test/populations_test.py +++ b/datacommons/test/populations_test.py @@ -152,7 +152,7 @@ def json(self): 'marginOfError': 39, 'measuredProp': 'count', 'measuredValue': 67, - 'measurementMethod': 'CenusACS5yrSurvey', + 'measurementMethod': 'CensusACS5yrSurvey', 'observationDate': '2014', 'provenanceId': 'dc/3j71hj1', 'type': 'Observation' @@ -525,7 +525,7 @@ def test_valid(self, post_mock): 'marginOfError': 39, 'measuredProp': 'count', 'measuredValue': 67, - 'measurementMethod': 'CenusACS5yrSurvey', + 'measurementMethod': 'CensusACS5yrSurvey', 'observationDate': '2014', 'provenanceId': 'dc/3j71hj1', 'type': 'Observation' From be3c3373f7d1fc596f3ab8492c012c17b88eb0af Mon Sep 17 00:00:00 2001 From: Antares Chen Date: Tue, 27 Aug 2019 14:09:44 -0400 Subject: [PATCH 4/4] Fixed docstring typo --- datacommons/populations.py | 79 ++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/datacommons/populations.py b/datacommons/populations.py index 7517ae89..521f03c3 100644 --- a/datacommons/populations.py +++ b/datacommons/populations.py @@ -394,44 +394,47 @@ def get_place_obs(place_type, population_type, constraining_properties={}): ... 
} >>> get_place_obs('City', 'Person', constraining_properties=props) [ - 'name': 'Marcus Hook borough', - 'place': 'geoId/4247344', - 'populations': { - 'dc/p/pq6frs32sfvk': { - 'observations': [ - { - 'id': 'dc/o/0005qml1el8qh', - 'marginOfError': 39, - 'measuredProp': 'count', - 'measuredValue': 67, - 'measurementMethod': 'CensusACS5yrSurvey', - 'observationDate': '2014', - 'provenanceId': 'dc/3j71hj1', - 'type': 'Observation' - }, - { - 'id': 'dc/o/wvskpk5vyjkhb', - 'marginOfError': 33, - 'measuredProp': 'count', - 'measuredValue': 58, - 'measurementMethod': 'CensusACS5yrSurvey', - 'observationDate': '2015', - 'provenanceId': 'dc/3j71hj1', - 'type': 'Observation' - }, - { - 'id': 'dc/o/3h44trf3vyrm3', - 'marginOfError': 36, - 'measuredProp': 'count', - 'measuredValue': 42, - 'measurementMethod': 'CensusACS5yrSurvey', - 'observationDate': '2011', - 'provenanceId': 'dc/3j71hj1', - 'type': 'Observation' - }, - # More observations... - ], - 'provenanceId': 'dc/3j71hj1' + { + 'name': 'Marcus Hook borough', + 'place': 'geoId/4247344', + 'populations': { + 'dc/p/pq6frs32sfvk': { + 'observations': [ + { + 'id': 'dc/o/0005qml1el8qh', + 'marginOfError': 39, + 'measuredProp': 'count', + 'measuredValue': 67, + 'measurementMethod': 'CensusACS5yrSurvey', + 'observationDate': '2014', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + { + 'id': 'dc/o/wvskpk5vyjkhb', + 'marginOfError': 33, + 'measuredProp': 'count', + 'measuredValue': 58, + 'measurementMethod': 'CensusACS5yrSurvey', + 'observationDate': '2015', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + { + 'id': 'dc/o/3h44trf3vyrm3', + 'marginOfError': 36, + 'measuredProp': 'count', + 'measuredValue': 42, + 'measurementMethod': 'CensusACS5yrSurvey', + 'observationDate': '2011', + 'provenanceId': 'dc/3j71hj1', + 'type': 'Observation' + }, + # More observations... + ], + 'provenanceId': 'dc/3j71hj1' + } + } }, # Entries for more cities... ]