From aa3879f24defda81fd6f8e7e19d50fe5ecbdc9a4 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Wed, 19 Aug 2020 22:53:39 -0700 Subject: [PATCH 01/12] Add get_stat_value to python API. --- datacommons/__init__.py | 1 + datacommons/examples/statvar.py | 44 +++++++++++++++ datacommons/statvar.py | 93 ++++++++++++++++++++++++++++++++ datacommons/test/statvar_test.py | 89 ++++++++++++++++++++++++++++++ datacommons/utils.py | 2 + 5 files changed, 229 insertions(+) create mode 100644 datacommons/examples/statvar.py create mode 100644 datacommons/statvar.py create mode 100644 datacommons/test/statvar_test.py diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 59a8cfc2..c46cc552 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -19,6 +19,7 @@ from datacommons.core import get_property_labels, get_property_values, get_triples from datacommons.places import get_places_in, get_related_places, get_stats from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs +from datacommons.statvar import get_stat_value # Other utilities from .utils import set_api_key diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py new file mode 100644 index 00000000..612ef080 --- /dev/null +++ b/datacommons/examples/statvar.py @@ -0,0 +1,44 @@ +# Copyright 2020 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Data Commons Python API examples. 
+ +Basic demo for get_stat_value. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +sys.path.append('../') +sys.path.append('../../') +import datacommons as dc + + +def main(): + # Dcid for Santa Clara County. + sc = 'geoId/06085' + + # Get population. + print('Get Count_Person') + print(dc.get_stat_value(sc, 'Count_Person')) + + # TODO(boxu/tjann): better error msgs starting from REST + # e.g. stat/value?place=geoId/x06&stat_var=Count_Person&date=2010 + print('Get Count_Person Fail') + print(dc.get_stat_value('bogus_id', 'Count_Person')) + + +if __name__ == '__main__': + main() diff --git a/datacommons/statvar.py b/datacommons/statvar.py new file mode 100644 index 00000000..da6d5a5b --- /dev/null +++ b/datacommons/statvar.py @@ -0,0 +1,93 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Data Commons Python API Stat Module. + +Provides functions for getting data on StatVars from Data Commons Graph. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datacommons.utils import _API_ROOT, _API_ENDPOINTS, _ENV_VAR_API_KEY + +import json +import os +import six.moves.urllib.error +import six.moves.urllib.request + +import datacommons.utils as utils + + +def get_stat_value(place, + stat_var, + date=None, + measurement_method=None, + observation_period=None, + unit=None, + scaling_factor=None): + """Returns a value for :code:`place` based on the :code:`stat_var`. + + Args: + place (:obj:`iterable` of :obj:`str`): The dcid of `Place` to query for. + stat_var (:obj:`str`): The dcid of the `StatisticalVariable`. + obs_date (:obj:`str`): Optional, the preferred date of observation + in ISO 8601 format. If not specified, returns the latest observation. + measurement_method (:obj:`str`): Optional, the dcid of the preferred + `measurementMethod` value. + observation_period (:obj:`str`): Optional, the preferred + `observationPeriod` value. + unit (:obj:`str`): Optional, the dcid of the preferred `unit` value. + scaling_factor (:obj:`int`): Optional, the preferred `scalingFactor` value. + Returns: + A :obj:`double` the value of :code:`stat_var` for :code:`place`, filtered + by optional args. + + Raises: + ValueError: If the payload returned by the Data Commons REST API is + malformed. 
+ + Examples: + >>> get_stat_value("geoId/05", "Count_Person") + 366331 + """ + url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value'] + url += '?place={}&stat_var={}'.format(place, stat_var) + if date: + url += '&date={}'.format(date) + if measurement_method: + url += '&measurement_method={}'.format(measurement_method) + if observation_period: + url += '&observation_period={}'.format(observation_period) + if unit: + url += '&unit={}'.format(unit) + if scaling_factor: + url += '&scaling_factor={}'.format(scaling_factor) + + headers = {'Content-Type': 'application/json'} + if os.environ.get(_ENV_VAR_API_KEY): + headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] + + req = six.moves.urllib.request.Request(url, headers=headers) + + try: + res = six.moves.urllib.request.urlopen(req) + except six.moves.urllib.error.HTTPError as e: + raise ValueError('Response error {}:\n{}'.format(e.code, e.read())) + + # Verify then store the results. + res_json = json.loads(res.read()) + if 'value' not in res_json: + raise ValueError('No value in response.') + return res_json['value'] diff --git a/datacommons/test/statvar_test.py b/datacommons/test/statvar_test.py new file mode 100644 index 00000000..5cd05194 --- /dev/null +++ b/datacommons/test/statvar_test.py @@ -0,0 +1,89 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Data Commons Python API unit tests. + +Unit tests for StatVar methods in the Data Commons Python API. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from unittest import mock + +import datacommons as dc +import datacommons.utils as utils +import json +import unittest +import six.moves.urllib as urllib + + +def request_mock(*args, **kwargs): + """A mock urlopen requests sent in the requests package.""" + + # Create the mock response object. + class MockResponse: + + def __init__(self, json_data): + self.json_data = json_data + + def read(self): + return self.json_data + + req = args[0] + + # Mock responses for urlopen requests to get_stat_value. + stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ + 'get_stat_value'] + if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': + # Response returned when querying with basic args. + return MockResponse(json.dumps({'value': 123})) + if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010': + # Response returned when querying with observationDate. + return MockResponse(json.dumps({'value': 133})) + if (req.full_url == stat_value_url_base + + '?place=geoId/06&stat_var=Count_Person&' + + 'date=2010&measurement_method=CensusPEPSurvey&observation_period=P1Y&' + + 'unit=RealPeople&scaling_factor=100'): + # Response returned when querying with observationDate. + return MockResponse(json.dumps({'value': 103})) + # Otherwise, return an empty response and a 404. 
+ return urllib.error.HTTPError + + +class TestGetStatValue(unittest.TestCase): + """Unit tests for get_stat_value.""" + + @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + def test_basic(self, urlopen): + """Calling get_stat_value with minimal and proper args.""" + # Call get_stat_value + + self.assertEqual(dc.get_stat_value('geoId/06', 'Count_Person'), 123) + + @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + def test_opt_args(self, urlopen): + """Calling get_stat_value with optional args returns specific data.""" + # Call get_stat_value for specific obs + self.assertEqual(dc.get_stat_value('geoId/06', 'Count_Person', '2010'), + 133) + + # Call get_stat_value with all optional args + stat = dc.get_stat_value('geoId/06', 'Count_Person', '2010', + 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100) + self.assertEqual(stat, 103) + + +if __name__ == '__main__': + unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py index 17f75530..dfce278f 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -49,6 +49,8 @@ 'get_pop_obs': '/bulk/pop-obs', 'get_place_obs': '/bulk/place-obs', 'get_stats': '/bulk/stats', + 'get_stat_value': '/stat/value', + } # The default value to limit to From 4650f104bd67c34827059fcded7ab106f0fa4bd1 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Wed, 19 Aug 2020 23:35:41 -0700 Subject: [PATCH 02/12] Add get_stat_series function. 
--- datacommons/__init__.py | 2 +- datacommons/examples/statvar.py | 22 +++++++-- datacommons/statvar.py | 78 +++++++++++++++++++++++++++----- datacommons/test/statvar_test.py | 51 +++++++++++++++++++-- datacommons/utils.py | 2 +- 5 files changed, 133 insertions(+), 22 deletions(-) diff --git a/datacommons/__init__.py b/datacommons/__init__.py index c46cc552..4172c6d4 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -19,7 +19,7 @@ from datacommons.core import get_property_labels, get_property_values, get_triples from datacommons.places import get_places_in, get_related_places, get_stats from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs -from datacommons.statvar import get_stat_value +from datacommons.statvar import get_stat_value, get_stat_series # Other utilities from .utils import set_api_key diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index 612ef080..1327ab24 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -31,13 +31,25 @@ def main(): sc = 'geoId/06085' # Get population. - print('Get Count_Person') + print('get_stat_value Count_Person') print(dc.get_stat_value(sc, 'Count_Person')) - # TODO(boxu/tjann): better error msgs starting from REST - # e.g. stat/value?place=geoId/x06&stat_var=Count_Person&date=2010 - print('Get Count_Person Fail') - print(dc.get_stat_value('bogus_id', 'Count_Person')) + print('get_stat_value Count_Person 2018') + print(dc.get_stat_value(sc, 'Count_Person', '2018')) + print('get_stat_value Count_Person 2018 from ACS 5 yr') + print( + dc.get_stat_value(sc, + 'Count_Person', + '2018', + measurement_method='CensusACS5yrSurvey')) + + # Get population. 
+ print('get_stat_series Count_Person') + print(dc.get_stat_series(sc, 'UnemploymentRate_Person')) + print( + dc.get_stat_series(sc, + 'UnemploymentRate_Person', + observation_period="P1Y")) if __name__ == '__main__': diff --git a/datacommons/statvar.py b/datacommons/statvar.py index da6d5a5b..c8db1066 100644 --- a/datacommons/statvar.py +++ b/datacommons/statvar.py @@ -30,6 +30,24 @@ import datacommons.utils as utils +def _send_get_stat_req(url): + + headers = {'Content-Type': 'application/json'} + if os.environ.get(_ENV_VAR_API_KEY): + headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] + + req = six.moves.urllib.request.Request(url, headers=headers) + + try: + res = six.moves.urllib.request.urlopen(req) + except six.moves.urllib.error.HTTPError as e: + raise ValueError('Response error {}:\n{}'.format(e.code, e.read())) + + # Verify then store the results. + res_json = json.loads(res.read()) + return res_json + + def get_stat_value(place, stat_var, date=None, @@ -75,19 +93,55 @@ def get_stat_value(place, if scaling_factor: url += '&scaling_factor={}'.format(scaling_factor) - headers = {'Content-Type': 'application/json'} - if os.environ.get(_ENV_VAR_API_KEY): - headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] - - req = six.moves.urllib.request.Request(url, headers=headers) + res_json = _send_get_stat_req(url) - try: - res = six.moves.urllib.request.urlopen(req) - except six.moves.urllib.error.HTTPError as e: - raise ValueError('Response error {}:\n{}'.format(e.code, e.read())) - - # Verify then store the results. - res_json = json.loads(res.read()) if 'value' not in res_json: raise ValueError('No value in response.') return res_json['value'] + + +def get_stat_series(place, + stat_var, + measurement_method=None, + observation_period=None, + unit=None, + scaling_factor=None): + """Returns a :obj:`dict` for :code:`place` based on the :code:`stat_var`. + + Args: + place (:obj:`iterable` of :obj:`str`): The dcid of `Place` to query for. 
+ stat_var (:obj:`str`): The dcid of the `StatisticalVariable`. + measurement_method (:obj:`str`): Optional, the dcid of the preferred + `measurementMethod` value. + observation_period (:obj:`str`): Optional, the preferred + `observationPeriod` value. + unit (:obj:`str`): Optional, the dcid of the preferred `unit` value. + scaling_factor (:obj:`int`): Optional, the preferred `scalingFactor` value. + Returns: + A :obj:`dict` mapping dates to value of :code:`stat_var` for :code:`place`, + filtered by optional args. + + Raises: + ValueError: If the payload returned by the Data Commons REST API is + malformed. + + Examples: + >>> get_stat_series("geoId/05", "Count_Person") + {"1962":17072000,"2009":36887615,"1929":5531000,"1930":5711000} + """ + url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_series'] + url += '?place={}&stat_var={}'.format(place, stat_var) + if measurement_method: + url += '&measurement_method={}'.format(measurement_method) + if observation_period: + url += '&observation_period={}'.format(observation_period) + if unit: + url += '&unit={}'.format(unit) + if scaling_factor: + url += '&scaling_factor={}'.format(scaling_factor) + + res_json = _send_get_stat_req(url) + + if 'series' not in res_json: + raise ValueError('No response.') + return res_json['series'] diff --git a/datacommons/test/statvar_test.py b/datacommons/test/statvar_test.py index 5cd05194..325a6069 100644 --- a/datacommons/test/statvar_test.py +++ b/datacommons/test/statvar_test.py @@ -46,6 +46,8 @@ def read(self): # Mock responses for urlopen requests to get_stat_value. stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ 'get_stat_value'] + stat_series_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ + 'get_stat_series'] if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': # Response returned when querying with basic args. 
return MockResponse(json.dumps({'value': 123})) @@ -54,10 +56,29 @@ def read(self): return MockResponse(json.dumps({'value': 133})) if (req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&' + - 'date=2010&measurement_method=CensusPEPSurvey&observation_period=P1Y&' - + 'unit=RealPeople&scaling_factor=100'): - # Response returned when querying with observationDate. + 'date=2010&measurement_method=CensusPEPSurvey&' + + 'observation_period=P1Y&unit=RealPeople&scaling_factor=100'): + # Response returned when querying with above optional params. return MockResponse(json.dumps({'value': 103})) + if req.full_url == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': + # Response returned when querying with basic args. + return MockResponse(json.dumps({'series': {'2000': 1, '2001': 2}})) + if (req.full_url == stat_series_url_base + + '?place=geoId/06&stat_var=Count_Person&' + + 'measurement_method=CensusPEPSurvey&observation_period=P1Y&' + + 'unit=RealPeople&scaling_factor=100'): + + # 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100 + # Response returned when querying with above optional params. + return MockResponse(json.dumps({'series': {'2000': 3, '2001': 42}})) + if (req.full_url == stat_series_url_base + + '?place=geoId/06&stat_var=Count_Person&' + + 'measurement_method=DNE'): + + # 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100 + # Response returned when data not available for options. + # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE + return MockResponse(json.dumps({'series': {}})) # Otherwise, return an empty response and a 404. 
+ return urllib.error.HTTPError @@ -85,5 +106,29 @@ def test_opt_args(self, urlopen): self.assertEqual(stat, 103) +class TestGetStatSeries(unittest.TestCase): + """Unit tests for get_stat_series.""" + + @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + def test_basic(self, urlopen): + """Calling get_stat_value with minimal and proper args.""" + # Call get_stat_series + stats = dc.get_stat_series('geoId/06', 'Count_Person') + self.assertEqual(stats, {'2000': 1, '2001': 2}) + + @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + def test_opt_args(self, urlopen): + """Calling get_stat_value with optional args returns specific data.""" + + # Call get_stat_series with all optional args + stats = dc.get_stat_series('geoId/06', 'Count_Person', + 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100) + self.assertEqual(stats, {'2000': 3, '2001': 42}) + + # Call get_stat_series with non-satisfiable optional args + stats = dc.get_stat_series('geoId/06', 'Count_Person', 'DNE') + self.assertEqual(stats, {}) + + if __name__ == '__main__': unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py index dfce278f..81ed169e 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -50,7 +50,7 @@ 'get_place_obs': '/bulk/place-obs', 'get_stats': '/bulk/stats', 'get_stat_value': '/stat/value', - + 'get_stat_series': '/stat/series', } # The default value to limit to From adbbafad418102b9fd0db22208410cae13d5cb84 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Wed, 19 Aug 2020 23:54:28 -0700 Subject: [PATCH 03/12] Flesh out more examples for get_stat_series. 
--- datacommons/examples/statvar.py | 27 +++++++++++++++++++++++++-- datacommons/statvar.py | 2 +- datacommons/test/statvar_test.py | 2 +- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index 1327ab24..f670134c 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -30,7 +30,7 @@ def main(): # Dcid for Santa Clara County. sc = 'geoId/06085' - # Get population. + # Get stat value. print('get_stat_value Count_Person') print(dc.get_stat_value(sc, 'Count_Person')) @@ -43,14 +43,37 @@ def main(): '2018', measurement_method='CensusACS5yrSurvey')) - # Get population. + # Get stat series. print('get_stat_series Count_Person') + print('get_stat_series UnemploymentRate_Person') print(dc.get_stat_series(sc, 'UnemploymentRate_Person')) + print('get_stat_series UnemploymentRate_Person for observationPeriod P1Y') print( dc.get_stat_series(sc, 'UnemploymentRate_Person', observation_period="P1Y")) + print( + 'get_stat_series UnemploymentRate_Person for observationPeriod P1Y and mmethod Unadjusted' + ) + print( + dc.get_stat_series(sc, + 'UnemploymentRate_Person', + measurement_method="BLSSeasonallyUnadjusted", + observation_period="P1Y")) + print('get_stat_series GDP') + print( + dc.get_stat_series( + 'nuts/HU22', + 'Amount_EconomicActivity_GrossDomesticProduction_Nominal')) + print('get_stat_series GDP with unit PurchasingPowerStandard') + print( + dc.get_stat_series( + 'nuts/HU22', + 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', + observation_period="P1Y", + unit="PurchasingPowerStandard")) + if __name__ == '__main__': main() diff --git a/datacommons/statvar.py b/datacommons/statvar.py index c8db1066..a1670fed 100644 --- a/datacommons/statvar.py +++ b/datacommons/statvar.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2020 Google Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datacommons/test/statvar_test.py b/datacommons/test/statvar_test.py index 325a6069..0a34fa45 100644 --- a/datacommons/test/statvar_test.py +++ b/datacommons/test/statvar_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2020 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 37096ee3583020b4b8b865b541ab9d692ab3d706 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 00:12:08 -0700 Subject: [PATCH 04/12] Minor fixes. --- README.md | 6 +++++- datacommons/examples/statvar.py | 14 ++++++++------ datacommons/statvar.py | 6 +++--- datacommons/test/statvar_test.py | 10 ++++++---- run_tests_local.sh | 7 +++++++ 5 files changed, 29 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 9ad6b7b1..5dfdeb71 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,8 @@ Apache 2.0 ## Development -The Python API currently supports `python>=2.7`. +The Python API currently supports `python>=2.7`. However, our tests are +currently written using `python3` dependent libraries. To test, run: @@ -46,6 +47,9 @@ To test, run: $ ./run_tests_local.sh ``` +which uses `python3`. Please also run through the examples using `python2` +to make sure the library fully supports both python versions. + To debug the continuous integration tests, run: ``` diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index f670134c..1902e79c 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Data Commons Python API examples. 
- -Basic demo for get_stat_value. -""" +"""Basic examples for StatisticalVariable-based Data Commons API functions.""" from __future__ import absolute_import from __future__ import division @@ -45,8 +42,11 @@ def main(): # Get stat series. print('get_stat_series Count_Person') + print(dc.get_stat_series(sc, 'Count_Person')) + print('get_stat_series UnemploymentRate_Person') print(dc.get_stat_series(sc, 'UnemploymentRate_Person')) + print('get_stat_series UnemploymentRate_Person for observationPeriod P1Y') print( dc.get_stat_series(sc, @@ -61,12 +61,14 @@ def main(): 'UnemploymentRate_Person', measurement_method="BLSSeasonallyUnadjusted", observation_period="P1Y")) - print('get_stat_series GDP') + + print('get_stat_series Nominal GDP') print( dc.get_stat_series( 'nuts/HU22', 'Amount_EconomicActivity_GrossDomesticProduction_Nominal')) - print('get_stat_series GDP with unit PurchasingPowerStandard') + + print('get_stat_series Nominal GDP with unit PurchasingPowerStandard') print( dc.get_stat_series( 'nuts/HU22', diff --git a/datacommons/statvar.py b/datacommons/statvar.py index a1670fed..603aa161 100644 --- a/datacommons/statvar.py +++ b/datacommons/statvar.py @@ -60,7 +60,7 @@ def get_stat_value(place, Args: place (:obj:`iterable` of :obj:`str`): The dcid of `Place` to query for. stat_var (:obj:`str`): The dcid of the `StatisticalVariable`. - obs_date (:obj:`str`): Optional, the preferred date of observation + date (:obj:`str`): Optional, the preferred date of observation in ISO 8601 format. If not specified, returns the latest observation. measurement_method (:obj:`str`): Optional, the dcid of the preferred `measurementMethod` value. 
@@ -96,7 +96,7 @@ def get_stat_value(place, res_json = _send_get_stat_req(url) if 'value' not in res_json: - raise ValueError('No value in response.') + raise ValueError('No data in response.') return res_json['value'] @@ -143,5 +143,5 @@ def get_stat_series(place, res_json = _send_get_stat_req(url) if 'series' not in res_json: - raise ValueError('No response.') + raise ValueError('No data in response.') return res_json['series'] diff --git a/datacommons/test/statvar_test.py b/datacommons/test/statvar_test.py index 0a34fa45..7f015ff5 100644 --- a/datacommons/test/statvar_test.py +++ b/datacommons/test/statvar_test.py @@ -43,11 +43,12 @@ def read(self): req = args[0] - # Mock responses for urlopen requests to get_stat_value. stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ 'get_stat_value'] stat_series_url_base = utils._API_ROOT + utils._API_ENDPOINTS[ 'get_stat_series'] + + # Mock responses for urlopen requests to get_stat_value. if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': # Response returned when querying with basic args. return MockResponse(json.dumps({'value': 123})) @@ -60,6 +61,8 @@ def read(self): 'observation_period=P1Y&unit=RealPeople&scaling_factor=100'): # Response returned when querying with above optional params. return MockResponse(json.dumps({'value': 103})) + + # Mock responses for urlopen requests to get_stat_value. if req.full_url == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': # Response returned when querying with basic args. return MockResponse(json.dumps({'series': {'2000': 1, '2001': 2}})) @@ -68,17 +71,16 @@ def read(self): 'measurement_method=CensusPEPSurvey&observation_period=P1Y&' + 'unit=RealPeople&scaling_factor=100'): - # 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100 # Response returned when querying with above optional params. 
return MockResponse(json.dumps({'series': {'2000': 3, '2001': 42}})) if (req.full_url == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person&' + 'measurement_method=DNE'): - # 'CensusPEPSurvey', 'P1Y', 'RealPeople', 100 - # Response returned when data not available for options. + # Response returned when data not available for optional parameters. # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE return MockResponse(json.dumps({'series': {}})) + # Otherwise, return an empty response and a 404. return urllib.error.HTTPError diff --git a/run_tests_local.sh b/run_tests_local.sh index bd4f7af9..57b8e65e 100755 --- a/run_tests_local.sh +++ b/run_tests_local.sh @@ -14,8 +14,15 @@ # limitations under the License. +# Note that our mocking library is python3 specific. +# Therefore, please make sure to run the examples +# to make sure your client code is python2 compatible. + python3 -m venv .env source .env/bin/activate pip3 install -r requirements.txt python3 -m pytest + +deactivate + From 103915ac8a182f91b673ae47c360162bd9f4da5e Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:05:33 -0700 Subject: [PATCH 05/12] Made tests work for python2, add python2 to run_tests_local.sh. Notable pain points: mock, urllib, unicode, and base64 changes between Py2&3. --- README.md | 6 +--- datacommons/test/core_test.py | 36 ++++++++++--------- datacommons/test/places_test.py | 27 +++++++------- datacommons/test/populations_test.py | 53 ++++++++++++++++++---------- datacommons/test/query_test.py | 13 ++++--- datacommons/test/set_api_key_test.py | 20 ++++++----- datacommons/test/statvar_test.py | 28 +++++++++------ run_tests_local.sh | 13 ++++--- 8 files changed, 116 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index 5dfdeb71..9ad6b7b1 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,7 @@ Apache 2.0 ## Development -The Python API currently supports `python>=2.7`. 
However, our tests are -currently written using `python3` dependent libraries. +The Python API currently supports `python>=2.7`. To test, run: @@ -47,9 +46,6 @@ To test, run: $ ./run_tests_local.sh ``` -which uses `python3`. Please also run through the examples using `python2` -to make sure the library fully supports both python versions. - To debug the continuous integration tests, run: ``` diff --git a/datacommons/test/core_test.py b/datacommons/test/core_test.py index a370bff7..8dae3443 100644 --- a/datacommons/test/core_test.py +++ b/datacommons/test/core_test.py @@ -20,8 +20,12 @@ from __future__ import division from __future__ import print_function -from unittest import mock -import urllib +try: + from unittest.mock import patch +except ImportError: + from mock import patch + +import six.moves.urllib as urllib import datacommons as dc import datacommons.utils as utils @@ -44,7 +48,7 @@ def read(self): data = json.loads(req.data) # Mock responses for urlopen requests to get_property_labels. 
- if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']: if data['dcids'] == ['geoId/0649670']: # Response for sending a single dcid to get_property_labels out_arcs = ['containedInPlace', 'name', 'geoId', 'typeOf'] @@ -80,7 +84,7 @@ def read(self): return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen requests to get_property_values - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']: if data['dcids'] == ['geoId/06085', 'geoId/24031']\ and data['property'] == 'containedInPlace'\ and data['value_type'] == 'Town': @@ -213,7 +217,7 @@ def read(self): return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen requests to get_triples - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_triples']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_triples']: if data['dcids'] == ['geoId/06085', 'geoId/24031']: # Response for sending a request with two valid dcids. res_json = json.dumps({ @@ -313,7 +317,7 @@ def read(self): class TestGetPropertyLabels(unittest.TestCase): """ Unit tests for get_property_labels. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_single_dcid(self, urlopen_mock): """ Calling get_property_labels with a single dcid returns a valid result. 
@@ -327,7 +331,7 @@ def test_single_dcid(self, urlopen_mock): in_props = dc.get_property_labels(['geoId/0649670'], out=False) self.assertDictEqual(in_props, {'geoId/0649670': []}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen_mock): """ Calling get_property_labels returns valid results with multiple dcids. @@ -352,7 +356,7 @@ def test_multiple_dcids(self, urlopen_mock): 'City': expected_in, }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen_mock): """ Calling get_property_labels with dcids that do not exist returns empty results. @@ -365,7 +369,7 @@ def test_bad_dcids(self, urlopen_mock): in_props = dc.get_property_labels(['dc/MadDcid'], out=False) self.assertDictEqual(in_props, {'dc/MadDcid': []}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen_mock): """ Calling get_property_labels with no dcids returns empty results. """ @@ -383,7 +387,7 @@ class TestGetPropertyValues(unittest.TestCase): # --------------------------- STANDARD UNIT TESTS --------------------------- - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen_mock): """ Calling get_property_values with multiple dcids returns valid results. 
@@ -420,7 +424,7 @@ def test_multiple_dcids(self, urlopen_mock): 'dc/p/1234': [] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen_mock): """ Calling get_property_values with dcids that do not exist returns empty results. @@ -443,7 +447,7 @@ def test_bad_dcids(self, urlopen_mock): 'dc/MadderDcid': [] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_property(self, urlopen_mock): """ Calling get_property_values with a property that does not exist returns empty results. @@ -456,7 +460,7 @@ def test_bad_property(self, urlopen_mock): 'geoId/24031': [] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen_mock): """ Calling get_property_values with no dcids returns empty results. """ # Get property values with an empty list of dcids. @@ -466,7 +470,7 @@ def test_no_dcids(self, urlopen_mock): class TestGetTriples(unittest.TestCase): """ Unit tests for get_triples. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen_mock): """ Calling get_triples with proper dcids returns valid results. """ # Call get_triples @@ -484,7 +488,7 @@ def test_multiple_dcids(self, urlopen_mock): ] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen_mock): """ Calling get_triples with dcids that do not exist returns empty results. 
@@ -507,7 +511,7 @@ def test_bad_dcids(self, urlopen_mock): 'dc/MadderDcid': [] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen_mock): """ Calling get_triples with no dcids returns empty results. """ # Call get_triples with no dcids diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index 31cbbb14..6c57a8cd 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -20,7 +20,10 @@ from __future__ import division from __future__ import print_function -from unittest import mock +try: + from unittest.mock import patch +except ImportError: + from mock import patch import datacommons as dc import datacommons.utils as utils @@ -43,7 +46,7 @@ def read(self): data = json.loads(req.data) # Mock responses for urlopen requests to get_places_in. - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']: if (data['dcids'] == ['geoId/06085', 'geoId/24031'] and data['place_type'] == 'City'): # Response returned when querying for multiple valid dcids. @@ -84,7 +87,7 @@ def read(self): # Mock responses for urlopen requests to get_stats. - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: if (data['place'] == ['geoId/05', 'geoId/06'] and data['stats_var'] == 'dc/0hyp6tkn18vcb'): # Response returned when querying for multiple valid dcids. @@ -214,7 +217,7 @@ def read(self): class TestGetPlacesIn(unittest.TestCase): """ Unit stests for get_places_in. 
""" - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen): """ Calling get_places_in with proper dcids returns valid results. """ # Call get_places_in @@ -224,7 +227,7 @@ def test_multiple_dcids(self, urlopen): 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_places_in with dcids that do not exist returns empty results. @@ -243,7 +246,7 @@ def test_bad_dcids(self, urlopen): 'dc/MadderDcid': [] }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_places_in with no dcids returns empty results. """ # Call get_places_in with no dcids. @@ -257,7 +260,7 @@ def test_no_dcids(self, urlopen): class TestGetStats(unittest.TestCase): """ Unit stests for get_stats. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen): """ Calling get_stats with proper dcids returns valid results. """ # Call get_stats @@ -346,7 +349,7 @@ def test_multiple_dcids(self, urlopen): } }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_opt_args(self, urlopen): """ Calling get_stats with mmethod, unit, and obs period returns specific data. 
""" @@ -390,7 +393,7 @@ def test_opt_args(self, urlopen): } }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_stats with dcids that do not exist returns empty results. @@ -412,21 +415,21 @@ def test_bad_dcids(self, urlopen): 'dc/0hyp6tkn18vcb') self.assertDictEqual({}, bad_dcids_2) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_stats with no dcids returns empty results. """ # Call get_stats with no dcids. no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') self.assertDictEqual({}, no_dcids) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_data(self, urlopen): """ Calling get_stats with for None data. """ # Call get_stats with no dcids. result = dc.get_stats(['geoId/00'], 'dc/0hyp6tkn18vcb') self.assertDictEqual({}, result) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_batch_request(self, mock_urlopen): """ Make multiple calls to REST API when number of geos exceeds the batch size. 
""" save_batch_size = dc.utils._QUERY_BATCH_SIZE diff --git a/datacommons/test/populations_test.py b/datacommons/test/populations_test.py index 7054854c..e1cb8bc3 100644 --- a/datacommons/test/populations_test.py +++ b/datacommons/test/populations_test.py @@ -22,13 +22,17 @@ from __future__ import print_function import base64 -from unittest import mock + +try: + from unittest.mock import patch +except ImportError: + from mock import patch import datacommons as dc import datacommons.utils as utils import json import unittest -import urllib +import six.moves.urllib as urllib import zlib @@ -58,10 +62,23 @@ def read(self): } ] + def compare_constraint_helper(constrained_props, data_pvs): + """Py2 workaround for unicode vs str comparison." + for cpv in constrained_props: + satisfied = False + for k, v in cpv.items(): + for pv in data_pvs: + if k in pv and pv[k] == cpv[k]: + print("found ", k, v, " in", pv) + satisfied = True + if satisfied == False: + return False + return True + # Mock responses for urlopen request to get_populations. - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_populations']\ + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_populations']\ and data['population_type'] == 'Person'\ - and data['pvs'] == constrained_props: + and compare_constraint_helper(constrained_props, data['pvs']): if data['dcids'] == ['geoId/06085', 'geoId/4805000']: # Response returned when querying for multiple valid dcids. 
res_json = json.dumps([ @@ -91,7 +108,7 @@ def read(self): return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen request to get_observations - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_observations']\ + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_observations']\ and data['measured_property'] == 'count'\ and data['stats_type'] == 'measuredValue'\ and data['observation_date'] == '2018-12'\ @@ -130,11 +147,11 @@ def read(self): return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen request to get_place_obs - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs']\ + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs']\ and data['place_type'] == 'City'\ and data['observation_date'] == '2017'\ and data['population_type'] == 'Person'\ - and data['pvs'] == constrained_props: + and compare_constraint_helper(constrained_props, data['pvs']): res_json = json.dumps({ 'places': [ { @@ -155,10 +172,10 @@ def read(self): ] }) return MockResponse(json.dumps( - {'payload': base64.encodebytes(zlib.compress(res_json.encode('utf-8'))).decode('ascii')})) + {'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8'))).decode('ascii')})) # Mock responses for get requests to get_pop_obs. - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid=geoId/06085': + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid=geoId/06085': # Response returned when querying for a city in the graph. res_json = json.dumps({ 'name': 'Mountain View', @@ -194,7 +211,7 @@ def read(self): } }) return MockResponse(json.dumps( - {'payload': base64.encodebytes(zlib.compress(res_json.encode('utf-8'))).decode('ascii')})) + {'payload': base64.b64encode(zlib.compress(res_json.encode('utf-8'))).decode('ascii')})) # Otherwise, return an empty response and a 404. 
return urllib.error.HTTPError(None, 404, None, None, None) @@ -207,7 +224,7 @@ class TestGetPopulations(unittest.TestCase): 'age': 'Years5To17' } - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen): """ Calling get_populations with proper dcids returns valid results. """ # Call get_populations @@ -219,7 +236,7 @@ def test_multiple_dcids(self, urlopen): }) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_populations with dcids that do not exist returns empty results. @@ -234,7 +251,7 @@ def test_bad_dcids(self, urlopen): self.assertDictEqual(pops_1, {'geoId/06085': 'dc/p/crgfn8blpvl35'}) self.assertDictEqual(pops_2, {}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_populations with no dcids returns empty results. """ pops = dc.get_populations( @@ -244,7 +261,7 @@ def test_no_dcids(self, urlopen): class TestGetObservations(unittest.TestCase): """ Unit tests for get_observations. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen): """ Calling get_observations with proper dcids returns valid results. 
""" dcids = ['dc/p/x6t44d8jd95rd', 'dc/p/lr52m1yr46r44', 'dc/p/fs929fynprzs'] @@ -258,7 +275,7 @@ def test_multiple_dcids(self, urlopen): measurement_method='BLSSeasonallyAdjusted') self.assertDictEqual(actual, expected) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_observations with dcids that do not exist returns empty results. @@ -279,7 +296,7 @@ def test_bad_dcids(self, urlopen): self.assertDictEqual(actual_1, {'dc/p/x6t44d8jd95rd': 18704962.0}) self.assertDictEqual(actual_2, {}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_observations with no dcids returns empty results. """ actual = dc.get_observations([], 'count', 'measuredValue', '2018-12', @@ -291,7 +308,7 @@ def test_no_dcids(self, urlopen): class TestGetPopObs(unittest.TestCase): """ Unit tests for get_pop_obs. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_valid_dcid(self, urlopen): """ Calling get_pop_obs with valid dcid returns valid results. """ # Call get_pop_obs @@ -333,7 +350,7 @@ def test_valid_dcid(self, urlopen): class TestGetPlaceObs(unittest.TestCase): """ Unit tests for get_place_obs. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_valid(self, urlopen): """ Calling get_place_obs with valid parameters returns a valid result. 
""" # Call get_place_obs diff --git a/datacommons/test/query_test.py b/datacommons/test/query_test.py index 499ea70a..75b12a66 100644 --- a/datacommons/test/query_test.py +++ b/datacommons/test/query_test.py @@ -20,14 +20,17 @@ from __future__ import division from __future__ import print_function -from unittest import mock +try: + from unittest.mock import patch +except ImportError: + from mock import patch import datacommons as dc import datacommons.utils as utils import json import unittest -import urllib +import six.moves.urllib as urllib def request_mock(*args, **kwargs): @@ -63,7 +66,7 @@ def read(self): req = args[0] data = json.loads(req.data) - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['query']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['query']: if data['sparql'] == accepted_query: return MockResponse(json.dumps({ 'header': [ @@ -118,7 +121,7 @@ def read(self): class TestQuery(unittest.TestCase): """ Unit tests for the Query object. """ - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_rows(self, urlopen): """ Sending a valid query returns the correct response. """ # Create the SPARQL query @@ -153,7 +156,7 @@ def test_rows(self, urlopen): if idx == 1: self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_no_rows(self, urlopen): """ Handles row-less response. 
""" # Create a SPARQL query diff --git a/datacommons/test/set_api_key_test.py b/datacommons/test/set_api_key_test.py index 369a32db..0c6260e3 100644 --- a/datacommons/test/set_api_key_test.py +++ b/datacommons/test/set_api_key_test.py @@ -20,15 +20,17 @@ from __future__ import division from __future__ import print_function -from unittest import mock - +try: + from unittest.mock import patch +except ImportError: + from mock import patch import datacommons as dc import datacommons.utils as utils import os import json import unittest -import urllib +import six.moves.urllib as urllib _TEST_API_KEY = 'TEST-API-KEY' @@ -52,12 +54,12 @@ def read(self): req = args[0] - if req.full_url == _SEND_REQ_NO_KEY or json.loads(req.data) == {'sparql': _SPARQL_NO_KEY}: + if req.get_full_url() == _SEND_REQ_NO_KEY or json.loads(req.data) == {'sparql': _SPARQL_NO_KEY}: assert 'X-api-key' not in req.headers else: assert req.get_header('X-api-key') == _TEST_API_KEY - if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['query']: + if req.get_full_url() == utils._API_ROOT + utils._API_ENDPOINTS['query']: # Return a dummy response that will parse into [] by query() return MockResponse(json.dumps({ 'header': [ @@ -72,19 +74,19 @@ def read(self): class TestApiKey(unittest.TestCase): """Unit test for setting or not setting the API Key.""" - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_query_no_api_key(self, urlopen): del os.environ[utils._ENV_VAR_API_KEY] # Issue a dummy SPARQL query that tells the mock to not expect a key self.assertEqual(dc.query(_SPARQL_NO_KEY), []) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_send_request_no_api_key(self, urlopen): del os.environ[utils._ENV_VAR_API_KEY] # Issue a dummy url that tells the mock to not expect a key 
self.assertEqual(utils._send_request(_SEND_REQ_NO_KEY, {'foo': ['bar']}), {}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_query_w_api_key(self, urlopen): """ Handles row-less response. """ # Set the API key @@ -93,7 +95,7 @@ def test_query_w_api_key(self, urlopen): # Issue a dummy SPARQL query that tells the mock to expect a key self.assertEqual(dc.query(_SPARQL_W_KEY), []) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_send_request_w_api_key(self, urlopen): """ Handles row-less response. """ # Set the API key diff --git a/datacommons/test/statvar_test.py b/datacommons/test/statvar_test.py index 7f015ff5..f4536bd5 100644 --- a/datacommons/test/statvar_test.py +++ b/datacommons/test/statvar_test.py @@ -20,7 +20,10 @@ from __future__ import division from __future__ import print_function -from unittest import mock +try: + from unittest.mock import patch +except ImportError: + from mock import patch import datacommons as dc import datacommons.utils as utils @@ -49,13 +52,15 @@ def read(self): 'get_stat_series'] # Mock responses for urlopen requests to get_stat_value. - if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': + if req.get_full_url( + ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person': # Response returned when querying with basic args. return MockResponse(json.dumps({'value': 123})) - if req.full_url == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010': + if req.get_full_url( + ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010': # Response returned when querying with observationDate. 
return MockResponse(json.dumps({'value': 133})) - if (req.full_url == stat_value_url_base + + if (req.get_full_url() == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&' + 'date=2010&measurement_method=CensusPEPSurvey&' + 'observation_period=P1Y&unit=RealPeople&scaling_factor=100'): @@ -63,17 +68,18 @@ def read(self): return MockResponse(json.dumps({'value': 103})) # Mock responses for urlopen requests to get_stat_value. - if req.full_url == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': + if req.get_full_url( + ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person': # Response returned when querying with basic args. return MockResponse(json.dumps({'series': {'2000': 1, '2001': 2}})) - if (req.full_url == stat_series_url_base + + if (req.get_full_url() == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person&' + 'measurement_method=CensusPEPSurvey&observation_period=P1Y&' + 'unit=RealPeople&scaling_factor=100'): # Response returned when querying with above optional params. 
return MockResponse(json.dumps({'series': {'2000': 3, '2001': 42}})) - if (req.full_url == stat_series_url_base + + if (req.get_full_url() == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person&' + 'measurement_method=DNE'): @@ -88,14 +94,14 @@ def read(self): class TestGetStatValue(unittest.TestCase): """Unit tests for get_stat_value.""" - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_basic(self, urlopen): """Calling get_stat_value with minimal and proper args.""" # Call get_stat_value self.assertEqual(dc.get_stat_value('geoId/06', 'Count_Person'), 123) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_opt_args(self, urlopen): """Calling get_stat_value with optional args returns specific data.""" # Call get_stat_value for specific obs @@ -111,14 +117,14 @@ def test_opt_args(self, urlopen): class TestGetStatSeries(unittest.TestCase): """Unit tests for get_stat_series.""" - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_basic(self, urlopen): """Calling get_stat_value with minimal and proper args.""" # Call get_stat_series stats = dc.get_stat_series('geoId/06', 'Count_Person') self.assertEqual(stats, {'2000': 1, '2001': 2}) - @mock.patch('six.moves.urllib.request.urlopen', side_effect=request_mock) + @patch('six.moves.urllib.request.urlopen', side_effect=request_mock) def test_opt_args(self, urlopen): """Calling get_stat_value with optional args returns specific data.""" diff --git a/run_tests_local.sh b/run_tests_local.sh index 57b8e65e..9cf899ad 100755 --- a/run_tests_local.sh +++ b/run_tests_local.sh @@ -14,10 +14,6 @@ # limitations under the License. -# Note that our mocking library is python3 specific. 
-# Therefore, please make sure to run the examples -# to make sure your client code is python2 compatible. - python3 -m venv .env source .env/bin/activate @@ -26,3 +22,12 @@ python3 -m pytest deactivate + +python2 -m venv .env +source .env/bin/activate + +pip2 install -r requirements.txt +python2 -m pytest + +deactivate + From a0178c6fa9dacaa6c59cc7396ff8546a7df3f6c3 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:12:17 -0700 Subject: [PATCH 06/12] Remove sys path hack and add example running instructions to README. --- README.md | 8 ++++++++ datacommons/examples/statvar.py | 3 --- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9ad6b7b1..8a93ce8e 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,14 @@ $ cloud-build-local --config=cloudbuild.yaml --dryrun=false . Both commands will run the same set of tests. +To run the examples: + +``` +$ python -m datacommons.examples.XXX +``` + +where XXX is the module you want to run. + ## Release to PyPI - Update "VERSION" in setup.py diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index 1902e79c..bb30cc6f 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -17,9 +17,6 @@ from __future__ import division from __future__ import print_function -import sys -sys.path.append('../') -sys.path.append('../../') import datacommons as dc From d09bb7c6f6701560487364e6cd4c49db7912636d Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:12:56 -0700 Subject: [PATCH 07/12] double-> float --- datacommons/statvar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacommons/statvar.py b/datacommons/statvar.py index 603aa161..ebc51f15 100644 --- a/datacommons/statvar.py +++ b/datacommons/statvar.py @@ -69,7 +69,7 @@ def get_stat_value(place, unit (:obj:`str`): Optional, the dcid of the preferred `unit` value. 
scaling_factor (:obj:`int`): Optional, the preferred `scalingFactor` value. Returns: - A :obj:`double` the value of :code:`stat_var` for :code:`place`, filtered + A :obj:`float` the value of :code:`stat_var` for :code:`place`, filtered by optional args. Raises: From 4be761c20a2538d7310a0c81a4c02f4e2df936b2 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:31:03 -0700 Subject: [PATCH 08/12] Parameterized the new get stat function examples. --- datacommons/examples/statvar.py | 121 +++++++++++++++++++------------- 1 file changed, 71 insertions(+), 50 deletions(-) diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index bb30cc6f..23532bd4 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -21,57 +21,78 @@ def main(): - # Dcid for Santa Clara County. - sc = 'geoId/06085' + data = [ + { + 'place': 'geoId/06085', + 'stat_var': 'Count_Person', + }, + { + 'place': 'geoId/06085', + 'stat_var': 'Count_Person', + 'date': '2018', + }, + { + 'place': 'geoId/06085', + 'stat_var': 'Count_Person', + 'date': '2018', + 'measurement_method': 'CensusACS5yrSurvey', + }, + { + 'place': 'geoId/06085', + 'stat_var': 'UnemploymentRate_Person', + }, + { + 'place': 'geoId/06085', + 'stat_var': 'UnemploymentRate_Person', + 'observation_period': "P1Y", + }, + { + 'place': 'geoId/06085', + 'stat_var': 'UnemploymentRate_Person', + 'observation_period': "P1Y", + 'measurement_method': "BLSSeasonallyUnadjusted", + }, + { + 'place': + 'nuts/HU22', + 'stat_var': + 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', + }, + { + 'place': + 'nuts/HU22', + 'stat_var': + 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', + 'observation_period': + "P1Y", + 'unit': + "PurchasingPowerStandard" + }, + ] - # Get stat value. 
- print('get_stat_value Count_Person') - print(dc.get_stat_value(sc, 'Count_Person')) - - print('get_stat_value Count_Person 2018') - print(dc.get_stat_value(sc, 'Count_Person', '2018')) - print('get_stat_value Count_Person 2018 from ACS 5 yr') - print( - dc.get_stat_value(sc, - 'Count_Person', - '2018', - measurement_method='CensusACS5yrSurvey')) - - # Get stat series. - print('get_stat_series Count_Person') - print(dc.get_stat_series(sc, 'Count_Person')) - - print('get_stat_series UnemploymentRate_Person') - print(dc.get_stat_series(sc, 'UnemploymentRate_Person')) - - print('get_stat_series UnemploymentRate_Person for observationPeriod P1Y') - print( - dc.get_stat_series(sc, - 'UnemploymentRate_Person', - observation_period="P1Y")) - - print( - 'get_stat_series UnemploymentRate_Person for observationPeriod P1Y and mmethod Unadjusted' - ) - print( - dc.get_stat_series(sc, - 'UnemploymentRate_Person', - measurement_method="BLSSeasonallyUnadjusted", - observation_period="P1Y")) - - print('get_stat_series Nominal GDP') - print( - dc.get_stat_series( - 'nuts/HU22', - 'Amount_EconomicActivity_GrossDomesticProduction_Nominal')) - - print('get_stat_series Nominal GDP with unit PurchasingPowerStandard') - print( - dc.get_stat_series( - 'nuts/HU22', - 'Amount_EconomicActivity_GrossDomesticProduction_Nominal', - observation_period="P1Y", - unit="PurchasingPowerStandard")) + for d in data: + print('\n>>> get_stat_value: ', + [param for param in d.values() if param]) + print( + '<<< ', + dc.get_stat_value(d.get('place'), + d.get('stat_var'), + date=d.get('date'), + measurement_method=d.get('measurement_method'), + observation_period=d.get('observation_period'), + unit=d.get('unit'), + scaling_factor=d.get('scaling_factor'))) + for d in data: + print('\n>>> get_stat_series: ', + [d[k] for k in d.keys() if k != 'date' and d[k]]) + print( + '<<< ', + dc.get_stat_series(d.get('place'), + d.get('stat_var'), + measurement_method=d.get('measurement_method'), + 
observation_period=d.get('observation_period'), + unit=d.get('unit'), + scaling_factor=d.get('scaling_factor'))) if __name__ == '__main__': From 3303c5b52774237da1c3c811914c8d5acec94cea Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:34:04 -0700 Subject: [PATCH 09/12] Fix probably accidental deletion of docstring end quotes. --- datacommons/test/populations_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacommons/test/populations_test.py b/datacommons/test/populations_test.py index e1cb8bc3..f1fd3323 100644 --- a/datacommons/test/populations_test.py +++ b/datacommons/test/populations_test.py @@ -63,7 +63,7 @@ def read(self): ] def compare_constraint_helper(constrained_props, data_pvs): - """Py2 workaround for unicode vs str comparison." + """Py2 workaround for unicode vs str comparison.""" for cpv in constrained_props: satisfied = False for k, v in cpv.items(): From 6183a254cd33685a1b08b18b41f2864d07f2e55d Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 11:37:25 -0700 Subject: [PATCH 10/12] Improve example printing and varnames. --- datacommons/examples/statvar.py | 60 ++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/datacommons/examples/statvar.py b/datacommons/examples/statvar.py index 23532bd4..f2028907 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/statvar.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Basic examples for StatisticalVariable-based Data Commons API functions.""" +"""Basic examples for StatisticalVariable-based param_set Commons API functions.""" from __future__ import absolute_import from __future__ import division @@ -21,7 +21,7 @@ def main(): - data = [ + param_sets = [ { 'place': 'geoId/06085', 'stat_var': 'Count_Person', @@ -70,29 +70,43 @@ def main(): }, ] - for d in data: - print('\n>>> get_stat_value: ', - [param for param in d.values() if param]) + def call_str(pvs): + """Helper function to print the minimal call string.""" + s = "'{}', '{}'".format(pvs.get('place'), pvs.get('stat_var')) + if pvs.get('measurement_method'): + s += ", measurement_method='{}'".format( + pvs.get('measurement_method')) + if pvs.get('observation_period'): + s += ", observation_period='{}'".format( + pvs.get('observation_period')) + if pvs.get('unit'): + s += ", unit='{}'".format(pvs.get('unit')) + if pvs.get('scaling_factor'): + s += ", scaling_factor={}".format(pvs.get('scaling_factor')) + return s + + for pvs in param_sets: + print('\nget_stat_value({})'.format(call_str(pvs))) print( - '<<< ', - dc.get_stat_value(d.get('place'), - d.get('stat_var'), - date=d.get('date'), - measurement_method=d.get('measurement_method'), - observation_period=d.get('observation_period'), - unit=d.get('unit'), - scaling_factor=d.get('scaling_factor'))) - for d in data: - print('\n>>> get_stat_series: ', - [d[k] for k in d.keys() if k != 'date' and d[k]]) + '>>> ', + dc.get_stat_value(pvs.get('place'), + pvs.get('stat_var'), + date=pvs.get('date'), + measurement_method=pvs.get('measurement_method'), + observation_period=pvs.get('observation_period'), + unit=pvs.get('unit'), + scaling_factor=pvs.get('scaling_factor'))) + for pvs in param_sets: + pvs.pop('date', None) + print('\nget_stat_series({})'.format(call_str(pvs))) print( - '<<< ', - dc.get_stat_series(d.get('place'), - d.get('stat_var'), - measurement_method=d.get('measurement_method'), - 
observation_period=d.get('observation_period'), - unit=d.get('unit'), - scaling_factor=d.get('scaling_factor'))) + '>>> ', + dc.get_stat_series(pvs.get('place'), + pvs.get('stat_var'), + measurement_method=pvs.get('measurement_method'), + observation_period=pvs.get('observation_period'), + unit=pvs.get('unit'), + scaling_factor=pvs.get('scaling_factor'))) if __name__ == '__main__': From 341bccaeff1013e6e5618d168e1556123235b566 Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 11:41:58 -0700 Subject: [PATCH 11/12] Rename statvar-> stat_vars in file names. --- datacommons/__init__.py | 2 +- datacommons/examples/{statvar.py => stat_vars.py} | 2 +- datacommons/{statvar.py => stat_vars.py} | 0 datacommons/test/{statvar_test.py => stat_vars_test.py} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename datacommons/examples/{statvar.py => stat_vars.py} (98%) rename datacommons/{statvar.py => stat_vars.py} (100%) rename datacommons/test/{statvar_test.py => stat_vars_test.py} (100%) diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 4172c6d4..c24cb172 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -19,7 +19,7 @@ from datacommons.core import get_property_labels, get_property_values, get_triples from datacommons.places import get_places_in, get_related_places, get_stats from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs -from datacommons.statvar import get_stat_value, get_stat_series +from datacommons.stat_vars import get_stat_value, get_stat_series # Other utilities from .utils import set_api_key diff --git a/datacommons/examples/statvar.py b/datacommons/examples/stat_vars.py similarity index 98% rename from datacommons/examples/statvar.py rename to datacommons/examples/stat_vars.py index f2028907..3da59cd2 100644 --- a/datacommons/examples/statvar.py +++ b/datacommons/examples/stat_vars.py @@ -71,7 +71,7 @@ def main(): ] def 
call_str(pvs): - """Helper function to print the minimal call string.""" + """Helper function to print the minimal call string.""" s = "'{}', '{}'".format(pvs.get('place'), pvs.get('stat_var')) if pvs.get('measurement_method'): s += ", measurement_method='{}'".format( diff --git a/datacommons/statvar.py b/datacommons/stat_vars.py similarity index 100% rename from datacommons/statvar.py rename to datacommons/stat_vars.py diff --git a/datacommons/test/statvar_test.py b/datacommons/test/stat_vars_test.py similarity index 100% rename from datacommons/test/statvar_test.py rename to datacommons/test/stat_vars_test.py From fe040b1e7187d666929cfea70a9050d1f3255fae Mon Sep 17 00:00:00 2001 From: tjann <18621425+tjann@users.noreply.github.com> Date: Thu, 20 Aug 2020 12:06:53 -0700 Subject: [PATCH 12/12] Add payload option to _send_request and use that instead in stat_vars.py --- datacommons/stat_vars.py | 22 ++-------------------- datacommons/utils.py | 4 +++- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py index ebc51f15..30c70a8e 100644 --- a/datacommons/stat_vars.py +++ b/datacommons/stat_vars.py @@ -30,24 +30,6 @@ import datacommons.utils as utils -def _send_get_stat_req(url): - - headers = {'Content-Type': 'application/json'} - if os.environ.get(_ENV_VAR_API_KEY): - headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] - - req = six.moves.urllib.request.Request(url, headers=headers) - - try: - res = six.moves.urllib.request.urlopen(req) - except six.moves.urllib.error.HTTPError as e: - raise ValueError('Response error {}:\n{}'.format(e.code, e.read())) - - # Verify then store the results. 
- res_json = json.loads(res.read()) - return res_json - - def get_stat_value(place, stat_var, date=None, @@ -93,7 +75,7 @@ def get_stat_value(place, if scaling_factor: url += '&scaling_factor={}'.format(scaling_factor) - res_json = _send_get_stat_req(url) + res_json = utils._send_request(url, post=False, use_payload=False) if 'value' not in res_json: raise ValueError('No data in response.') @@ -140,7 +122,7 @@ def get_stat_series(place, if scaling_factor: url += '&scaling_factor={}'.format(scaling_factor) - res_json = _send_get_stat_req(url) + res_json = utils._send_request(url, post=False, use_payload=False) if 'series' not in res_json: raise ValueError('No data in response.') diff --git a/datacommons/utils.py b/datacommons/utils.py index 81ed169e..313fd38c 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -84,7 +84,7 @@ def set_api_key(api_key): # ------------------------- INTERNAL HELPER FUNCTIONS ------------------------- -def _send_request(req_url, req_json={}, compress=False, post=True): +def _send_request(req_url, req_json={}, compress=False, post=True, use_payload=True): """ Sends a POST/GET request to req_url with req_json, default to POST. Returns: @@ -115,6 +115,8 @@ def _send_request(req_url, req_json={}, compress=False, post=True): # Get the JSON res_json = json.loads(res.read()) + if not use_payload: + return res_json if 'payload' not in res_json: raise ValueError( 'Response error: Payload not found. Printing response\n\n'