From 143d612e86d265ece28c8ccb9991bcf4403168e3 Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 13:45:52 -0700 Subject: [PATCH 1/7] Add get_stats endpoint. --- datacommons/__init__.py | 2 +- datacommons/examples/places.py | 14 ++-- datacommons/places.py | 95 ++++++++++++++++++++++----- datacommons/test/places_test.py | 112 ++++++++++++++++++++++++++++++++ datacommons/utils.py | 1 + 5 files changed, 203 insertions(+), 21 deletions(-) diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 6400a4da..e96aa3bb 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -17,7 +17,7 @@ # Data Commons Python Client API from datacommons.core import get_property_labels, get_property_values, get_triples -from datacommons.places import get_places_in, get_related_places +from datacommons.places import get_places_in, get_related_places, get_stats from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs # Other utilities diff --git a/datacommons/examples/places.py b/datacommons/examples/places.py index bb5a9f4a..492bfc6c 100644 --- a/datacommons/examples/places.py +++ b/datacommons/examples/places.py @@ -40,12 +40,16 @@ def main(): for dcid in tracts[mc][:10]: print(' - {}'.format(dcid)) + # Get place stats. + print('Get place stats') + stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb') + print(stats) + # Get related places. - print('Get related places') - # TODO(*): s/Cenus/Census/g when data is ready in BT. 
- related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', - 'CenusACS5yrSurvey', "measuredValue", {"gender": "Female"}) - print(related_places) +# print('Get related places') +# related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', +# 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"}) +# print(related_places) if __name__ == '__main__': diff --git a/datacommons/places.py b/datacommons/places.py index 354333cc..9bb0ccc9 100644 --- a/datacommons/places.py +++ b/datacommons/places.py @@ -49,15 +49,15 @@ def get_places_in(dcids, place_type): >>> get_places_in(["geoId/06"], "County") { - 'geoId/06': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - 'geoId/06067', - ... - # and 53 more - ] + 'geoId/06': [ + 'geoId/06041', + 'geoId/06089', + 'geoId/06015', + 'geoId/06023', + 'geoId/06067', + ... + # and 53 more + ] } """ dcids = filter(lambda v: v==v, dcids) # Filter out NaN values @@ -71,7 +71,72 @@ def get_places_in(dcids, place_type): # Create the results and format it appropriately result = utils._format_expand_payload(payload, 'place', must_exist=dcids) return result + +def get_stats(dcids, stats_var): + """ Returns :obj:`TimeSeries` for :code:`dcids` \ + based on the :code:`stats_var`. + Args: + dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for. + stats_var (:obj:`str`): The dcid of the :obj:`StatisticalVariable`. + Returns: + A :obj:`dict` mapping the :obj:`Place` identified by the given :code:`dcid` + to its place name and the :obj:`TimeSeries` associated with the + :obj:`StatisticalVariable` identified by the given :code:`stats_var`. + See example below for more detail about how the returned :obj:`dict` is + structured. + + Raises: + ValueError: If the payload returned by the Data Commons REST API is + malformed. 
+ + Examples: + We would like to get the :obj:`TimeSeries` of the number of males + at least 25 years old that attended 12th grade but did not receive + a high school diploma + (`dc/0hyp6tkn18vcb `_) + in `Arkansas `_ + and `California `_. + + >>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb") + { + 'geoId/05': { + 'place_name': 'Arkansas', + 'data': { + '2011':18136, + '2012':17279, + '2013':17459, + '2014':16966, + '2015':17173, + '2016':17041, + '2017':17783, + '2018':18003 + }, + }, + 'geoId/06': { + 'place_name': 'California', + 'data': { + '2011':316667, + '2012':324116, + '2013':331853, + '2014':342818, + '2015':348979, + '2016':354806, + '2017':360645, + '2018':366331 + }, + }, + } + """ + dcids = filter(lambda v: v==v, dcids) # Filter out NaN values + dcids = list(dcids) + url = utils._API_ROOT + utils._API_ENDPOINTS['get_stats'] + payload = utils._send_request(url, req_json={ + 'place': dcids, + 'stats_var': stats_var, + }) + + return payload def get_related_places(dcids, population_type, measured_property, measurement_method, stat_type, constraining_properties={}, @@ -112,12 +177,12 @@ def get_related_places(dcids, population_type, measured_property, "gender": "Female" }, "count", "CenusACS5yrSurvey", "measuredValue") { - 'geoId/06085': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - ] + 'geoId/06085': [ + 'geoId/06041', + 'geoId/06089', + 'geoId/06015', + 'geoId/06023', + ] } """ dcids = filter(lambda v: v==v, dcids) # Filter out NaN values diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index e0f9670d..a1d7eef8 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -87,6 +87,70 @@ def read(self): # Response returned when no dcids are given. return MockResponse(json.dumps({'payload': res_json})) + + # Mock responses for urlopen requests to get_stats. 
+ if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: + if (data['dcids'] == ['geoId/05', 'geoId/06'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when querying for multiple valid dcids. + res_json = json.dumps({ + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + }, + 'geoId/06': { + 'data': { + '2011': 316667, + '2012': 324116, + '2013': 331853, + '2014': 342818, + '2015': 348979, + '2016': 354806, + '2017': 360645, + '2018': 366331 + }, + 'place_name': 'California' + } + }) + return MockResponse(json.dumps({'payload': res_json})) + if (data['dcids'] == ['geoId/05', 'dc/MadDcid'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when querying for a dcid that does not exist. + res_json = json.dumps({ + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + } + }) + return MockResponse(json.dumps({'payload': res_json})) + if (data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when both given dcids do not exist. + res_json = json.dumps([]) + return MockResponse(json.dumps({'payload': res_json})) + if data['dcids'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb': + res_json = json.dumps([]) + # Response returned when no dcids are given. + return MockResponse(json.dumps({'payload': res_json})) + # Otherwise, return an empty response and a 404. return urllib.error.HTTPError @@ -142,5 +206,53 @@ def test_no_dcids(self, urlopen): }) +class TestGetStats(unittest.TestCase): + """ Unit stests for get_stats. 
""" + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_multiple_dcids(self): + """ Calling get_places_in with proper dcids returns valid results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in + stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb') + self.assertDictEqual( + stats, { + 'geoId/06085': ['geoId/0649670'], + 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] + }) + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_bad_dcids(self): + """ Calling get_places_in with dcids that do not exist returns empty + results. + """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in with one dcid that does not exist + bad_dcids_1 = dc.get_stats(['geoId/05', 'dc/MadDcid'], 'dc/0hyp6tkn18vcb') + self.assertDictEqual(bad_dcids_1, { + 'geoId/06085': ['geoId/0649670'], + 'dc/MadDcid': [] + }) + + # Call get_places_in when both dcids do not exist + bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], + 'dc/0hyp6tkn18vcb') + self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': [], 'dc/MadderDcid': []}) + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_no_dcids(self): + """ Calling get_places_in with no dcids returns empty results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in with no valid dcids. 
+ no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') + self.assertDictEqual(no_dcids, {}) + + if __name__ == '__main__': unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py index 9189aee2..cbfe64f9 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -48,6 +48,7 @@ 'get_observations': '/node/observations', 'get_pop_obs': '/bulk/pop-obs', 'get_place_obs': '/bulk/place-obs', + 'get_stats': '/bulk/stats', } # The default value to limit to From 6a6a4a7d1b0cea405cfc72feb6c6310c726ae12c Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:07:19 -0700 Subject: [PATCH 2/7] Restore urlopen to test, need for mocking. --- datacommons/test/places_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index a1d7eef8..7eecf5e5 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -210,7 +210,7 @@ class TestGetStats(unittest.TestCase): """ Unit stests for get_stats. """ @mock.patch('urllib.request.urlopen', side_effect=request_mock) - def test_multiple_dcids(self): + def test_multiple_dcids(self, urlopen): """ Calling get_places_in with proper dcids returns valid results. """ # Set the API key dc.set_api_key('TEST-API-KEY') @@ -224,7 +224,7 @@ def test_multiple_dcids(self): }) @mock.patch('urllib.request.urlopen', side_effect=request_mock) - def test_bad_dcids(self): + def test_bad_dcids(self, urlopen): """ Calling get_places_in with dcids that do not exist returns empty results. """ @@ -244,7 +244,7 @@ def test_bad_dcids(self): self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': [], 'dc/MadderDcid': []}) @mock.patch('urllib.request.urlopen', side_effect=request_mock) - def test_no_dcids(self): + def test_no_dcids(self, urlopen): """ Calling get_places_in with no dcids returns empty results. 
""" # Set the API key dc.set_api_key('TEST-API-KEY') From cf73f113c582729527a32ec347c795dc05cdc2f2 Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:15:25 -0700 Subject: [PATCH 3/7] Update get_stats test with get stats response, not get_places_in response --- datacommons/test/places_test.py | 59 +++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index 7eecf5e5..ff5f528a 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -215,12 +215,36 @@ def test_multiple_dcids(self, urlopen): # Set the API key dc.set_api_key('TEST-API-KEY') - # Call get_places_in + # Call get_stats stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb') self.assertDictEqual( stats, { - 'geoId/06085': ['geoId/0649670'], - 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + }, + 'geoId/06': { + 'data': { + '2011': 316667, + '2012': 324116, + '2013': 331853, + '2014': 342818, + '2015': 348979, + '2016': 354806, + '2017': 360645, + '2018': 366331 + }, + 'place_name': 'California' + } }) @mock.patch('urllib.request.urlopen', side_effect=request_mock) @@ -231,17 +255,30 @@ def test_bad_dcids(self, urlopen): # Set the API key dc.set_api_key('TEST-API-KEY') - # Call get_places_in with one dcid that does not exist + # Call get_stats with one dcid that does not exist bad_dcids_1 = dc.get_stats(['geoId/05', 'dc/MadDcid'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual(bad_dcids_1, { - 'geoId/06085': ['geoId/0649670'], - 'dc/MadDcid': [] - }) + self.assertDictEqual( + bad_dcids_1, { + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 
'place_name': 'Arkansas' + }, + 'dc/MadDcid': {} + }) - # Call get_places_in when both dcids do not exist + # Call get_stats when both dcids do not exist bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': [], 'dc/MadderDcid': []}) + self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': {}, 'dc/MadderDcid': {}}) @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): @@ -249,7 +286,7 @@ def test_no_dcids(self, urlopen): # Set the API key dc.set_api_key('TEST-API-KEY') - # Call get_places_in with no valid dcids. + # Call get_stats with no valid dcids. no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') self.assertDictEqual(no_dcids, {}) From 196364b0382aeefd3fb95930b708fb3bfe743db5 Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:31:50 -0700 Subject: [PATCH 4/7] Update GetStatsTest test docstrings. --- datacommons/test/places_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index ff5f528a..37681752 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -211,7 +211,7 @@ class TestGetStats(unittest.TestCase): @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_multiple_dcids(self, urlopen): - """ Calling get_places_in with proper dcids returns valid results. """ + """ Calling get_stats with proper dcids returns valid results. """ # Set the API key dc.set_api_key('TEST-API-KEY') @@ -249,7 +249,7 @@ def test_multiple_dcids(self, urlopen): @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): - """ Calling get_places_in with dcids that do not exist returns empty + """ Calling get_stats with dcids that do not exist returns empty results. 
""" # Set the API key @@ -282,7 +282,7 @@ def test_bad_dcids(self, urlopen): @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): - """ Calling get_places_in with no dcids returns empty results. """ + """ Calling get_stats with no dcids returns empty results. """ # Set the API key dc.set_api_key('TEST-API-KEY') From 795f3ffd178116a6cfb31a4dc1680cf76917a9bc Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:37:04 -0700 Subject: [PATCH 5/7] REST API uses place instead of dcids for place key var name. --- datacommons/test/places_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index 37681752..a71e93ad 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -90,7 +90,7 @@ def read(self): # Mock responses for urlopen requests to get_stats. if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: - if (data['dcids'] == ['geoId/05', 'geoId/06'] and + if (data['place'] == ['geoId/05', 'geoId/06'] and data['stats_var'] == 'dc/0hyp6tkn18vcb'): # Response returned when querying for multiple valid dcids. res_json = json.dumps({ @@ -122,7 +122,7 @@ def read(self): } }) return MockResponse(json.dumps({'payload': res_json})) - if (data['dcids'] == ['geoId/05', 'dc/MadDcid'] and + if (data['place'] == ['geoId/05', 'dc/MadDcid'] and data['stats_var'] == 'dc/0hyp6tkn18vcb'): # Response returned when querying for a dcid that does not exist. res_json = json.dumps({ @@ -141,12 +141,12 @@ def read(self): } }) return MockResponse(json.dumps({'payload': res_json})) - if (data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid'] and + if (data['place'] == ['dc/MadDcid', 'dc/MadderDcid'] and data['stats_var'] == 'dc/0hyp6tkn18vcb'): # Response returned when both given dcids do not exist. 
res_json = json.dumps([]) return MockResponse(json.dumps({'payload': res_json})) - if data['dcids'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb': + if data['place'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb': res_json = json.dumps([]) # Response returned when no dcids are given. return MockResponse(json.dumps({'payload': res_json})) From 927eba87a091bfd2961680202d6b42de03c0fb83 Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:49:58 -0700 Subject: [PATCH 6/7] Update tests to match default REST/JSON output. --- datacommons/examples/places.py | 3 ++- datacommons/test/places_test.py | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datacommons/examples/places.py b/datacommons/examples/places.py index 492bfc6c..ae82196a 100644 --- a/datacommons/examples/places.py +++ b/datacommons/examples/places.py @@ -42,10 +42,11 @@ def main(): # Get place stats. print('Get place stats') - stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb') + stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 'dc/0hyp6tkn18vcb') print(stats) # Get related places. +# TODO(*): Fix the related places example. 
# print('Get related places') # related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', # 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"}) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index a71e93ad..8d95378b 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -271,14 +271,13 @@ def test_bad_dcids(self, urlopen): '2018': 18003 }, 'place_name': 'Arkansas' - }, - 'dc/MadDcid': {} + } }) # Call get_stats when both dcids do not exist bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual(bad_dcids_2, {'dc/MadDcid': {}, 'dc/MadderDcid': {}}) + self.assertDictEqual(bad_dcids_2, {}) @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): @@ -286,9 +285,9 @@ def test_no_dcids(self, urlopen): # Set the API key dc.set_api_key('TEST-API-KEY') - # Call get_stats with no valid dcids. + # Call get_stats with no dcids. no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') - self.assertDictEqual(no_dcids, {}) + self.assertFalse(no_dcids) if __name__ == '__main__': From d37ba9ff0f2f7badbc25352454ef2a14c6758282 Mon Sep 17 00:00:00 2001 From: tjann Date: Fri, 24 Apr 2020 14:54:10 -0700 Subject: [PATCH 7/7] Update tests to match default REST/JSON output. --- datacommons/test/places_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index 8d95378b..5222319d 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -277,7 +277,7 @@ def test_bad_dcids(self, urlopen): # Call get_stats when both dcids do not exist bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], 'dc/0hyp6tkn18vcb') - self.assertDictEqual(bad_dcids_2, {}) + self.assertFalse(bad_dcids_2) @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen):