diff --git a/datacommons/__init__.py b/datacommons/__init__.py index 6400a4da..e96aa3bb 100644 --- a/datacommons/__init__.py +++ b/datacommons/__init__.py @@ -17,7 +17,7 @@ # Data Commons Python Client API from datacommons.core import get_property_labels, get_property_values, get_triples -from datacommons.places import get_places_in, get_related_places +from datacommons.places import get_places_in, get_related_places, get_stats from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs # Other utilities diff --git a/datacommons/examples/places.py b/datacommons/examples/places.py index bb5a9f4a..ae82196a 100644 --- a/datacommons/examples/places.py +++ b/datacommons/examples/places.py @@ -40,12 +40,17 @@ def main(): for dcid in tracts[mc][:10]: print(' - {}'.format(dcid)) + # Get place stats. + print('Get place stats') + stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 'dc/0hyp6tkn18vcb') + print(stats) + # Get related places. - print('Get related places') - # TODO(*): s/Cenus/Census/g when data is ready in BT. - related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', - 'CenusACS5yrSurvey', "measuredValue", {"gender": "Female"}) - print(related_places) +# TODO(*): Fix the related places example. +# print('Get related places') +# related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', +# 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"}) +# print(related_places) if __name__ == '__main__': diff --git a/datacommons/places.py b/datacommons/places.py index 354333cc..9bb0ccc9 100644 --- a/datacommons/places.py +++ b/datacommons/places.py @@ -49,15 +49,15 @@ def get_places_in(dcids, place_type): >>> get_places_in(["geoId/06"], "County") { - 'geoId/06': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - 'geoId/06067', - ... - # and 53 more - ] + 'geoId/06': [ + 'geoId/06041', + 'geoId/06089', + 'geoId/06015', + 'geoId/06023', + 'geoId/06067', + ... 
+ # and 53 more + ] } """ dcids = filter(lambda v: v==v, dcids) # Filter out NaN values @@ -71,7 +71,72 @@ def get_places_in(dcids, place_type): # Create the results and format it appropriately result = utils._format_expand_payload(payload, 'place', must_exist=dcids) return result + +def get_stats(dcids, stats_var): + """ Returns :obj:`TimeSeries` for :code:`dcids` \ + based on the :code:`stats_var`. + Args: + dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for. + stats_var (:obj:`str`): The dcid of the :obj:`StatisticalVariable`. + Returns: + A :obj:`dict` mapping the :obj:`Place` identified by the given :code:`dcid` + to its place name and the :obj:`TimeSeries` associated with the + :obj:`StatisticalVariable` identified by the given :code:`stats_var`. + See example below for more detail about how the returned :obj:`dict` is + structured. + + Raises: + ValueError: If the payload returned by the Data Commons REST API is + malformed. + + Examples: + We would like to get the :obj:`TimeSeries` of the number of males + at least 25 years old that attended 12th grade but did not receive + a high school diploma + (`dc/0hyp6tkn18vcb `_) + in `Arkansas `_ + and `California `_. 
+ + >>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb") + { + 'geoId/05': { + 'place_name': 'Arkansas', + 'data': { + '2011':18136, + '2012':17279, + '2013':17459, + '2014':16966, + '2015':17173, + '2016':17041, + '2017':17783, + '2018':18003 + }, + }, + 'geoId/06': { + 'place_name': 'California', + 'data': { + '2011':316667, + '2012':324116, + '2013':331853, + '2014':342818, + '2015':348979, + '2016':354806, + '2017':360645, + '2018':366331 + }, + }, + } + """ + dcids = filter(lambda v: v==v, dcids) # Filter out NaN values + dcids = list(dcids) + url = utils._API_ROOT + utils._API_ENDPOINTS['get_stats'] + payload = utils._send_request(url, req_json={ + 'place': dcids, + 'stats_var': stats_var, + }) + + return payload def get_related_places(dcids, population_type, measured_property, measurement_method, stat_type, constraining_properties={}, @@ -112,12 +177,12 @@ def get_related_places(dcids, population_type, measured_property, "gender": "Female" }, "count", "CenusACS5yrSurvey", "measuredValue") { - 'geoId/06085': [ - 'geoId/06041', - 'geoId/06089', - 'geoId/06015', - 'geoId/06023', - ] + 'geoId/06085': [ + 'geoId/06041', + 'geoId/06089', + 'geoId/06015', + 'geoId/06023', + ] } """ dcids = filter(lambda v: v==v, dcids) # Filter out NaN values diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index e0f9670d..5222319d 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -87,6 +87,70 @@ def read(self): # Response returned when no dcids are given. return MockResponse(json.dumps({'payload': res_json})) + + # Mock responses for urlopen requests to get_stats. + if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']: + if (data['place'] == ['geoId/05', 'geoId/06'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when querying for multiple valid dcids. 
+ res_json = json.dumps({ + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + }, + 'geoId/06': { + 'data': { + '2011': 316667, + '2012': 324116, + '2013': 331853, + '2014': 342818, + '2015': 348979, + '2016': 354806, + '2017': 360645, + '2018': 366331 + }, + 'place_name': 'California' + } + }) + return MockResponse(json.dumps({'payload': res_json})) + if (data['place'] == ['geoId/05', 'dc/MadDcid'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when querying for a dcid that does not exist. + res_json = json.dumps({ + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + } + }) + return MockResponse(json.dumps({'payload': res_json})) + if (data['place'] == ['dc/MadDcid', 'dc/MadderDcid'] and + data['stats_var'] == 'dc/0hyp6tkn18vcb'): + # Response returned when both given dcids do not exist. + res_json = json.dumps([]) + return MockResponse(json.dumps({'payload': res_json})) + if data['place'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb': + res_json = json.dumps([]) + # Response returned when no dcids are given. + return MockResponse(json.dumps({'payload': res_json})) + # Otherwise, return an empty response and a 404. return urllib.error.HTTPError @@ -142,5 +206,89 @@ def test_no_dcids(self, urlopen): }) +class TestGetStats(unittest.TestCase): + """ Unit tests for get_stats. """ + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_multiple_dcids(self, urlopen): + """ Calling get_stats with proper dcids returns valid results. 
""" + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_stats + stats = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb') + self.assertDictEqual( + stats, { + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + }, + 'geoId/06': { + 'data': { + '2011': 316667, + '2012': 324116, + '2013': 331853, + '2014': 342818, + '2015': 348979, + '2016': 354806, + '2017': 360645, + '2018': 366331 + }, + 'place_name': 'California' + } + }) + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_bad_dcids(self, urlopen): + """ Calling get_stats with dcids that do not exist returns empty + results. + """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_stats with one dcid that does not exist + bad_dcids_1 = dc.get_stats(['geoId/05', 'dc/MadDcid'], 'dc/0hyp6tkn18vcb') + self.assertDictEqual( + bad_dcids_1, { + 'geoId/05': { + 'data': { + '2011': 18136, + '2012': 17279, + '2013': 17459, + '2014': 16966, + '2015': 17173, + '2016': 17041, + '2017': 17783, + '2018': 18003 + }, + 'place_name': 'Arkansas' + } + }) + + # Call get_stats when both dcids do not exist + bad_dcids_2 = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], + 'dc/0hyp6tkn18vcb') + self.assertFalse(bad_dcids_2) + + @mock.patch('urllib.request.urlopen', side_effect=request_mock) + def test_no_dcids(self, urlopen): + """ Calling get_stats with no dcids returns empty results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_stats with no dcids. 
+ no_dcids = dc.get_stats([], 'dc/0hyp6tkn18vcb') + self.assertFalse(no_dcids) + + if __name__ == '__main__': unittest.main() diff --git a/datacommons/utils.py b/datacommons/utils.py index 9189aee2..cbfe64f9 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -48,6 +48,7 @@ 'get_observations': '/node/observations', 'get_pop_obs': '/bulk/pop-obs', 'get_place_obs': '/bulk/place-obs', + 'get_stats': '/bulk/stats', } # The default value to limit to