Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datacommons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# Data Commons Python Client API
from datacommons.core import get_property_labels, get_property_values, get_triples
from datacommons.places import get_places_in, get_related_places
from datacommons.places import get_places_in, get_related_places, get_stats
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs

# Other utilities
Expand Down
15 changes: 10 additions & 5 deletions datacommons/examples/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,17 @@ def main():
for dcid in tracts[mc][:10]:
print(' - {}'.format(dcid))

# Get place stats.
print('Get place stats')
stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 'dc/0hyp6tkn18vcb')
print(stats)

# Get related places.
print('Get related places')
# TODO(*): s/Cenus/Census/g when data is ready in BT.
related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count',
'CenusACS5yrSurvey', "measuredValue", {"gender": "Female"})
print(related_places)
# TODO(*): Fix the related places example.
# print('Get related places')
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this commented?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was broken on current master, I've added a TODO.

# related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count',
# 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"})
# print(related_places)


if __name__ == '__main__':
Expand Down
95 changes: 80 additions & 15 deletions datacommons/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ def get_places_in(dcids, place_type):

>>> get_places_in(["geoId/06"], "County")
{
'geoId/06': [
'geoId/06041',
'geoId/06089',
'geoId/06015',
'geoId/06023',
'geoId/06067',
...
# and 53 more
]
'geoId/06': [
'geoId/06041',
'geoId/06089',
'geoId/06015',
'geoId/06023',
'geoId/06067',
...
# and 53 more
]
}
"""
dcids = filter(lambda v: v==v, dcids) # Filter out NaN values
Expand All @@ -71,7 +71,72 @@ def get_places_in(dcids, place_type):
# Create the results and format it appropriately
result = utils._format_expand_payload(payload, 'place', must_exist=dcids)
return result

def get_stats(dcids, stats_var):
  """ Returns the :obj:`TimeSeries` of a statistical variable for places.

  Args:
    dcids (:obj:`iterable` of :obj:`str`): Dcids of places to query for.
      NaN entries are dropped before the request is sent.
    stats_var (:obj:`str`): The dcid of the :obj:`StatisticalVariable`.

  Returns:
    The payload handed back by the request helper for the ``get_stats``
    endpoint. As illustrated below, this is a :obj:`dict` mapping the
    :obj:`Place` identified by each given :code:`dcid` to its place name
    and the :obj:`TimeSeries` associated with the
    :obj:`StatisticalVariable` identified by :code:`stats_var`.

  Raises:
    ValueError: If the payload returned by the Data Commons REST API is
      malformed. Nothing in this function raises it directly; presumably
      it propagates from the request helper (to be confirmed there).

  Examples:
    We would like to get the :obj:`TimeSeries` of the number of males
    at least 25 years old that attended 12th grade but did not receive
    a high school diploma
    (`dc/0hyp6tkn18vcb <https://browser.datacommons.org/kg?dcid=dc/0hyp6tkn18vcb>`_)
    in `Arkansas <https://browser.datacommons.org/kg?dcid=geoId/05>`_
    and `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_.

    >>> get_stats(["geoId/05", "geoId/06"], "dc/0hyp6tkn18vcb")
    {
      'geoId/05': {
        'place_name': 'Arkansas',
        'data': {
          '2011': 18136,
          '2012': 17279,
          '2013': 17459,
          '2014': 16966,
          '2015': 17173,
          '2016': 17041,
          '2017': 17783,
          '2018': 18003
        },
      },
      'geoId/06': {
        'place_name': 'California',
        'data': {
          '2011': 316667,
          '2012': 324116,
          '2013': 331853,
          '2014': 342818,
          '2015': 348979,
          '2016': 354806,
          '2017': 360645,
          '2018': 366331
        },
      },
    }
  """
  # TODO(review): switch the docstring example to a human-curated
  # StatisticalVariable dcid, as requested during code review.

  # NaN is the only value that compares unequal to itself, so the
  # self-comparison keeps every dcid except NaN.
  place_dcids = [dcid for dcid in dcids if dcid == dcid]

  endpoint = utils._API_ROOT + utils._API_ENDPOINTS['get_stats']
  request_body = {
      'place': place_dcids,
      'stats_var': stats_var,
  }
  return utils._send_request(endpoint, req_json=request_body)

def get_related_places(dcids, population_type, measured_property,
measurement_method, stat_type, constraining_properties={},
Expand Down Expand Up @@ -112,12 +177,12 @@ def get_related_places(dcids, population_type, measured_property,
"gender": "Female"
}, "count", "CenusACS5yrSurvey", "measuredValue")
{
'geoId/06085': [
'geoId/06041',
'geoId/06089',
'geoId/06015',
'geoId/06023',
]
'geoId/06085': [
'geoId/06041',
'geoId/06089',
'geoId/06015',
'geoId/06023',
]
}
"""
dcids = filter(lambda v: v==v, dcids) # Filter out NaN values
Expand Down
148 changes: 148 additions & 0 deletions datacommons/test/places_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,70 @@ def read(self):
# Response returned when no dcids are given.
return MockResponse(json.dumps({'payload': res_json}))


# Mock responses for urlopen requests to get_stats.
if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_stats']:
if (data['place'] == ['geoId/05', 'geoId/06'] and
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
# Response returned when querying for multiple valid dcids.
res_json = json.dumps({
'geoId/05': {
'data': {
'2011': 18136,
'2012': 17279,
'2013': 17459,
'2014': 16966,
'2015': 17173,
'2016': 17041,
'2017': 17783,
'2018': 18003
},
'place_name': 'Arkansas'
},
'geoId/06': {
'data': {
'2011': 316667,
'2012': 324116,
'2013': 331853,
'2014': 342818,
'2015': 348979,
'2016': 354806,
'2017': 360645,
'2018': 366331
},
'place_name': 'California'
}
})
return MockResponse(json.dumps({'payload': res_json}))
if (data['place'] == ['geoId/05', 'dc/MadDcid'] and
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
# Response returned when querying for a dcid that does not exist.
res_json = json.dumps({
'geoId/05': {
'data': {
'2011': 18136,
'2012': 17279,
'2013': 17459,
'2014': 16966,
'2015': 17173,
'2016': 17041,
'2017': 17783,
'2018': 18003
},
'place_name': 'Arkansas'
}
})
return MockResponse(json.dumps({'payload': res_json}))
if (data['place'] == ['dc/MadDcid', 'dc/MadderDcid'] and
data['stats_var'] == 'dc/0hyp6tkn18vcb'):
# Response returned when both given dcids do not exist.
res_json = json.dumps([])
return MockResponse(json.dumps({'payload': res_json}))
if data['place'] == [] and data['stats_var'] == 'dc/0hyp6tkn18vcb':
res_json = json.dumps([])
# Response returned when no dcids are given.
return MockResponse(json.dumps({'payload': res_json}))

# Otherwise, return an empty response and a 404.
return urllib.error.HTTPError

Expand Down Expand Up @@ -142,5 +206,89 @@ def test_no_dcids(self, urlopen):
})


class TestGetStats(unittest.TestCase):
  """ Unit tests for get_stats. """

  @mock.patch('urllib.request.urlopen', side_effect=request_mock)
  def test_multiple_dcids(self, urlopen):
    """ Calling get_stats with proper dcids returns valid results. """
    # The client needs an API key before a request can be issued.
    dc.set_api_key('TEST-API-KEY')

    # Time series the mocked endpoint serves for Arkansas and California.
    expected = {
        'geoId/05': {
            'data': {
                '2011': 18136,
                '2012': 17279,
                '2013': 17459,
                '2014': 16966,
                '2015': 17173,
                '2016': 17041,
                '2017': 17783,
                '2018': 18003
            },
            'place_name': 'Arkansas'
        },
        'geoId/06': {
            'data': {
                '2011': 316667,
                '2012': 324116,
                '2013': 331853,
                '2014': 342818,
                '2015': 348979,
                '2016': 354806,
                '2017': 360645,
                '2018': 366331
            },
            'place_name': 'California'
        }
    }

    actual = dc.get_stats(['geoId/05', 'geoId/06'], 'dc/0hyp6tkn18vcb')
    self.assertDictEqual(actual, expected)

  @mock.patch('urllib.request.urlopen', side_effect=request_mock)
  def test_bad_dcids(self, urlopen):
    """ Calling get_stats with dcids that do not exist returns empty
    results.
    """
    dc.set_api_key('TEST-API-KEY')
    stats_var = 'dc/0hyp6tkn18vcb'

    # One valid dcid and one unknown dcid: only the valid place comes back.
    arkansas_only = {
        'geoId/05': {
            'data': {
                '2011': 18136,
                '2012': 17279,
                '2013': 17459,
                '2014': 16966,
                '2015': 17173,
                '2016': 17041,
                '2017': 17783,
                '2018': 18003
            },
            'place_name': 'Arkansas'
        }
    }
    one_bad = dc.get_stats(['geoId/05', 'dc/MadDcid'], stats_var)
    self.assertDictEqual(one_bad, arkansas_only)

    # Two unknown dcids: the result must be falsy (empty).
    both_bad = dc.get_stats(['dc/MadDcid', 'dc/MadderDcid'], stats_var)
    self.assertFalse(both_bad)

  @mock.patch('urllib.request.urlopen', side_effect=request_mock)
  def test_no_dcids(self, urlopen):
    """ Calling get_stats with no dcids returns empty results. """
    dc.set_api_key('TEST-API-KEY')

    # An empty place list must also produce a falsy (empty) result.
    empty_result = dc.get_stats([], 'dc/0hyp6tkn18vcb')
    self.assertFalse(empty_result)


if __name__ == '__main__':
unittest.main()
1 change: 1 addition & 0 deletions datacommons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
'get_observations': '/node/observations',
'get_pop_obs': '/bulk/pop-obs',
'get_place_obs': '/bulk/place-obs',
'get_stats': '/bulk/stats',
}

# The default value to limit to
Expand Down