Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions datacommons/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
package(default_visibility = ["//visibility:public"])
load("@requirements//:requirements.bzl", "requirement")

py_library(
name = "datacommons",
srcs = glob(["*.py"]),
deps = [
requirement("pandas"),
]
)
2 changes: 1 addition & 1 deletion datacommons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs

# Other utilities
from .utils import set_api_key, clean_frame, flatten_frame
from .utils import set_api_key
39 changes: 9 additions & 30 deletions datacommons/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

from collections import defaultdict

import pandas as pd

import datacommons.utils as utils
import requests

Expand All @@ -40,7 +38,7 @@ def get_property_labels(dcids, out=True):
""" Returns the labels of properties defined for the given :code:`dcids`.

Args:
dcids (:obj:`list` of :obj:`str`): A list of nodes identified by their
dcids (:obj:`iterable` of :obj:`str`): An iterable of nodes identified by
their dcids.
out (:obj:`bool`, optional): Whether or not the property points away from
the given list of nodes.
Expand Down Expand Up @@ -99,6 +97,7 @@ def get_property_labels(dcids, out=True):
}
"""
# Generate the GetProperty query and send the request
dcids = list(dcids)
url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']
payload = utils._send_request(url, req_json={'dcids': dcids})

Expand All @@ -120,8 +119,7 @@ def get_property_values(dcids,
""" Returns property values of given :code:`dcids` along the given property.

Args:
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): dcids to get
property values for.
dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for.
prop (:obj:`str`): The property to get property values for.
out (:obj:`bool`, optional): A flag that indicates the property is directed
away from the given nodes when set to true.
Expand All @@ -131,15 +129,8 @@ def get_property_values(dcids,
aggregated over all given nodes.

Returns:
When :code:`dcids` is an instance of :obj:`list`, the returned property
values are formatted as a :obj:`dict` from a given dcid to a list of its
property values.

When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
property values are formatted as a :obj:`pandas.Series` where the `i`-th
entry corresponds to property values associated with the `i`-th given dcid.
The cells of the returned series will always contain a :obj:`list` of
property values.
Returned property values are formatted as a :obj:`dict` from a given dcid
to a list of its property values.

Raises:
ValueError: If the payload returned by the Data Commons REST API is
Expand All @@ -160,21 +151,11 @@ def get_property_values(dcids,
"geoId/21": ["Kentucky"],
"geoId/24": ["Maryland"],
}

Next, we specify :code:`dcids` as a :obj:`pandas.Series`

>>> import pandas as pd
>>> dcids = pd.Series(["geoId/06", "geoId/21", "geoId/24"])
>>> get_property_values(dcids, "name")
0 [California]
1 [Kentucky]
2 [Maryland]
dtype: object
"""
# Convert the dcids field and format the request to GetPropertyValue
dcids, req_dcids = utils._convert_dcids_type(dcids)
dcids = list(dcids)
req_json = {
'dcids': req_dcids,
'dcids': dcids,
'property': prop,
'limit': limit
}
Expand Down Expand Up @@ -205,9 +186,6 @@ def get_property_values(dcids,
# Make sure each dcid is in the results dict, and convert all sets to lists.
results = {dcid: sorted(list(unique_results[dcid])) for dcid in dcids}

# Format the results as a Series if a Pandas Series is provided.
if isinstance(dcids, pd.Series):
return pd.Series([results[dcid] for dcid in dcids], index=dcids.index)
return results


Expand All @@ -221,7 +199,7 @@ def get_triples(dcids, limit=utils._MAX_LIMIT):
*predicate*).

Args:
dcids (:obj:`list` of :obj:`str`): A list of dcids to get triples for.
dcids (:obj:`iterable` of :obj:`str`): An iterable of dcids to get triples
for.
limit (:obj:`int`, optional): The maximum total number of triples to get.

Returns:
Expand Down Expand Up @@ -249,6 +227,7 @@ def get_triples(dcids, limit=utils._MAX_LIMIT):
}
"""
# Generate the GetTriple query and send the request.
dcids = list(dcids)
url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
payload = utils._send_request(url, req_json={'dcids': dcids, 'limit': limit})

Expand Down
4 changes: 2 additions & 2 deletions datacommons/examples/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def main():
# To expand on a column with get_property_values, the data frame has to be
# flattened first. Clients can use pandas' DataFrame.explode to do this.
utils._print_header('Flatten the Frame')
pd_frame = dc.flatten_frame(pd_frame)
pd_frame = pd_frame.explode('county')
print(pd_frame)

# Get the names for each city.
Expand All @@ -87,7 +87,7 @@ def main():

# Format the final frame.
utils._print_header('The Final Frame')
pd_frame = dc.flatten_frame(pd_frame)
pd_frame = pd_frame.explode('city')
print(pd_frame)


Expand Down
2 changes: 1 addition & 1 deletion datacommons/examples/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def main():
# Get all CensusTracts in these two counties.
utils._print_header('Get Census Tracts')
pd_frame['tracts'] = dc.get_places_in(pd_frame['county'], 'CensusTract')
pd_frame = dc.flatten_frame(pd_frame)
pd_frame = pd_frame.explode('tracts')
print(pd_frame)


Expand Down
12 changes: 6 additions & 6 deletions datacommons/examples/populations.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ def main():
# DataFrame with Santa Clara and Montgomery County.
utils._print_header('Initialize the DataFrame')
pd_frame = pd.DataFrame({'state': ['geoId/06', 'geoId/21', 'geoId/24']})
pd_frame['state_name'] = dc.get_property_values(pd_frame['state'], 'name')
pd_frame = dc.flatten_frame(pd_frame)
print(pd_frame)
pd_frame['state_name'] = pd_frame['state'].map(
dc.get_property_values(pd_frame['state'], 'name'))
pd_frame = pd_frame.explode('state_name').reset_index(drop=True)

# Get populations for employed individuals
utils._print_header('Add Population and Observation to DataFrame')
pd_frame['employed_pop'] = dc.get_populations(
pd_frame['employed_pop'] = pd_frame['state'].map(dc.get_populations(
pd_frame['state'],
'Person',
constraining_properties={'employment': 'BLS_Employed'})
constraining_properties={'employment': 'BLS_Employed'}))

# Add the observation for employed individuals
pd_frame['employed_count'] = dc.get_observations(
Expand All @@ -81,7 +81,7 @@ def main():
# Final dataframe. Drop any rows with missing values and reset the index
# before printing.
utils._print_header('Final Data Frame')
pd_frame = dc.clean_frame(pd_frame)
pd_frame = pd_frame.dropna().reset_index(drop=True)
print(pd_frame)


Expand Down
6 changes: 1 addition & 5 deletions datacommons/examples/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from __future__ import print_function

import datacommons as dc
import pandas as pd


def main():
Expand All @@ -37,12 +36,9 @@ def main():
''')
print('> Issuing query.\n{}'.format(query))

# Initialize the Query instance.
dc_query = dc.Query(sparql=query)

# Iterate through all the rows in the results.
print('> Printing results.\n')
for row in dc_query.rows():
for row in dc.query(query_string=query):
print(' {}'.format(row))


Expand Down
31 changes: 5 additions & 26 deletions datacommons/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from __future__ import print_function

import datacommons.utils as utils
import pandas as pd

import requests

Expand All @@ -33,21 +32,13 @@ def get_places_in(dcids, place_type):
:code:`place_type`.

Args:
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids to get
contained in places.
dcids (:obj:`iterable` of :obj:`str`): Dcids to get contained in places.
place_type (:obj:`str`): The type of places contained in the given dcids to
filter by.

Returns:
When :code:`dcids` is an instance of :obj:`list`, the returned
:obj:`Place`'s are formatted as a :obj:`dict` from a given dcid to a list of
places identified by dcids of the given `place_type`.

When :code:`dcids` is an instance of :obj:`pandas.Series`, the returned
:obj:`Place`'s are formatted as a :obj:`pandas.Series` where the `i`-th
entry corresponds to places contained in the place identified by the dcid
in `i`-th cell if :code:`dcids`. The cells of the returned series will always
contain a :obj:`list` of place dcids of the given `place_type`.
The returned :obj:`Place`'s are formatted as a :obj:`dict` from a given
dcid to a list of places identified by dcids of the given `place_type`.

Raises:
ValueError: If the payload returned by the Data Commons REST API is
Expand All @@ -70,26 +61,14 @@ def get_places_in(dcids, place_type):
# and 53 more
]
}

We can also specify the :code:`dcids` as a :obj:`pandas.Series` like so.

>>> import pandas as pd
>>> dcids = pd.Series(["geoId/06"])
>>> get_places_in(dcids, "County")
0 [geoId/06041, geoId/06089, geoId/06015, geoId/...
dtype: object

"""
# Convert the dcids field and format the request to GetPlacesIn
dcids, req_dcids = utils._convert_dcids_type(dcids)
dcids = list(dcids)
url = utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']
payload = utils._send_request(url, req_json={
'dcids': req_dcids,
'dcids': dcids,
'place_type': place_type,
})

# Create the result and format it appropriately
result = utils._format_expand_payload(payload, 'place', must_exist=dcids)
if isinstance(dcids, pd.Series):
return pd.Series([result[dcid] for dcid in dcids], index=dcids.index)
return result
Loading