Merge pull request #1216 from CartoDB/1183_catalog_reference

catalog reference API docstrings
CartoDB · Nov 26, 2019 · fff4343 · fff4343
2 parents a0df692 + 74620d8
commit fff4343
Show file tree

Hide file tree

Showing 23 changed files with 1,155 additions and 172 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Changed
 - Optimize local data visualizations size using gzip compression (#1202)
+- Discover API reference (#1183)
 
 ## [1.0b5] - 2019-11-14
 ### Added

diff --git a/cartoframes/data/observatory/__init__.py b/cartoframes/data/observatory/__init__.py
@@ -9,6 +9,9 @@
 from .catalog.variable import Variable
 from .enrichment.enrichment import Enrichment
 from .enrichment.enrichment_service import VariableAggregation, VariableFilter
+from .catalog.entity import CatalogEntity, CatalogList
+from .catalog.subscriptions import Subscriptions
+from .catalog.subscription_info import SubscriptionInfo
 
 __all__ = [
     'Catalog',
@@ -20,5 +23,9 @@
     'Variable',
     'Enrichment',
     'VariableAggregation',
-    'VariableFilter'
+    'VariableFilter',
+    'Subscriptions',
+    'SubscriptionInfo',
+    'CatalogEntity',
+    'CatalogList',
 ]
diff --git a/cartoframes/data/observatory/catalog/catalog.py b/cartoframes/data/observatory/catalog/catalog.py
@@ -12,18 +12,146 @@
 
 
 class Catalog(object):
-    """Data Observatory Catalog"""
+    """This class represents the Data Observatory metadata
+    :py:class:`Catalog <cartoframes.data.observatory.Catalog>`.
+
+    The catalog contains metadata that helps to discover and understand the
+    data available in the Data Observatory for :py:attr:`Dataset.download` and :obj:`Enrichment` purposes.
+
+    You can get more information about the Data Observatory catalog from the
+    `CARTO website <https://carto.com/platform/location-data-streams/>`__ and in your CARTO user account dashboard.
+
+    The Catalog has three main purposes:
+      - Explore and discover the datasets available in the repository (both public and premium datasets).
+      - Subscribe to some premium datasets and manage your dataset licenses.
+      - Download data and use your licensed datasets and variables to enrich your own data by means of the
+        :obj:`Enrichment` functions.
+
+    The Catalog is public and you can explore it without the need of a CARTO account. Once you discover a
+    :obj:`Dataset` of your interest and want to acquire a license to use it, then you'll need a CARTO account to
+    subscribe to it, by means of the :py:attr:`Dataset.subscribe` or :py:attr:`Geography.subscribe` functions.
+
+    The Catalog is composed of three main entities:
+      - :obj:`Dataset`: It is the main :obj:`CatalogEntity`. It contains metadata of the actual data
+        you can use to :py:attr:`Dataset.download` or for :obj:`Enrichment` purposes.
+      - :obj:`Geography`: Datasets in the Data Observatory are aggregated by different geographic boundaries.
+        The `Geography` entity contains metadata to understand the boundaries of a :obj:`Dataset`. It's used for
+        enrichment and you can also :py:attr:`Geography.download` the underlying data.
+      - :obj:`Variable`: Variables contain metadata about the columns available in each dataset for enrichment.
+        Let's say you explore a `dataset` with demographic data for the whole US at the Census tract level.
+        The variables give you information about the actual columns you have available, such as: total_population,
+        total_males, etc.
+        On the other hand, you use `Variable` instances or lists of :py:attr:`Variable.id` or :py:attr:`Dataset.slug`
+        to enrich your own data.
+
+    Every `Dataset` is related to a `Geography`. You can have, for example, demographics data at the Census
+    tract, block group or block level.
+
+    When subscribing to a premium dataset, you should call both :py:attr:`Dataset.subscribe` and
+    :py:attr:`Geography.subscribe` to be able to access both tables to enrich your own data.
+
+    The two main entities of the Catalog (`Dataset` and `Geography`) are related to other entities, that
+    are useful for a hierarchical categorization and discovery of available data in the Data Observatory:
+      - :obj:`Category`: Groups datasets of the same topic, for example, `demographics`, `financial`, etc.
+      - :obj:`Country`: Groups datasets available by country
+      - :obj:`Provider`: Gives you information about the provider of the source data
+
+    You can just list all the grouping entities. Take into account this is not the preferred way
+    to discover the catalog metadata, since there can be thousands of entities in it:
+
+        .. code::
+
+            from cartoframes.data.observatory import Category
+            from cartoframes.data.observatory import Country
+            from cartoframes.data.observatory import Provider
+
+            Category.get_all()
+            Country.get_all()
+            Provider.get_all()
+
+    Or you can get them by ID:
+
+        .. code::
+
+            from cartoframes.data.observatory import Category
+            from cartoframes.data.observatory import Country
+            from cartoframes.data.observatory import Provider
+
+            Category.get('demographics')
+            Country.get('usa')
+            Provider.get('mrli')
+
+    Examples:
+        The preferred way to discover the available datasets in the Catalog is through nested filters:
+
+        .. code::
+
+            from cartoframes.data.observatory import Catalog
+
+            catalog = Catalog()
+            catalog.country('usa').category('demographics').datasets
+
+        You can include the geography as part of the nested filter like this:
+
+        .. code::
+
+            from cartoframes.data.observatory import Catalog
+
+            catalog = Catalog()
+            catalog.country('usa').category('demographics').geography('ags_blockgroup_1c63771c').datasets
+
+        If a filter is already applied to a Catalog instance and you want to do a new hierarchical search,
+        clear the previous filters with the `Catalog.clear_filters()` method:
+
+        .. code::
+
+            from cartoframes.data.observatory import Catalog
+
+            catalog = Catalog()
+            catalog.country('usa').category('demographics').geography('ags_blockgroup_1c63771c').datasets
+            catalog.clear_filters()
+            catalog.country('esp').category('demographics').datasets
+
+        Otherwise the filters accumulate and you'll get unexpected results.
+
+        During the discovery process, it's useful to understand the metadata related to a given Geography or Dataset.
+        A useful way of reading or filtering by metadata values consists of converting the entities to a pandas
+        DataFrame:
+
+        .. code::
+
+            from cartoframes.data.observatory import Catalog
+
+            catalog = Catalog()
+            catalog.country('usa').category('demographics').geography('ags_blockgroup_1c63771c').datasets.to_dataframe()
+
+        For each dataset in the Catalog, you can explore its variables, get a summary of its stats, etc.
+
+        .. code::
+
+            from cartoframes.data.observatory import Catalog
+
+            catalog = Catalog()
+            dataset = catalog.country('usa').category('demographics').datasets.get('od_acs_13345497')
+            dataset.variables()
+
+    See the Catalog guides and examples in our
+    `public documentation website <https://carto.com/developers/cartoframes/guides/Introduction/>`__
+    for more information.
+    """
 
     def __init__(self):
         self.filters = {}
 
     @property
     def countries(self):
-        """Get all the countries in the Catalog.
+        """Get all the countries with datasets available in the Catalog.
 
         Returns:
             :py:class:`CatalogList <cartoframes.data.observatory.entity.CatalogList>`
 
+        :raises DiscoveryException: When no datasets are found.
+        :raises CartoException: If there's a problem when connecting to the catalog.
         """
 
         return Country.get_all(self.filters)
@@ -35,6 +163,8 @@ def categories(self):
         Returns:
             :py:class:`CatalogList <cartoframes.data.observatory.entity.CatalogList>`
 
+        :raises DiscoveryException: When no datasets are found.
+        :raises CartoException: If there's a problem when connecting to the catalog.
         """
 
         return Category.get_all(self.filters)
@@ -46,6 +176,8 @@ def datasets(self):
         Returns:
             :py:class:`CatalogList <cartoframes.data.observatory.entity.CatalogList>`
 
+        :raises DiscoveryException: When no datasets are found.
+        :raises CartoException: If there's a problem when connecting to the catalog.
         """
 
         return Dataset.get_all(self.filters)
@@ -57,6 +189,8 @@ def geographies(self):
         Returns:
             :py:class:`CatalogList <cartoframes.data.observatory.entity.CatalogList>`
 
+        :raises DiscoveryException: When no datasets are found.
+        :raises CartoException: If there's a problem when connecting to the catalog.
         """
 
         return Geography.get_all(self.filters)
@@ -69,7 +203,7 @@ def country(self, country_id):
               Id value of the country to be used for filtering the Catalog.
 
         Returns:
-            :py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
+            :py:class:`Catalog <cartoframes.data.observatory.Catalog>`
 
         """
 
@@ -84,7 +218,7 @@ def category(self, category_id):
               Id value of the category to be used for filtering the Catalog.
 
         Returns:
-            :py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
+            :py:class:`Catalog <cartoframes.data.observatory.Catalog>`
 
         """
 
@@ -99,7 +233,7 @@ def geography(self, geography_id):
               Id or slug value of the geography to be used for filtering the Catalog
 
         Returns:
-            :py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
+            :py:class:`Catalog <cartoframes.data.observatory.Catalog>`
 
         """
 
@@ -133,7 +267,8 @@ def clear_filters(self):
         self.filters = {}
 
     def subscriptions(self, credentials=None):
-        """Get all the subscriptions in the Catalog
+        """Get all the subscriptions in the Catalog. You'll get all the `Dataset` or `Geography` instances you have
+        previously subscribed to.
 
         Args:
             credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
@@ -142,8 +277,7 @@ def subscriptions(self, credentials=None):
                 <cartoframes.auth.set_default_credentials>`) will be used.
 
         Returns:
-            :py:class:`Datasets <cartoframes.data.observatory.Datasets>`
-
+            :py:class:`Subscriptions <cartoframes.data.observatory.Subscriptions>`
         """
 
         _no_filters = {}
@@ -160,7 +294,7 @@ def subscriptions(self, credentials=None):
     def datasets_filter(self, filter_dataset):
         """Get all the datasets in the Catalog filtered
         Returns:
-            :py:class:`Datasets <cartoframes.data.observatory.Datasets>`
+            :py:class:`Dataset <cartoframes.data.observatory.Dataset>`
         """
 
         return Dataset.get_datasets_spatial_filtered(filter_dataset)