Skip to content

Commit

Permalink
Merge pull request #1234 from CartoDB/validate-do-operations
Browse files Browse the repository at this point in the history
Validate BQ operations
  • Loading branch information
alasarr committed Nov 25, 2019
2 parents 35d3279 + 4c89fc7 commit 68af7cb
Show file tree
Hide file tree
Showing 20 changed files with 223 additions and 42 deletions.
9 changes: 8 additions & 1 deletion cartoframes/data/observatory/catalog/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
ABC = ABCMeta('ABC', (object,), {'__slots__': ()})

_WORKING_PROJECT = 'carto-do-customers'
_PLATFORM_BQ = 'bq'


class CatalogEntity(ABC):

id_field = 'id'
entity_repo = None
export_excluded_fields = ['summary_json']
export_excluded_fields = ['summary_json', 'available_in']

def __init__(self, data):
self.data = data
Expand Down Expand Up @@ -74,6 +75,9 @@ def _get_print_id(self):
return self.id

def _download(self, credentials=None):
if not self._is_available_in('bq'):
raise CartoException('{} is not ready for Download. Please, contact us for more information.'.format(self))

credentials = self._get_credentials(credentials)
user_dataset = credentials.get_do_user_dataset()
bq_client = _get_bigquery_client(_WORKING_PROJECT, credentials)
Expand All @@ -91,6 +95,9 @@ def _download(self, credentials=None):

return file_path

def _is_available_in(self, platform=_PLATFORM_BQ):
    """Tell whether this entity is flagged as available in a platform.

    Args:
        platform (str): platform identifier searched for in the entity's
            ``available_in`` field. Defaults to ``_PLATFORM_BQ``.

    Returns:
        bool: ``True`` only when ``available_in`` is non-empty and contains
        ``platform``. ``False`` when the field is missing, ``None`` or empty.
    """
    # Read the field once; `.get` makes a missing key behave like an empty value
    # instead of surfacing an unrelated KeyError to the caller.
    available_in = self.data.get('available_in')

    # Coerce the first operand so callers always receive a real bool: a bare
    # `value and ...` would hand back None or [] when the field is falsy.
    return bool(available_in) and platform in available_in

def _get_credentials(self, credentials=None):
_credentials = credentials or defaults.get_default_credentials()

Expand Down
6 changes: 6 additions & 0 deletions cartoframes/data/observatory/catalog/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def geom_coverage(self):

return self.data['geom_coverage']

@property
def geom_type(self):
    """Geometry type stored for this geography."""
    entity_data = self.data
    return entity_data['geom_type']

@property
def update_frequency(self):
"""Frequency in which the geography is updated."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def _map_row(self, row):
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json')
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
}

def get_datasets_for_geographies(self, geographies):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ def _map_row(self, row):
'provider_name': self._normalize_field(row, 'provider_name'),
'lang': self._normalize_field(row, 'lang'),
'geom_coverage': self._normalize_field(row, 'geom_coverage'),
'geom_type': self._normalize_field(row, 'geom_type'),
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json')
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
}

def get_geographies_gdf(self):
Expand Down
14 changes: 14 additions & 0 deletions cartoframes/data/observatory/enrichment/enrichment_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from ..catalog.variable import Variable
from ..catalog.dataset import Dataset
from ..catalog.geography import Geography
from ...clients import bigquery_client
from ....auth import get_default_credentials
from ....exceptions import EnrichmentException
Expand Down Expand Up @@ -190,9 +191,22 @@ def _prepare_variable(variable):
Variable `id` property or Variable `slug` property
""")

_is_available_in_bq(variable)

return variable


def _is_available_in_bq(variable):
    """Check that the Dataset and Geography behind a Variable are usable in 'bq'.

    Fetches the Dataset referenced by ``variable.dataset`` and the Geography
    referenced by that dataset, then asks each one ``_is_available_in('bq')``.

    Raises:
        EnrichmentException: when either of them is not available in 'bq'.
    """
    variable_dataset = Dataset.get(variable.dataset)
    dataset_geography = Geography.get(variable_dataset.geography)

    # Nested guards keep the short-circuit order of a single `and`: the
    # geography is only queried once the dataset itself is available.
    if variable_dataset._is_available_in('bq'):
        if dataset_geography._is_available_in('bq'):
            return

    raise EnrichmentException("""
The Dataset or the Geography of the Variable '{}' is not ready for Enrichment.
Please, contact us for more information.
""".format(variable.slug))


def get_variable_aggregations(variables, aggregation):
    """Build one VariableAggregation per variable, in input order.

    Each variable is paired with whatever ``__get_aggregation`` returns for it
    and the given ``aggregation`` argument.
    """
    variable_aggregations = []
    for item in variables:
        resolved = __get_aggregation(item, aggregation)
        variable_aggregations.append(VariableAggregation(item, resolved))
    return variable_aggregations

Expand Down
8 changes: 4 additions & 4 deletions examples/_debug/do_catalog_discovery.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,9 @@
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory.dataset import CatalogDataset\n",
"from cartoframes.data.observatory.dataset import Dataset\n",
"\n",
"isinstance(demographics_datasets[0], CatalogDataset)"
"isinstance(demographics_datasets[0], Dataset)"
]
},
{
Expand Down Expand Up @@ -276,7 +276,7 @@
"metadata": {},
"outputs": [],
"source": [
"CatalogDataset.get('od_acsquantile_928a2a23').to_dict()"
"Dataset.get('od_acsquantile_928a2a23').to_dict()"
]
},
{
Expand All @@ -292,7 +292,7 @@
"metadata": {},
"outputs": [],
"source": [
"CatalogDataset.get_list(['od_acsquantile_ae4e7c82', 'od_acs_13345497'])"
"Dataset.get_list(['od_acsquantile_ae4e7c82', 'od_acs_13345497'])"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions examples/_debug/do_subscriptions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"outputs": [],
"source": [
"from cartoframes.auth import Credentials, set_default_credentials\n",
"from cartoframes.data.observatory import Catalog, CatalogDataset, Geography"
"from cartoframes.data.observatory import Catalog, Dataset, Geography"
]
},
{
Expand Down Expand Up @@ -35,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = CatalogDataset.get('ags_climateandw_cffac915')"
"dataset = Dataset.get('ags_climateandw_cffac915')"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
{
"data": {
"text/plain": [
"(<CatalogDataset('ags_retailpoten_ddf56a1a')>,\n",
"(<Dataset('ags_retailpoten_ddf56a1a')>,\n",
" <Geography('ags_blockgroup_1c63771c')>)"
]
},
Expand All @@ -39,9 +39,9 @@
}
],
"source": [
"from cartoframes.data.observatory import Catalog, CatalogDataset, Geography\n",
"from cartoframes.data.observatory import Catalog, Dataset, Geography\n",
"\n",
"dataset = CatalogDataset.get(dataset_name)\n",
"dataset = Dataset.get(dataset_name)\n",
"geography = Geography.get(dataset.geography)\n",
"\n",
"dataset, geography"
Expand Down Expand Up @@ -103,7 +103,7 @@
{
"data": {
"text/plain": [
"Datasets: [<CatalogDataset('ags_retailpoten_ddf56a1a')>]\n",
"Datasets: [<Dataset('ags_retailpoten_ddf56a1a')>]\n",
"Geographies: [<Geography('ags_blockgroup_1c63771c')>]"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory import CatalogDataset\n",
"from cartoframes.data.observatory import Dataset\n",
"\n",
"dataset = CatalogDataset.get('carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2018')\n",
"dataset = Dataset.get('carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2018')\n",
"variables = dataset.variables"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,9 @@
}
],
"source": [
"from cartoframes.data.observatory import CatalogDataset, Catalog\n",
"from cartoframes.data.observatory import Dataset, Catalog\n",
"\n",
"dataset = CatalogDataset.get('carto-do-public-data.usa_acs.demographics_acs_usa_censustractclipped_2015_5yrs_20132017')\n",
"dataset = Dataset.get('carto-do-public-data.usa_acs.demographics_acs_usa_censustractclipped_2015_5yrs_20132017')\n",
"variables = dataset.variables\n",
"variables"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory import CatalogDataset\n",
"from cartoframes.data.observatory import Dataset\n",
"\n",
"dataset = CatalogDataset.get('carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2018')\n",
"dataset = Dataset.get('carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2018')\n",
"variables = dataset.variables"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory import CatalogDataset\n",
"from cartoframes.data.observatory import Dataset\n",
"\n",
"dataset = CatalogDataset.get('carto-do-public-data.usa_acs.demographics_acs_usa_censustractclipped_2015_5yrs_20132017')\n",
"dataset = Dataset.get('carto-do-public-data.usa_acs.demographics_acs_usa_censustractclipped_2015_5yrs_20132017')\n",
"variables = dataset.variables"
]
},
Expand Down
14 changes: 10 additions & 4 deletions tests/unit/data/observatory/catalog/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@
'country_id': 'esp',
'lang': 'esp',
'geom_coverage': '',
'geom_type': '',
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': {}
'summary_json': {},
'available_in': ['bq']
}
db_geography2 = {
'id': 'carto-do-public.tiger.geography_esp_municipalities_2019',
Expand All @@ -50,10 +52,12 @@
'country_id': 'esp',
'lang': 'esp',
'geom_coverage': '',
'geom_type': '',
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': False,
'summary_json': {}
'summary_json': {},
'available_in': []
}
test_geography1 = Geography(db_geography1)
test_geography2 = Geography(db_geography2)
Expand All @@ -79,7 +83,8 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': {}
'summary_json': {},
'available_in': ['bq']
}
db_dataset2 = {
'id': 'carto-do-public.project.basicstats-municipalities',
Expand All @@ -101,7 +106,8 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': False,
'summary_json': {}
'summary_json': {},
'available_in': []
}
test_dataset1 = Dataset(db_dataset1)
test_dataset2 = Dataset(db_dataset2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None
'summary_json': None,
'available_in': None
})])

# When
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,12 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'country_id': None,
'lang': None,
'geom_coverage': None,
'geom_type': None,
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None
'summary_json': None,
'available_in': None
})])

# When
Expand Down
38 changes: 37 additions & 1 deletion tests/unit/data/observatory/catalog/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def test_dataset_is_exported_as_series(self):
def test_dataset_is_exported_as_dict(self):
# Given
dataset = Dataset(db_dataset1)
expected_dict = {key: value for key, value in db_dataset1.items() if key != 'summary_json'}
excluded_fields = ['summary_json', 'available_in']
expected_dict = {key: value for key, value in db_dataset1.items() if key not in excluded_fields}

# When
dataset_dict = dataset.to_dict()
Expand Down Expand Up @@ -266,6 +267,28 @@ def test_dataset_download(self, mocked_bq_client, mocked_repo):

assert response == file_path

@patch.object(DatasetRepository, 'get_by_id')
@patch('cartoframes.data.observatory.catalog.entity._get_bigquery_client')
def test_dataset_not_available_in_bq_download_fails(self, mocked_bq_client, mocked_repo):
    """Downloading a dataset that is not available in 'bq' raises CartoException."""
    # Given: the repository hands back test_dataset2 and BigQuery is mocked
    mocked_repo.return_value = test_dataset2
    mocked_bq_client.return_value = BigQueryClientMock('fake_path')
    fake_credentials = Credentials('fake_user', '1234')
    target = Dataset.get(test_dataset2.id)

    # When
    with pytest.raises(CartoException) as e:
        target.download(fake_credentials)

    # Then
    expected_message = '{} is not ready for Download. Please, contact us for more information.'.format(target)
    assert str(e.value) == expected_message

@patch.object(DatasetRepository, 'get_by_id')
@patch('cartoframes.data.observatory.catalog.entity._get_bigquery_client')
def test_dataset_download_raises_with_nonpurchased(self, mocked_bq_client, mocked_repo):
Expand Down Expand Up @@ -399,3 +422,16 @@ def test_dataset_subscription_info_wrong_credentials(self):

# Then
assert str(e.value) == '`credentials` must be a Credentials class instance'

def test_dataset_is_available_in(self):
    """`_is_available_in('bq')` is truthy for db_dataset1 and falsy for db_dataset2."""
    # Given
    listed = Dataset(db_dataset1)
    unlisted = Dataset(db_dataset2)

    # Then
    assert listed._is_available_in('bq')
    assert not unlisted._is_available_in('bq')

def test_dataset_is_available_in_with_empty_field(self):
    """A dataset whose `available_in` is None is reported as not available in 'bq'."""
    # Given: a copy of db_dataset1 with the field nulled out (the fixture is not mutated)
    nulled_row = dict(db_dataset1, available_in=None)

    # When
    nulled_dataset = Dataset(nulled_row)

    # Then
    assert not nulled_dataset._is_available_in('bq')

0 comments on commit 68af7cb

Please sign in to comment.