Skip to content

Commit

Permalink
Merge pull request #1196 from CartoDB/fix-and-docs-enrichment-aggrega…
Browse files Browse the repository at this point in the history
…tion

Fix enrichment aggregation and filters
  • Loading branch information
Jesus89 committed Nov 14, 2019
2 parents c9c3b54 + 229b289 commit 5bb904a
Show file tree
Hide file tree
Showing 13 changed files with 2,129 additions and 735 deletions.
5 changes: 4 additions & 1 deletion cartoframes/data/observatory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .catalog.provider import Provider
from .catalog.variable import Variable
from .enrichment.enrichment import Enrichment
from .enrichment.enrichment_service import VariableAggregation, VariableFilter

__all__ = [
'Catalog',
Expand All @@ -17,5 +18,7 @@
'Geography',
'Provider',
'Variable',
'Enrichment'
'Enrichment',
'VariableAggregation',
'VariableFilter'
]
353 changes: 200 additions & 153 deletions cartoframes/data/observatory/enrichment/enrichment.py

Large diffs are not rendered by default.

106 changes: 63 additions & 43 deletions cartoframes/data/observatory/enrichment/enrichment_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import uuid
import logging
import geopandas as gpd

from collections import defaultdict
Expand All @@ -18,6 +17,38 @@
_WORKING_PROJECT = 'carto-do-customers'
_PUBLIC_PROJECT = 'carto-do-public-data'

AGGREGATION_DEFAULT = 'default'
AGGREGATION_NONE = 'none'


class VariableAggregation(object):
    """Pair a variable with the aggregation method to use for it in enrichment.

    Use it to override the default aggregation method of a
    `cartoframes.data.observatory` ``Variable`` in the enrichment functions.

    Args:
        variable: the variable to aggregate. It is normalized through
            ``_prepare_variable``, so a ``Variable`` instance or a string
            identifying one is expected.
        aggregation: the aggregation method to apply (for example ``'SUM'``).
            Defaults to ``None``.

    Example:
        VariableAggregation(variable, 'SUM')
    """

    def __init__(self, variable, aggregation=None):
        # Resolve first: an invalid variable fails before anything is stored.
        resolved_variable = _prepare_variable(variable)
        self.variable = resolved_variable
        self.aggregation = aggregation


class VariableFilter(object):
    """Describe a filter to apply over a variable during enrichment.

    Args:
        variable: a `cartoframes.data.observatory` ``Variable`` to filter on.
            It is normalized through ``_prepare_variable``, so a ``Variable``
            instance or a string identifying one is expected.
        operator: the operation to perform over the variable column, written
            in SQL syntax (for example ``'='`` or ``'>'``).
        value: the value to be used in the SQL operation.

    Examples:
        Equal to number: VariableFilter(variable, '=', 3)
        Equal to string: VariableFilter(variable, '=', 'the string')
        Greater than 3: VariableFilter(variable, '>', 3)
    """

    def __init__(self, variable, operator, value):
        # Resolve first: an invalid variable fails before anything is stored.
        resolved_variable = _prepare_variable(variable)
        self.variable = resolved_variable
        self.operator = operator
        self.value = value


class EnrichmentService(object):
"""Base class for the Enrichment utility with commons auxiliary methods"""
Expand Down Expand Up @@ -72,9 +103,14 @@ def _get_tables_metadata(self, variables):
tables_metadata = defaultdict(lambda: defaultdict(list))

for variable in variables:
table_name = self.__get_enrichment_table(variable)

tables_metadata[table_name]['variables'].append(variable.column_name)
if isinstance(variable, VariableAggregation):
variable_aggregation = variable
table_name = self.__get_enrichment_table(variable_aggregation.variable)
tables_metadata[table_name]['variables'].append(variable_aggregation)
variable = variable_aggregation.variable
else:
table_name = self.__get_enrichment_table(variable)
tables_metadata[table_name]['variables'].append(variable)

if 'dataset' not in tables_metadata[table_name].keys():
tables_metadata[table_name]['dataset'] = self.__get_dataset(variable, table_name)
Expand Down Expand Up @@ -159,56 +195,40 @@ def __copy_data_and_generate_enrichment_id(self, data, geometry_column):

def prepare_variables(variables):
if isinstance(variables, list):
return [__prepare_variable(var) for var in variables]
return [_prepare_variable(var) for var in variables]
else:
return [__prepare_variable(variables)]
return [_prepare_variable(variables)]


def __prepare_variable(variable):
def _prepare_variable(variable):
if isinstance(variable, str):
variable = Variable.get(variable)

if not isinstance(variable, Variable):
raise EnrichmentException(
'Variable(s) to enrich should be an instance of Variable / CatalogList / str / list'
)
raise EnrichmentException("""
variable should be a `<cartoframes.data.observatory> Variable` instance,
Variable `id` property or Variable `slug` property
""")

return variable


def process_filters(filters_dict):
filters = ''
if filters_dict:
filters = ' AND '.join([__format_filter(key, value) for key, value in filters_dict.items()])
filters = 'WHERE {filters}'.format(filters=filters)

return filters
def get_variable_aggregations(variables, aggregation):
return [VariableAggregation(variable, __get_aggregation(variable, aggregation)) for variable in variables]


def __format_filter(key, value):
return "enrichment_table.{0}='{1}'".format(key, value)
def __get_aggregation(variable, aggregation):
if aggregation == AGGREGATION_NONE:
return None
elif aggregation == AGGREGATION_DEFAULT:
return variable.agg_method or 'array_agg'
elif isinstance(aggregation, str):
return aggregation
elif isinstance(aggregation, list):
agg = variable.agg_method or 'array_agg'
for variable_aggregation in aggregation:
if variable_aggregation.variable == variable:
agg = variable_aggregation.aggregation
break


def process_agg_operators(agg_operators, variables, default_agg):
agg_operators_result = None
if isinstance(agg_operators, str):
agg_operators_result = dict()

for variable in variables:
agg_operators_result[variable.column_name] = agg_operators

elif isinstance(agg_operators, dict):
agg_operators_result = agg_operators.copy()

for variable in variables:
if variable.column_name not in agg_operators_result:
agg_operators_result[variable.column_name] = variable.agg_method or default_agg
if not variable.agg_method:
logging.warning(
"Variable '{}' doesn't have defined agg_method.".format(variable.column_name) +
"Default one will be used: '{}' \n".format(default_agg) +
"You can change this by using the 'agg_operators' parameter." +
"See docs for further details and examples."
)

return agg_operators_result
return agg
4 changes: 2 additions & 2 deletions cartoframes/utils/geom_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def extract_viz_columns(viz):
"""Extract columns ($name) in viz"""
columns = [RESERVED_GEO_COLUMN_NAME]
viz_nocomments = remove_comments(viz)
viz_columns = re.findall(r'\$([a-z_]+)', viz_nocomments)
viz_columns = re.findall(r'\$([A-Za-z0-9_]+)', viz_nocomments)
if viz_columns is not None:
columns += viz_columns
return columns
Expand All @@ -282,4 +282,4 @@ def replacer(match):
r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
re.DOTALL | re.MULTILINE
)
return re.sub(pattern, replacer, text)
return re.sub(pattern, replacer, text).strip()

0 comments on commit 5bb904a

Please sign in to comment.