-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #256 from datosgobar/236-add-max-min-aggs
236 add max min aggs
- Loading branch information
Showing 6 changed files with 186 additions and 133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
series_tiempo_ar_api/apps/api/query/es_query/response_formatter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#! coding: utf-8 | ||
from iso8601 import iso8601 | ||
|
||
from series_tiempo_ar_api.apps.api.helpers import get_relative_delta | ||
from series_tiempo_ar_api.apps.api.query import constants | ||
|
||
|
||
class ResponseFormatter(object):
    """Builds the final list-of-rows response out of one or more
    Elasticsearch query responses, one per requested series."""

    def __init__(self, series, responses, args):
        # series: requested series descriptors (each has series_id,
        #   rep_mode and collapse_agg attributes)
        # responses: one Elasticsearch response per series, same order
        # args: query parameters dict (sort order, periodicity, ...)
        self.series = series
        self.responses = responses
        self.data_dict = {}
        self.args = args

    def format_response(self):
        """Process the responses received from Elasticsearch.

        Data is first collected into self.data_dict with the format
            self.data_dict = {
                "1990-01-01": {"serie_1": value1, "serie_2": value2, ...},
                "1990-02-01": {"serie_1": value1, "serie_2": value2, ...}
            }
        and then flattened into the expected list of
        [timestamp, value1, value2, ...] rows, sorted by timestamp
        according to the requested sort order.
        """
        for i, response in enumerate(self.responses):
            serie = self.series[i]
            rep_mode = serie.rep_mode

            if serie.collapse_agg in (constants.AGG_MIN, constants.AGG_MAX):
                # min/max aggs are computed by ES at query time: values
                # arrive inside the 'test' date_histogram agg buckets
                for hit in response.aggregations.test.buckets:
                    data = hit['test'][rep_mode]
                    timestamp_dict = self.data_dict.setdefault(hit['key_as_string'], {})
                    timestamp_dict[self._data_dict_series_key(serie)] = data
            else:
                for hit in response:
                    data = hit[rep_mode] if rep_mode in hit else None
                    timestamp_dict = self.data_dict.setdefault(hit.timestamp, {})
                    timestamp_dict[self._data_dict_series_key(serie)] = data

        if not self.data_dict:  # No data at all
            return []

        self._make_date_index_continuous(min(self.data_dict.keys()),
                                         max(self.data_dict.keys()))

        # ISO-8601 timestamp strings sort correctly lexicographically, so a
        # plain reverse flag replaces the Python 2-only cmp= sort function
        descending = self.args[constants.PARAM_SORT] != constants.SORT_ASCENDING

        final_data = []
        for timestamp in sorted(self.data_dict.keys(), reverse=descending):
            row = [timestamp]
            for serie in self.series:
                row.append(self.data_dict[timestamp].get(self._data_dict_series_key(serie)))
            final_data.append(row)

        return final_data

    @staticmethod
    def _data_dict_series_key(series):
        """Unique key identifying the requested series inside data_dict.

        Prevents series from clobbering each other in queries that ask for
        the same series with different rep modes or aggs (see issue #243).
        """
        return series.series_id + series.rep_mode + series.collapse_agg

    def _make_date_index_continuous(self, start_date, end_date):
        """Make the results' time index continuous (per the requested
        periodicity), without gaps, between start_date and end_date.

        Fills self.data_dict with empty entries for every missing
        timestamp to guarantee continuity.
        """
        # Nothing to do if no data was loaded
        if not self.data_dict:
            return

        current_date = iso8601.parse_date(start_date)
        end_date = iso8601.parse_date(end_date)

        while current_date < end_date:
            current_date += get_relative_delta(self.args[constants.PARAM_PERIODICITY])
            # u'' literal yields unicode on Python 2 and str on Python 3,
            # replacing the Python 2-only unicode() builtin
            self.data_dict.setdefault(u'{}'.format(current_date.date()), {})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,51 @@ | ||
#! coding: utf-8 | ||
from elasticsearch_dsl import Search | ||
from django.conf import settings | ||
from elasticsearch_dsl import Search, Q, A | ||
|
||
from series_tiempo_ar_api.apps.api.query import constants | ||
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance | ||
|
||
|
||
class Series(object):
    """Wraps one requested series together with the Elasticsearch search
    object used to fetch its data."""

    def __init__(self, index, series_id, rep_mode, args, collapse_agg=None):
        self.index = index
        self.elastic = ElasticInstance.get()
        self.series_id = series_id
        self.rep_mode = rep_mode
        self.args = args.copy()
        # Fall back to the API-wide default aggregation when none is given
        self.collapse_agg = collapse_agg or constants.API_DEFAULT_VALUES[constants.PARAM_COLLAPSE_AGG]
        self.search = self.init_search()

    def init_search(self):
        """Build the base search: pagination window, ascending time sort,
        and a filter matching this series id and its aggregation."""
        offset = self.args[constants.PARAM_START]
        search = Search(using=self.elastic, index=self.index)
        search = search[offset:offset + self.args[constants.PARAM_LIMIT]]
        search = search.sort(settings.TS_TIME_INDEX_FIELD)  # default: ascending sort
        # Filter results by the requested series. If the agg is computed in
        # memory we filter by the default agg and compute the requested one
        # at runtime.
        if self.collapse_agg in constants.IN_MEMORY_AGGS:
            agg = constants.AGG_DEFAULT
        else:
            agg = self.collapse_agg
        return search.filter('bool',
                             must=[Q('match', series_id=self.series_id),
                                   Q('match', aggregation=agg)])

    def add_range_filter(self, start, end):
        """Restrict results to timestamps within [start, end]."""
        self.search = self.search.filter('range', timestamp={'gte': start, 'lte': end})

    def add_collapse(self, periodicity):
        """Collapse results to the given periodicity, either through the
        pre-indexed interval or an ES runtime aggregation."""
        if self.collapse_agg not in constants.IN_MEMORY_AGGS:
            self.search = self.search.filter('bool', must=[Q('match', interval=periodicity)])
        else:
            # Add the aggregation so it is executed inside ES at query time
            self.search = self.search.filter('bool', must=[Q('match', interval=self.args['periodicity'])])
            histogram = A('date_histogram',
                          field='timestamp',
                          interval=periodicity,
                          format='yyyy-MM-dd')
            self.search.aggs.bucket('test', histogram.metric('test', self.collapse_agg, field=self.rep_mode))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.