Skip to content

Commit

Permalink
Support 'calendar_interval' and 'fixed_interval' in DateHistogramFacet
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmlarson committed Dec 4, 2020
1 parent fac8787 commit 919ca24
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 64 deletions.
2 changes: 1 addition & 1 deletion docs/faceted_search.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ There are several different facets available:
provides an option to split documents into groups based on a value of a field, for example ``TermsFacet(field='category')``

``DateHistogramFacet``
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", interval="day")``
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", calendar_interval="day")``

``HistogramFacet``
similar to ``DateHistogramFacet`` but for numerical values: ``HistogramFacet(field="rating", interval=2)``
Expand Down
38 changes: 33 additions & 5 deletions elasticsearch_dsl/faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,34 @@ def get_value_filter(self, filter_value):
)


def _date_interval_month(d):
return (d + timedelta(days=32)).replace(day=1)


def _date_interval_week(d):
return d + timedelta(days=7)


def _date_interval_day(d):
return d + timedelta(days=1)


def _date_interval_hour(d):
return d + timedelta(hours=1)


class DateHistogramFacet(Facet):
agg_type = "date_histogram"

DATE_INTERVALS = {
"month": lambda d: (d + timedelta(days=32)).replace(day=1),
"week": lambda d: d + timedelta(days=7),
"day": lambda d: d + timedelta(days=1),
"hour": lambda d: d + timedelta(hours=1),
"month": _date_interval_month,
"1M": _date_interval_month,
"week": _date_interval_week,
"1w": _date_interval_week,
"day": _date_interval_day,
"1d": _date_interval_day,
"hour": _date_interval_hour,
"1h": _date_interval_hour,
}

def __init__(self, **kwargs):
Expand All @@ -194,12 +214,20 @@ def get_value(self, bucket):
return bucket["key"]

def get_value_filter(self, filter_value):
    """Build the ``Range`` query selecting the bucket that starts at ``filter_value``.

    The interval is read from the first of ``calendar_interval``,
    ``fixed_interval`` or ``interval`` found in ``self._params``; the
    matching ``DATE_INTERVALS`` callable computes the exclusive upper bound.

    Raises ``KeyError`` when none of the interval parameters was configured
    (the lookup then falls through to ``"interval"``) or when the configured
    interval has no entry in ``DATE_INTERVALS``.
    """
    # Prefer the newer parameter names and fall back to the legacy one.
    interval_type = next(
        (
            name
            for name in ("calendar_interval", "fixed_interval")
            if name in self._params
        ),
        "interval",
    )

    return Range(
        _expand__to_dot=False,
        **{
            self._params["field"]: {
                "gte": filter_value,
                "lt": self.DATE_INTERVALS[self._params[interval_type]](
                    filter_value
                ),
            }
        }
    )
Expand Down
11 changes: 11 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@


import os
import re
from datetime import datetime

from elasticsearch.helpers import bulk
Expand Down Expand Up @@ -47,6 +48,16 @@ def client():
skip()


@fixture(scope="session")
def es_version(client):
    """Yield the server version reported by ``client.info()`` as a tuple of ints.

    Only the leading run of digits and dots of ``version.number`` is parsed,
    so anything after that numeric prefix is ignored.
    """
    info = client.info()
    print(info)
    numeric_prefix = re.match(r"^([0-9.]+)", info["version"]["number"]).group(1)
    yield tuple(map(int, numeric_prefix.split(".")))


@fixture
def write_client(client):
yield client
Expand Down
44 changes: 44 additions & 0 deletions tests/test_faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from datetime import datetime

import pytest

from elasticsearch_dsl.faceted_search import (
DateHistogramFacet,
FacetedSearch,
Expand Down Expand Up @@ -144,3 +146,45 @@ def test_date_histogram_facet_with_1970_01_01_date():
dhf = DateHistogramFacet()
assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0)
assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0)


@pytest.mark.parametrize(
    ["interval_type", "interval"],
    [
        ("interval", "month"),
        ("calendar_interval", "month"),
        ("interval", "week"),
        ("calendar_interval", "week"),
        ("interval", "day"),
        ("calendar_interval", "day"),
        ("fixed_interval", "day"),
        ("interval", "hour"),
        ("fixed_interval", "hour"),
        ("interval", "1M"),
        ("calendar_interval", "1M"),
        ("interval", "1w"),
        ("calendar_interval", "1w"),
        ("interval", "1d"),
        ("calendar_interval", "1d"),
        ("fixed_interval", "1d"),
        ("interval", "1h"),
        ("fixed_interval", "1h"),
    ],
)
def test_date_histogram_interval_types(interval_type, interval):
    """Each interval keyword is passed through to the aggregation unchanged."""
    facet = DateHistogramFacet(field="@timestamp", **{interval_type: interval})
    expected_agg = {
        "date_histogram": {
            "field": "@timestamp",
            interval_type: interval,
            "min_doc_count": 0,
        }
    }
    assert facet.get_aggregation().to_dict() == expected_agg
    # Smoke check only: building the filter must not raise for this pair.
    facet.get_value_filter(datetime.now())


def test_date_histogram_no_interval_keyerror():
    """Without any interval parameter the filter lookup fails on 'interval'."""
    facet = DateHistogramFacet(field="@timestamp")
    with pytest.raises(KeyError) as excinfo:
        facet.get_value_filter(datetime.now())
    assert "'interval'" == str(excinfo.value)
146 changes: 88 additions & 58 deletions tests/test_integration/test_faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from datetime import datetime

import pytest

from elasticsearch_dsl import A, Boolean, Date, Document, Keyword
from elasticsearch_dsl.faceted_search import (
DateHistogramFacet,
Expand All @@ -29,25 +31,6 @@
from .test_document import PullRequest


class CommitSearch(FacetedSearch):
    """Faceted search over the "flat-git" index.

    Exposes a terms facet on files, a daily date histogram on the authored
    date and a range facet on deletion counts.
    """

    index = "flat-git"
    fields = ("description", "files")

    facets = {
        "files": TermsFacet(field="files"),
        "frequency": DateHistogramFacet(
            field="authored_date",
            interval="day",
            min_doc_count=1,
        ),
        "deletions": RangeFacet(
            field="stats.deletions",
            ranges=[
                ("ok", (None, 1)),
                ("good", (1, 5)),
                ("better", (5, None)),
            ],
        ),
    }


class Repos(Document):
is_public = Boolean()
created_at = Date()
Expand All @@ -64,19 +47,6 @@ class Index:
name = "git"


class RepoSearch(FacetedSearch):
    """Faceted search over ``Repos`` documents in the "git" index."""

    index = "git"
    doc_types = [Repos]
    facets = {
        "public": TermsFacet(field="is_public"),
        "created": DateHistogramFacet(field="created_at", interval="month"),
    }

    def search(self):
        """Return the base search narrowed by a term filter on ``commit_repo``."""
        base = super(RepoSearch, self).search()
        return base.filter("term", commit_repo="repo")


class MetricSearch(FacetedSearch):
index = "git"
doc_types = [Commit]
Expand All @@ -86,15 +56,72 @@ class MetricSearch(FacetedSearch):
}


class PRSearch(FacetedSearch):
    """Faceted search over pull requests with a nested monthly comment facet."""

    index = "test-prs"
    doc_types = [PullRequest]
    facets = {
        "comments": NestedFacet(
            "comments",
            DateHistogramFacet(field="comments.created_at", interval="month"),
        )
    }


@pytest.fixture(scope="session")
def commit_search_cls(es_version):
    """Return a ``CommitSearch`` class whose date facet suits the server version.

    For ``es_version >= (7, 2)`` the histogram uses ``fixed_interval="1d"``;
    otherwise it uses the legacy ``interval="day"``.
    """
    if es_version >= (7, 2):
        interval_kwargs = {"fixed_interval": "1d"}
    else:
        interval_kwargs = {"interval": "day"}

    class CommitSearch(FacetedSearch):
        index = "flat-git"
        fields = (
            "description",
            "files",
        )

        facets = {
            "files": TermsFacet(field="files"),
            "frequency": DateHistogramFacet(
                field="authored_date", min_doc_count=1, **interval_kwargs
            ),
            "deletions": RangeFacet(
                field="stats.deletions",
                ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
            ),
        }

    return CommitSearch


@pytest.fixture(scope="session")
def repo_search_cls(es_version):
    """Return a ``RepoSearch`` class whose monthly facet suits the server version.

    ``calendar_interval`` is used for ``es_version >= (7, 2)``, the legacy
    ``interval`` parameter otherwise.
    """
    if es_version >= (7, 2):
        interval_type = "calendar_interval"
    else:
        interval_type = "interval"
    month_kwargs = {interval_type: "month"}

    class RepoSearch(FacetedSearch):
        index = "git"
        doc_types = [Repos]
        facets = {
            "public": TermsFacet(field="is_public"),
            "created": DateHistogramFacet(field="created_at", **month_kwargs),
        }

        def search(self):
            # Narrow the base search with a term filter on commit_repo.
            base = super(RepoSearch, self).search()
            return base.filter("term", commit_repo="repo")

    return RepoSearch


@pytest.fixture(scope="session")
def pr_search_cls(es_version):
    """Return a ``PRSearch`` class whose nested monthly facet suits the server version.

    ``calendar_interval`` is used for ``es_version >= (7, 2)``, the legacy
    ``interval`` parameter otherwise.
    """
    if es_version >= (7, 2):
        interval_type = "calendar_interval"
    else:
        interval_type = "interval"
    month_kwargs = {interval_type: "month"}

    class PRSearch(FacetedSearch):
        index = "test-prs"
        doc_types = [PullRequest]
        facets = {
            "comments": NestedFacet(
                "comments",
                DateHistogramFacet(field="comments.created_at", **month_kwargs),
            )
        }

    return PRSearch


def test_facet_with_custom_metric(data_client):
Expand All @@ -106,36 +133,36 @@ def test_facet_with_custom_metric(data_client):
assert dates[0] == 1399038439000


def test_nested_facet(pull_request, pr_search_cls):
    """An unfiltered PR search reports one unselected monthly comment bucket."""
    prs = pr_search_cls()
    r = prs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments


def test_nested_facet_with_filter(pull_request, pr_search_cls):
    """Filtering on a comment month marks that bucket selected and narrows hits."""
    prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)})
    r = prs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments

    # A filter on the following month should leave no hits.
    prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)})
    r = prs.execute()
    assert not r.hits


def test_datehistogram_facet(data_client, repo_search_cls):
    """The repo search reports a single unselected monthly ``created`` bucket."""
    rs = repo_search_cls()
    r = rs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created


def test_boolean_facet(data_client):
rs = RepoSearch()
def test_boolean_facet(data_client, repo_search_cls):
rs = repo_search_cls()
r = rs.execute()

assert r.hits.total.value == 1
Expand All @@ -144,9 +171,8 @@ def test_boolean_facet(data_client):
assert value is True


def test_empty_search_finds_everything(data_client):
cs = CommitSearch()

def test_empty_search_finds_everything(data_client, es_version, commit_search_cls):
cs = commit_search_cls()
r = cs.execute()

assert r.hits.total.value == 52
Expand Down Expand Up @@ -190,8 +216,10 @@ def test_empty_search_finds_everything(data_client):
] == r.facets.deletions


def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
cs = CommitSearch(filters={"files": "test_elasticsearch_dsl"})
def test_term_filters_are_shown_as_selected_and_data_is_filtered(
data_client, commit_search_cls
):
cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"})

r = cs.execute()

Expand Down Expand Up @@ -234,16 +262,18 @@ def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
] == r.facets.deletions


def test_range_filters_are_shown_as_selected_and_data_is_filtered(
    data_client, commit_search_cls
):
    """Filtering commits on the "better" deletions range leaves 19 hits."""
    cs = commit_search_cls(filters={"deletions": "better"})

    r = cs.execute()

    assert 19 == r.hits.total.value


def test_pagination(data_client):
cs = CommitSearch()
def test_pagination(data_client, commit_search_cls):
cs = commit_search_cls()
cs = cs[0:20]

assert 52 == cs.count()
Expand Down

0 comments on commit 919ca24

Please sign in to comment.