Skip to content

Commit

Permalink
Merge pull request Yelp#84 from Yelp/document_terms_size
Browse files Browse the repository at this point in the history
Added docs for terms_size, upped the default, and fixed top_count_number
  • Loading branch information
Qmando committed May 11, 2015
2 parents b081f25 + 1ae6bed commit ca4696c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
10 changes: 8 additions & 2 deletions docs/source/ruletypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,10 @@ of tens of thousands or more. ``doc_type`` must be set to use this.
``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set.

``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching
each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``.
each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. At most ``terms_size`` unique terms
(default 50) are returned per query.

``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50.

``query_key``: The number of events is remembered separately for each unique ``query_key`` field. If this option
is set, the field must be present for all events.
Expand Down Expand Up @@ -374,7 +377,10 @@ of tens of thousands or more. ``doc_type`` must be set to use this.
``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set.

``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching
each unique value of ``query_key``. This be used with ``query_key``. ``doc_type`` must be set to use this.
each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. At most ``terms_size`` unique terms
(default 50) are returned per query.

``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50.

Flatline
~~~~~~~~
Expand Down
8 changes: 5 additions & 3 deletions elastalert/elastalert.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,17 @@ def get_hits_count(self, rule, starttime, endtime, index):
logging.info("Queried rule %s from %s to %s: %s hits" % (rule['name'], pretty_ts(starttime, lt), pretty_ts(endtime, lt), res['count']))
return {endtime: res['count']}

def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None):
def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=None):
rule_filter = copy.copy(rule['filter'])
if qk:
filter_key = rule['query_key']
if rule.get('raw_count_keys', True) and not rule['query_key'].endswith('.raw'):
filter_key += '.raw'
rule_filter.extend([{'term': {filter_key: qk}}])
base_query = self.get_query(rule_filter, starttime, endtime, timestamp_field=rule['timestamp_field'], sort=False)
query = self.get_terms_query(base_query, rule.get('terms_size', 5), key)
if size is None:
size = rule.get('terms_size', 50)
query = self.get_terms_query(base_query, size, key)

try:
res = self.current_es.search(index=index, doc_type=rule['doc_type'], body=query, search_type='count', ignore_unavailable=True)
Expand Down Expand Up @@ -1033,7 +1035,7 @@ def get_top_counts(self, rule, starttime, endtime, keys, number=5, qk=None):
all_counts = {}
for key in keys:
index = self.get_index(rule, starttime, endtime)
buckets = self.get_hits_terms(rule, starttime, endtime, index, key, qk).values()[0]
buckets = self.get_hits_terms(rule, starttime, endtime, index, key, qk, number).values()[0]
# get_hits_terms adds to num_hits, but we don't want to count these
self.num_hits -= len(buckets)
terms = {}
Expand Down

0 comments on commit ca4696c

Please sign in to comment.