Skip to content

Commit

Permalink
Merge pull request #1081 from untergeek/feature/1044
Browse files Browse the repository at this point in the history
Add pattern feature for count filter
  • Loading branch information
untergeek committed Oct 12, 2017
2 parents 3d70de7 + 2e88c11 commit ce58168
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 79 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ env:
- ES_VERSION=5.3.3
- ES_VERSION=5.4.3
- ES_VERSION=5.5.2
- ES_VERSION=5.6.2
- ES_VERSION=5.6.3

os: linux

Expand Down
5 changes: 5 additions & 0 deletions curator/defaults/filter_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def max_num_segments(**kwargs):
Required('max_num_segments'): All(Coerce(int), Range(min=1))
}

def pattern(**kwargs):
    """Return the schema fragment for the optional ``pattern`` element.

    When supplied, the value must be a ``str`` or ``unicode`` string.
    ``kwargs`` is accepted for signature parity but is not used here.
    """
    schema = {}
    schema[Optional('pattern')] = Any(str, unicode)
    return schema

def range_from(**kwargs):
    """Return the schema fragment for the required ``range_from`` element.

    The supplied value is coerced to ``int``. ``kwargs`` is accepted but
    not used here.
    """
    key = Required('range_from')
    return {key: Coerce(int)}

Expand Down
1 change: 1 addition & 0 deletions curator/defaults/filtertypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def count(action, config):
retval = [
filter_elements.count(),
filter_elements.use_age(),
filter_elements.pattern(),
filter_elements.reverse(),
filter_elements.exclude(exclude=True),
]
Expand Down
1 change: 1 addition & 0 deletions curator/defaults/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def structural_filter_elements():
Optional('key'): Any(str, unicode),
Optional('kind'): Any(str, unicode),
Optional('max_num_segments'): Coerce(int),
Optional('pattern'): Any(str, unicode),
Optional('reverse'): Any(int, str, unicode, bool, None),
Optional('range_from'): Coerce(int),
Optional('range_to'): Coerce(int),
Expand Down
100 changes: 74 additions & 26 deletions curator/indexlist.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import timedelta, datetime, date
import time
import re
import itertools
import logging
import elasticsearch
from .defaults import settings
Expand Down Expand Up @@ -766,7 +767,7 @@ def filter_by_alias(self, aliases=None, exclude=False):
self.__excludify(condition, exclude, index, msg)

def filter_by_count(
self, count=None, reverse=True, use_age=False,
self, count=None, reverse=True, use_age=False, pattern=None,
source='creation_date', timestring=None, field=None,
stats_result='min_value', exclude=True):
"""
Expand All @@ -791,6 +792,16 @@ def filter_by_count(
:arg reverse: The filtering direction. (default: `True`).
:arg use_age: Sort indices by age. ``source`` is required in this
case.
:arg pattern: Select indices to count from a regular expression
pattern. This pattern must have one and only one capture group.
This can allow a single ``count`` filter instance to operate against
any number of matching patterns, and keep ``count`` of each index
in that group. For example, given a ``pattern`` of ``'^(.*)-\d{6}$'``,
it will match both ``rollover-000001`` and ``index-999990``, but not
``logstash-2017.10.12``. Following the same example, if my cluster
also had ``rollover-000002`` through ``rollover-000010`` and
``index-888888`` through ``index-999999``, it will process both
groups of indices, and include or exclude the ``count`` of each.
:arg source: Source of index age. Can be one of ``name``,
``creation_date``, or ``field_stats``. Default: ``creation_date``
:arg timestring: An strftime string to match the datestamp in an index
Expand All @@ -811,35 +822,72 @@ def filter_by_count(

# Create a copy-by-value working list
working_list = self.working_list()

if use_age:
if source != 'name':
self.loggit.warn(
'Cannot get age information from closed indices unless '
'source="name". Omitting any closed indices.'
if pattern:
try:
r = re.compile(pattern)
if r.groups < 1:
raise ConfigurationError('No regular expression group found in {0}'.format(pattern))
elif r.groups > 1:
raise ConfigurationError('More than 1 regular expression group found in {0}'.format(pattern))
# Prune indices not matching the regular expression from the object (and filtered_indices).
# We do not want to act on them by accident.
prune_these = list(filter(lambda x: r.match(x) is None, working_list))
filtered_indices = working_list
for index in prune_these:
msg = (
'{0} does not match regular expression {1}.'.format(
index, pattern
)
)
condition = True
exclude = True
self.__excludify(condition, exclude, index, msg)
# also remove it from filtered_indices
filtered_indices.remove(index)
# Presort these filtered_indices using the lambda
presorted = sorted(filtered_indices, key=lambda x: r.match(x).group(1))
except Exception as e:
raise ActionError('Unable to process pattern: "{0}". Error: {1}'.format(pattern, e))
# Initialize groups here
groups = []
# We have to pull keys k this way, but we don't need to keep them
# We only need g for groups
for k, g in itertools.groupby(presorted, key=lambda x: r.match(x).group(1)):
groups.append(list(g))
else:
# Since pattern will create a list of lists, and we iterate over that,
# we need to put our single list inside a list
groups = [ working_list ]
for group in groups:
if use_age:
if source != 'name':
self.loggit.warn(
'Cannot get age information from closed indices unless '
'source="name". Omitting any closed indices.'
)
self.filter_closed()
self._calculate_ages(
source=source, timestring=timestring, field=field,
stats_result=stats_result
)
self.filter_closed()
self._calculate_ages(
source=source, timestring=timestring, field=field,
stats_result=stats_result
)
# Using default value of reverse=True in self._sort_by_age()
sorted_indices = self._sort_by_age(working_list, reverse=reverse)
# Using default value of reverse=True in self._sort_by_age()
sorted_indices = self._sort_by_age(group, reverse=reverse)

else:
# Default to sorting by index name
sorted_indices = sorted(working_list, reverse=reverse)
else:
# Default to sorting by index name
sorted_indices = sorted(group, reverse=reverse)

idx = 1
for index in sorted_indices:
msg = (
'{0} is {1} of specified count of {2}.'.format(
index, idx, count

idx = 1
for index in sorted_indices:
msg = (
'{0} is {1} of specified count of {2}.'.format(
index, idx, count
)
)
)
condition = True if idx <= count else False
self.__excludify(condition, exclude, index, msg)
idx += 1
condition = True if idx <= count else False
self.__excludify(condition, exclude, index, msg)
idx += 1

def filter_period(
self, source='name', range_from=None, range_to=None, timestring=None,
Expand Down
2 changes: 2 additions & 0 deletions docs/Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Changelog
Requested in #1045. (untergeek)
* Add a ``restore`` function to ``curator_cli`` singleton. Mentioned in
#851 (alexef)
* Add ``pattern`` to the ``count`` filter. This is particularly useful
when working with rollover indices. Requested in #1044 (untergeek)

**Bug Fixes**

Expand Down
37 changes: 37 additions & 0 deletions docs/asciidoc/filter_elements.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,43 @@ will be raised, and execution will halt.



[[fe_pattern]]
== pattern

NOTE: This setting is only used with the <<filtertype_count,count>> filtertype

[source,yaml]
-------------
- filtertype: count
count: 1
pattern: '^(.*)-\d{6}$'
reverse: true
-------------

This particular example will match indices following the basic rollover pattern
of `indexname-######`, and keep the highest numbered index for each group.

For example, given indices `a-000001`, `a-000002`, `a-000003`, `b-000006`,
and `b-000007`, the indices that would be matched are `a-000003` and `b-000007`.
Indices that do not match the regular expression in `pattern` will be
automatically excluded.

This is particularly useful with indices created and managed using the
{ref}/indices-rollover-index.html[Rollover API]. Because
<<fe_exclude,`exclude`>> defaults to `True` for the `count` filter, the above
example will _remove_ the active rollover indices from the actionable list,
leaving only those which have been rolled-over. Setting
<<fe_exclude,`exclude`>> to `False` with the above example will instead select
only the active indices.

While this is perhaps most useful for the aforementioned scenario, it can
also be used with age-based indices.

Items will remain in the actionable list depending on the value of
<<fe_exclude,exclude>>, and <<fe_reverse,reverse>>.

There is no default value. The value must either include exactly one capture
group, defined by parentheses, or be left empty. If a value is provided and it
does not contain exactly one capture group, an exception will be raised and
execution will halt.

[[fe_range_from]]
== range_from

Expand Down
28 changes: 28 additions & 0 deletions docs/asciidoc/filters.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,33 @@ All of the age-related settings from the <<filtertype_age,`age`>> filter are
supported, and the same restrictions apply with regard to filtering indices vs.
snapshots.

=== Pattern-based sorting

[source,yaml]
-------------
- filtertype: count
count: 1
pattern: '^(.*)-\d{6}$'
reverse: true
-------------

This particular example will match indices following the basic rollover pattern
of `indexname-######`, and keep the highest numbered index for each group.

For example, given indices `a-000001`, `a-000002`, `a-000003`, `b-000006`,
and `b-000007`, the indices that would be matched are `a-000003` and `b-000007`.
Indices that do not match the regular expression in `pattern` will be
automatically excluded.

This is particularly useful with indices created and managed using the
{ref}/indices-rollover-index.html[Rollover API]. Because
<<fe_exclude,`exclude`>> defaults to `True` for the `count` filter, the above
example will _remove_ the active rollover indices from the actionable list,
leaving only those which have been rolled-over. Setting
<<fe_exclude,`exclude`>> to `False` with the above example will instead select
only the active indices.

While this is perhaps most useful for the aforementioned scenario, it can
also be used with age-based indices.

=== Reversing sorting

Using the default configuration, <<fe_reverse,`reverse`>> is `True`. Given
Expand Down Expand Up @@ -398,6 +425,7 @@ removed from the actionable list, leaving `index-2017.03.03`,

* <<fe_reverse,reverse>>
* <<fe_use_age,use_age>>
* <<fe_pattern,pattern>>
* <<fe_source,source>> (required if `use_age` is `True`)
* <<fe_timestring,timestring>> (required if `source` is `name`)
* <<fe_exclude,exclude>> (default is `True`)
Expand Down
99 changes: 99 additions & 0 deletions test/integration/test_count_pattern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import elasticsearch
import curator
import os
import json
import string, random, tempfile
import time
from click import testing as clicktest
from mock import patch, Mock
import unittest
from . import CuratorTestCase
from . import testvars as testvars

import logging
# Module-level logger named after this test module.
logger = logging.getLogger(__name__)

# Elasticsearch endpoint under test, read from TEST_ES_SERVER as "host:port";
# falls back to localhost:9200 when the variable is unset.
host, port = os.environ.get('TEST_ES_SERVER', 'localhost:9200').split(':')
# An empty port component (e.g. "localhost:") falls back to 9200.
port = int(port) if port else 9200
# NOTE(review): the commented-out lines below look like a leftover from a
# different filter template; their placeholders ({0}..{8}) do not match the
# delete_count_pattern template defined further down ({0}..{5}).
# ' - filtertype: {0}\n'
# ' source: {1}\n'
# ' direction: {2}\n'
# ' timestring: {3}\n'
# ' unit: {4}\n'
# ' unit_count: {5}\n'
# ' field: {6}\n'
# ' stats_result: {7}\n'
# ' epoch: {8}\n')

# Client created at import time from the host/port parsed above.
# NOTE(review): not referenced in the visible part of this file; the tests
# below use self.client instead.
global_client = elasticsearch.Elasticsearch(host=host, port=port)

# Action-file template: one delete_indices action with a single count filter.
# Placeholders: {0} pattern, {1} use_age, {2} source, {3} timestring,
# {4} reverse, {5} count.
delete_count_pattern = ('---\n'
'actions:\n'
' 1:\n'
' description: "Delete indices as filtered"\n'
' action: delete_indices\n'
' options:\n'
' continue_if_exception: False\n'
' disable_action: False\n'
' filters:\n'
' - filtertype: count\n'
' pattern: {0}\n'
' use_age: {1}\n'
' source: {2}\n'
' timestring: {3}\n'
' reverse: {4}\n'
' count: {5}\n')

class TestCLICountPattern(CuratorTestCase):
    """Integration tests for the ``pattern`` option of the ``count`` filter.

    Each test creates several groups of indices plus one index that does not
    match the pattern, runs a ``delete_indices`` action with ``count: 1`` and
    checks which indices are left in the cluster afterwards.
    """

    def _run_delete_count_pattern(self, pattern, use_age):
        """Write the client/action files and invoke the curator CLI.

        :arg pattern: YAML-quoted regular expression for the ``pattern``
            setting of the count filter.
        :arg use_age: YAML boolean literal (``'true'`` or ``'false'``) for the
            ``use_age`` setting.

        ``source`` is always ``name``, ``timestring`` is ``'%Y.%m.%d'``,
        ``reverse`` is ``true`` and ``count`` is ``1``.
        """
        self.write_config(
            self.args['configfile'], testvars.client_config.format(host, port))
        self.write_config(
            self.args['actionfile'],
            delete_count_pattern.format(
                pattern, use_age, 'name', "'%Y.%m.%d'", 'true', 1
            )
        )
        runner = clicktest.CliRunner()
        runner.invoke(
            curator.cli,
            [
                '--config', self.args['configfile'],
                self.args['actionfile']
            ],
        )

    def _remaining_indices(self):
        """Return the sorted names of all indices currently in the cluster."""
        return sorted(self.client.indices.get('_all'))

    def test_match_proper_indices(self):
        """Name-sorted groups: only the highest index of each group remains."""
        for i in range(1, 4):
            self.create_index('a-{0}'.format(i))
        for i in range(4, 7):
            self.create_index('b-{0}'.format(i))
        for i in range(5, 9):
            self.create_index('c-{0}'.format(i))
        self.create_index('not_a_match')
        # Raw string: ``\d`` must reach the YAML file verbatim and is not a
        # valid Python string escape.
        self._run_delete_count_pattern(r"'^(a|b|c)-\d$'", 'false')
        self.assertEqual(
            ['a-3', 'b-6', 'c-8', 'not_a_match'], self._remaining_indices())

    def test_match_proper_indices_by_age(self):
        """Age-sorted groups: only the newest index of each group remains."""
        self.create_index('a-2017.10.01')
        self.create_index('a-2017.10.02')
        self.create_index('a-2017.10.03')
        self.create_index('b-2017.09.01')
        self.create_index('b-2017.09.02')
        self.create_index('b-2017.09.03')
        self.create_index('not_a_match')
        # Raw string for the same reason as above (``\d`` and ``\.``).
        self._run_delete_count_pattern(
            r"'^(a|b)-\d{4}\.\d{2}\.\d{2}$'", 'true')
        self.assertEqual(
            ['a-2017.10.03', 'b-2017.09.03', 'not_a_match'],
            self._remaining_indices()
        )
1 change: 1 addition & 0 deletions test/integration/testvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@
' unit_count: {5}\n'
' unit_count_pattern: {6}\n')


delete_period_proto = ('---\n'
'actions:\n'
' 1:\n'
Expand Down

0 comments on commit ce58168

Please sign in to comment.