Skip to content

Commit

Permalink
Merge pull request #997 from soenkeliebau/issue996
Browse files Browse the repository at this point in the history
#996: Added functionality to dynamically filter indices based on …
  • Loading branch information
untergeek committed Aug 23, 2017
2 parents 31449ed + 1bf73fd commit 0e58a5c
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 4 deletions.
6 changes: 6 additions & 0 deletions curator/defaults/filter_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ def unit_count(**kwargs):
# filtertype if use_age is set to True.
return { Required('unit_count'): Coerce(int) }

def unit_count_pattern(**kwargs):
    # Schema fragment for the optional ``unit_count_pattern`` setting.
    # Used with the age filtertype: when present, the unit_count value
    # may be read from the index name via this regular expression rather
    # than taken from the configuration.
    schema_key = Optional('unit_count_pattern')
    accepted = Any(str, unicode)
    return {schema_key: accepted}

def use_age(**kwargs):
    # Schema fragment for the optional ``use_age`` setting (default False).
    # Use of this setting requires the additional setting, source.
    schema_key = Optional('use_age', default=False)
    # Accept a real bool, or a string that Boolean() can coerce.
    boolean_like_string = All(Any(str, unicode), Boolean())
    return {schema_key: Any(bool, boolean_like_string)}
Expand Down
1 change: 1 addition & 0 deletions curator/defaults/filtertypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def age(action, config):
filter_elements.direction(),
filter_elements.unit(),
filter_elements.unit_count(),
filter_elements.unit_count_pattern(),
filter_elements.epoch(),
filter_elements.exclude(),
]
Expand Down
1 change: 1 addition & 0 deletions curator/defaults/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def structural_filter_elements():
Optional('timestring'): Any(str, unicode, None),
Optional('unit'): Any(str, unicode),
Optional('unit_count'): Coerce(int),
Optional('unit_count_pattern'): Any(str, unicode),
Optional('use_age'): Boolean(),
Optional('value'): Any(int, float, str, unicode, bool),
Optional('week_starts_on'): Any(str, unicode, None),
Expand Down
34 changes: 31 additions & 3 deletions curator/indexlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def filter_by_regex(self, kind=None, value=None, exclude=False):

def filter_by_age(self, source='name', direction=None, timestring=None,
unit=None, unit_count=None, field=None, stats_result='min_value',
epoch=None, exclude=False,
epoch=None, exclude=False, unit_count_pattern=False
):
"""
Match `indices` by relative age calculations.
Expand All @@ -404,6 +404,8 @@ def filter_by_age(self, source='name', direction=None, timestring=None,
``weeks``, ``months``, or ``years``.
:arg unit_count: The number of ``unit`` (s). ``unit_count`` * ``unit`` will
be calculated out to the relative number of seconds.
:arg unit_count_pattern: A regular expression whose capture group identifies
the value for ``unit_count``.
:arg field: A timestamp field name. Only used for ``field_stats`` based
calculations.
:arg stats_result: Either `min_value` or `max_value`. Only used in
Expand Down Expand Up @@ -431,6 +433,12 @@ def filter_by_age(self, source='name', direction=None, timestring=None,
source=source, timestring=timestring, field=field,
stats_result=stats_result
)
if unit_count_pattern:
try:
unit_count_matcher = re.compile(unit_count_pattern)
except:
# We got an illegal regex, so won't be able to match anything
unit_count_matcher = None
for index in self.working_list():
try:
age = int(self.index_info[index]['age'][self.age_keyfield])
Expand All @@ -445,10 +453,30 @@ def filter_by_age(self, source='name', direction=None, timestring=None,
)
# Because time adds to epoch, smaller numbers are actually older
# timestamps.
if unit_count_pattern:
self.loggit.debug("unit_count_pattern is set, trying to match pattern to index " + index)
unit_count_from_index = get_unit_count_from_name(index, unit_count_matcher)
if unit_count_from_index:
self.loggit.debug("pattern matched, applying unit_count of " + str(unit_count_from_index))
adjustedPoR = get_point_of_reference(unit, unit_count_from_index, epoch)
test = 0
elif unit_count == -1:
# Unable to match pattern and unit_count is -1, meaning no fallback, so this
# index is removed from the list
self.loggit.debug("Unable to match pattern and no fallback value set. Removing index " + index + " from actionable list")
exclude = True
adjustedPoR = PoR # necessary to avoid exception if the first index is excluded
else:
# Unable to match the pattern and unit_count is set, so fall back to using unit_count
# for determining whether to keep this index in the list
self.loggit.debug("unable to match pattern using fallback value of " + str(unit_count))
adjustedPoR = PoR
else:
adjustedPoR = PoR
if direction == 'older':
agetest = age < PoR
agetest = age < adjustedPoR
else:
agetest = age > PoR
agetest = age > adjustedPoR
self.__excludify(agetest, exclude, index, msg)
except KeyError:
self.loggit.debug(
Expand Down
14 changes: 13 additions & 1 deletion curator/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,19 @@ def get_point_of_reference(unit, count, epoch=None):
epoch = time.time()
epoch = fix_epoch(epoch)
return epoch - multiplier * count


def get_unit_count_from_name(index_name, pattern):
    """
    Extract a ``unit_count`` value from ``index_name``.

    :arg index_name: The index name to search.
    :arg pattern: A compiled regular expression whose first capture group
        holds the numeric value, or ``None`` if no usable pattern exists.
    :rtype: int, or ``None`` when ``pattern`` is ``None``, the pattern does
        not match, or the first capture group cannot be read as an integer.
    """
    if pattern is None:
        return None
    match = pattern.search(index_name)
    if not match:
        return None
    try:
        return int(match.group(1))
    except (IndexError, TypeError, ValueError):
        # IndexError: the pattern has no capture group.
        # TypeError: the group did not participate in the match (None).
        # ValueError: the captured text is not an integer.
        return None

def date_range(unit, range_from, range_to, epoch=None, week_starts_on='sunday'):
"""
Get the epoch start time and end time of a range of ``unit``s, reckoning the
Expand Down
86 changes: 86 additions & 0 deletions docs/asciidoc/filter_elements.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* <<fe_timestring,timestring>>
* <<fe_unit,unit>>
* <<fe_unit_count,unit_count>>
* <<fe_unit_count_pattern,unit_count_pattern>>
* <<fe_use_age,use_age>>
* <<fe_value,value>>
* <<fe_week_starts_on,week_starts_on>>
Expand Down Expand Up @@ -620,11 +621,96 @@ a point of reference in the future by using a negative value for
This setting must be set by the user or an exception will be raised, and
execution will halt.

If this setting is used in conjunction with <<fe_unit_count_pattern,unit_count_pattern>>, the configured
value will only be used as a fallback value in case the pattern could not be matched. The value _-1_ has
a special meaning in this case and causes the index to be ignored when pattern matching fails.

TIP: See the <<filtertype_age,age filter documentation>> for more information
about time calculation.



[[fe_unit_count_pattern]]
== unit_count_pattern

NOTE: This setting is only used with the age filtertype. It defines whether the
<<fe_unit_count,unit_count>> value is taken from the configuration or read from
the index name via a regular expression.

[source,yaml]
-------------
- filtertype: age
source: creation_date
direction: older
unit: days
unit_count: 3
unit_count_pattern: -([0-9]+)-
-------------

This setting can be used in cases where the value against which index age should be assessed is not a static
value but can be different for every index. In this case, the index-specific value can be extracted
from the index name via a regular expression defined in this parameter.

Consider for example the following index name patterns that contain the retention time in their name:
_logstash-30-yyyy.mm.dd_, _logstash-12-yyyy.mm_, __3_logstash-yyyy.mm.dd_.

To extract a value from the index names, this setting will be compiled as a regular expression and matched
against index names. For a successful match, the value of the first capture group from the regular expression
is used as the value for <<fe_unit_count,unit_count>>.

If there is any error during compiling or matching the expression, or the expression does not
contain a capture group, the value configured in <<fe_unit_count,unit_count>> is used as a fallback value,
unless it is set to _-1_, in which case the index will be skipped.

TIP: Regular expressions and match groups are not explained here as they are a fairly large and complex topic,
but there are numerous resources online that will help. Using an online tool for testing regular expressions
like https://regex101.com/[regex101.com] will help a lot when developing patterns.

*Examples*

* _logstash-30-yyyy.mm.dd_: Daily index that should be deleted after 30 days; indices that
don't match the pattern will be deleted after 365 days

[source,yaml]
-------------
- filtertype: age
source: creation_date
direction: older
unit: days
unit_count: 365
unit_count_pattern: -([0-9]+)-
-------------


* _logstash-12-yyyy.mm_: Monthly index that should be deleted after 12 months; indices that
don't match the pattern will be deleted after 3 months

[source,yaml]
-------------
- filtertype: age
source: creation_date
direction: older
unit: months
unit_count: 3
unit_count_pattern: -([0-9]+)-
-------------


* __3_logstash-yyyy.mm.dd_: Daily index that should be deleted after 3 years; indices that
don't match the pattern will be ignored

[source,yaml]
-------------
- filtertype: age
source: creation_date
direction: older
unit: years
unit_count: -1
unit_count_pattern: ^_([0-9]+)_
-------------

IMPORTANT: Be sure to pay attention to the interaction of this parameter and <<fe_unit_count,unit_count>>!


[[fe_use_age]]
== use_age

Expand Down
164 changes: 164 additions & 0 deletions test/integration/test_delete_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,170 @@
global_client = elasticsearch.Elasticsearch(host=host, port=port)

class TestCLIDeleteIndices(CuratorTestCase):
def test_retention_from_name_months(self):
    # Test extraction of unit_count from the index name.
    # Create indices for 10 months with a retention time of 2 months in
    # the index name. unit_count is -1, i.e. no fallback value.
    # Expected: 8 oldest indices are deleted, 2 remain
    self.args['prefix'] = 'logstash_2_'
    self.args['time_unit'] = 'months'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m\'', 'months', -1, '_([0-9]+)_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(2, len(curator.get_indices(self.client)))
def test_retention_from_name_days(self):
    # Test extraction of unit_count from the index name.
    # Create indices for 10 days with a retention time of 5 days in the
    # index name. The configured unit_count of 30 is only a fallback and
    # is not expected to be used, since every index name matches.
    # Expected: 5 oldest indices are deleted, 5 remain
    self.args['prefix'] = 'logstash_5_'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m.%d\'', 'days', 30, '_([0-9]+)_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(5, len(curator.get_indices(self.client)))
def test_retention_from_name_days_ignore_failed_match(self):
    # Test extraction of unit_count from the index name.
    # Create indices for 10 days with a retention time of 5 days in the index name.
    # Create indices for 10 days with no retention time in the index name.
    # Expected: of the first group the 5 oldest are deleted and 5 remain;
    # the 10 indices without a retention time all remain, 15 in total.
    # NOTE(review): unit_count is 30 here, not -1, so the unmatched indices
    # presumably survive via the 30-day fallback rather than by being
    # skipped -- confirm whether -1 was intended given the test name.
    self.args['prefix'] = 'logstash_5_'
    self.create_indices(10)
    self.args['prefix'] = 'logstash_'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m.%d\'', 'days', 30, '_([0-9]+)_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(15, len(curator.get_indices(self.client)))
def test_retention_from_name_days_failed_match_with_fallback(self):
    # Test extraction of unit_count from the index name.
    # Create indices for 10 days with a retention time of 5 days in the index name.
    # Create indices for 10 days with no retention time in the index name,
    # and configure a fallback unit_count of 7.
    # Expected: of the first group the 5 oldest are deleted and 5 remain;
    # of the second group 7 remain due to the fallback value, 12 in total.
    self.args['prefix'] = 'logstash_5_'
    self.create_indices(10)
    self.args['prefix'] = 'logstash_'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m.%d\'', 'days', 7, '_([0-9]+)_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(12, len(curator.get_indices(self.client)))
def test_retention_from_name_no_capture_group(self):
    # Test extraction of unit_count from the index name when the pattern
    # contains no capture group.
    # Create indices for 10 months with a retention time of 2 months in
    # the index name. unit_count is -1, i.e. no fallback value.
    # Expected: all indices remain as the pattern cannot be used to
    # extract a retention time
    self.args['prefix'] = 'logstash_2_'
    self.args['time_unit'] = 'months'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m\'', 'months', -1, '_[0-9]+_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(10, len(curator.get_indices(self.client)))
def test_retention_from_name_illegal_regex_no_fallback(self):
    # Test extraction of unit_count from the index name when the pattern
    # is an illegal regular expression (unterminated character class).
    # Create indices for 10 months with a retention time of 2 months in
    # the index name. unit_count is -1, i.e. no fallback value.
    # Expected: all indices remain as the pattern cannot be used to
    # extract a retention time
    self.args['prefix'] = 'logstash_2_'
    self.args['time_unit'] = 'months'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m\'', 'months', -1, '_[0-9+_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(10, len(curator.get_indices(self.client)))
def test_retention_from_name_illegal_regex_with_fallback(self):
    # Test extraction of unit_count from the index name when the pattern
    # is an illegal regular expression (unterminated character class).
    # Create indices for 10 months with a retention time of 2 months in
    # the index name, and configure a fallback unit_count of 3.
    # Expected: Fallback value of 3 is used and 3 most recent indices
    # remain in place
    self.args['prefix'] = 'logstash_2_'
    self.args['time_unit'] = 'months'
    self.create_indices(10)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(self.args['actionfile'],
        testvars.delete_pattern_proto.format(
            'age', 'name', 'older', '\'%Y.%m\'', 'months', 3, '_[0-9+_', ' ', ' ', ' '
        )
    )
    test = clicktest.CliRunner()
    test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(3, len(curator.get_indices(self.client)))
def test_name_older_than_now(self):
self.create_indices(10)
self.write_config(
Expand Down

0 comments on commit 0e58a5c

Please sign in to comment.