Skip to content

Commit

Permalink
Merge pull request #1076 from untergeek/feature/1045
Browse files Browse the repository at this point in the history
Add intersect per #1045
  • Loading branch information
untergeek committed Sep 29, 2017
2 parents 4e95048 + a7b80e2 commit 635d0ea
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 22 deletions.
4 changes: 4 additions & 0 deletions curator/defaults/filter_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def field(**kwargs):
else:
return { Optional('field'): Any(str, unicode) }

def intersect(**kwargs):
    """Return the schema fragment for the optional ``intersect`` setting.

    ``intersect`` is only used with the period filtertype when ``source`` is
    ``field_stats``. The value may be a bool, or a string that is passed
    through ``Boolean()``; when omitted it defaults to ``False``.
    ``kwargs`` is accepted for signature parity but is not read.
    """
    boolean_like = Any(bool, All(Any(str, unicode), Boolean()))
    return {Optional('intersect', default=False): boolean_like}

def key(**kwargs):
    """Return the schema fragment for the required ``key`` setting.

    ``key`` is only used with the allocated filtertype and must be a string.
    ``kwargs`` is accepted for signature parity but is not read.
    """
    string_value = Any(str, unicode)
    return {Required('key'): string_value}
Expand Down
1 change: 1 addition & 0 deletions curator/defaults/filtertypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def period(action, config):
filter_elements.week_starts_on(),
filter_elements.epoch(),
filter_elements.exclude(),
filter_elements.intersect(),
]
retval += _age_elements(action, config)
return retval
Expand Down
1 change: 1 addition & 0 deletions curator/defaults/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def structural_filter_elements():
Optional('epoch'): Any(Coerce(int), None),
Optional('exclude'): Any(int, str, unicode, bool, None),
Optional('field'): Any(str, unicode, None),
Optional('intersect'): Any(int, str, unicode, bool, None),
Optional('key'): Any(str, unicode),
Optional('kind'): Any(str, unicode),
Optional('max_num_segments'): Coerce(int),
Expand Down
51 changes: 37 additions & 14 deletions curator/indexlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,7 @@ def filter_by_count(

def filter_period(
self, source='name', range_from=None, range_to=None, timestring=None,
unit=None, field=None, stats_result='min_value',
unit=None, field=None, stats_result='min_value', intersect=False,
week_starts_on='sunday', epoch=None, exclude=False,
):
"""
Expand All @@ -862,8 +862,12 @@ def filter_period(
:arg field: A timestamp field name. Only used for ``field_stats`` based
calculations.
:arg stats_result: Either `min_value` or `max_value`. Only used in
conjunction with `source`=``field_stats`` to choose whether to
conjunction with ``source``=``field_stats`` to choose whether to
reference the minimum or maximum result value.
:arg intersect: Only used when ``source``=``field_stats``.
If `True`, only indices where both `min_value` and `max_value` are
within the period will be selected. If `False`, it will use whichever
you specified. Default is `False` to preserve expected behavior.
:arg week_starts_on: Either ``sunday`` or ``monday``. Default is
``sunday``
:arg epoch: An epoch timestamp used to establish a point of reference
Expand All @@ -888,19 +892,38 @@ def filter_period(
)
for index in self.working_list():
try:
age = int(self.index_info[index]['age'][self.age_keyfield])
msg = (
'Index "{0}" age ({1}), period start: "{2}", period '
'end, "{3}"'.format(
index,
age,
start,
end
if source == 'field_stats' and intersect:
min_age = int(self.index_info[index]['age']['min_value'])
max_age = int(self.index_info[index]['age']['max_value'])
msg = (
'Index "{0}", timestamp field "{1}", min_value ({2}), '
'max_value ({3}), period start: "{4}", period '
'end, "{5}"'.format(
index,
field,
min_age,
max_age,
start,
end
)
)
)
# Because time adds to epoch, smaller numbers are actually older
# timestamps.
inrange = ((age >= start) and (age <= end))
# Because time adds to epoch, smaller numbers are actually older
# timestamps.
inrange = ((min_age >= start) and (max_age <= end))
else:
age = int(self.index_info[index]['age'][self.age_keyfield])
msg = (
'Index "{0}" age ({1}), period start: "{2}", period '
'end, "{3}"'.format(
index,
age,
start,
end
)
)
# Because time adds to epoch, smaller numbers are actually older
# timestamps.
inrange = ((age >= start) and (age <= end))
self.__excludify(inrange, exclude, index, msg)
except KeyError:
self.loggit.debug(
Expand Down
6 changes: 6 additions & 0 deletions docs/Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@ Changelog

**New Features**

* With the period filter and field_stats, it is useful to match indices
that fit `within` the period, rather than just their start dates. This
is now possible with ``intersect``. See more in the documentation.
Requested in #1045. (untergeek)

**Bug Fixes**

* Delete the target index (if it exists) in the event that a shrink fails.
Requested in #1058 (untergeek)
* Fixed an integration test that could fail in the waning days of a month.

5.2.0 (1 September 2017)
------------------------
Expand Down
36 changes: 36 additions & 0 deletions docs/asciidoc/filter_elements.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* <<fe_epoch,epoch>>
* <<fe_exclude,exclude>>
* <<fe_field,field>>
* <<fe_intersect,intersect>>
* <<fe_key,key>>
* <<fe_kind,kind>>
* <<fe_max_num_segments,max_num_segments>>
Expand Down Expand Up @@ -281,6 +282,41 @@ The default value for this setting is `@timestamp`.



[[fe_intersect]]
== intersect

NOTE: This setting is only available in the <<filtertype_period,period>> filtertype.
This setting is strictly optional.

[source,yaml]
-------------
- filtertype: period
source: field_stats
direction: older
intersect: true
unit: weeks
range_from: -1
range_to: -1
field: '@timestamp'
stats_result: min_value
-------------

The value of this setting must be `True` or `False`.

`field_stats` uses the {ref}/search-field-stats.html[Field Stats API] to
calculate either the `min_value` or the `max_value` of the <<fe_field,`field`>>
as the <<fe_stats_result,`stats_result`>>. If `intersect` is `True`, then
only indices where the `min_value` _and_ the `max_value` are within the `range_from`
and `range_to` (relative to `unit`) will match. This means that either `min_value`
or `max_value` can be used for <<fe_stats_result,`stats_result`>> when `intersect`
is `True` with identical results.

This setting is only used when <<fe_source,source>> is `field_stats`.

The default value for this setting is `False`.



[[fe_key]]
== key

Expand Down
1 change: 1 addition & 0 deletions docs/asciidoc/filters.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ that whole unit will be selected, in this case, a month.
* <<fe_timestring,timestring>> (required if `source` is `name`)
* <<fe_field,field>> (required if `source` is `field_stats`) [Indices only]
* <<fe_stats_result,stats_result>> (only used if `source` is `field_stats`) [Indices only]
* <<fe_intersect,intersect>> (optional if `source` is `field_stats`) [Indices only]

=== Optional settings

Expand Down
54 changes: 48 additions & 6 deletions test/integration/test_delete_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,16 +171,15 @@ def test_retention_from_name_illegal_regex_no_fallback(self):
self.assertEquals(10, len(curator.get_indices(self.client)))
def test_retention_from_name_illegal_regex_with_fallback(self):
# Test extraction of unit_count from index name when pattern contains an illegal regular expression
# Create indices for 10 months with retention time of 2 months in index name
# Create indices for 10 days with retention time of 2 days in index name
# Expected: Fallback value of 3 is used and 3 most recent indices remain in place
self.args['prefix'] = 'logstash_2_'
self.args['time_unit'] = 'months'
self.create_indices(10)
self.write_config(
self.args['configfile'], testvars.client_config.format(host, port))
self.write_config(self.args['actionfile'],
testvars.delete_pattern_proto.format(
'age', 'name', 'older', '\'%Y.%m\'', 'months', 3, '_[0-9+_', ' ', ' ', ' '
'age', 'name', 'older', '\'%Y.%m.%d\'', 'days', 3, '_[0-9+_', ' ', ' ', ' '
)
)
test = clicktest.CliRunner()
Expand Down Expand Up @@ -238,15 +237,16 @@ def test_delete_in_period(self):
# unit: {5}
# field: {6}
# stats_result: {7}
# epoch: {8}
# week_starts_on: {9}
# intersect: {8}
# epoch: {9}
# week_starts_on: {10}
self.create_indices(10)
self.write_config(
self.args['configfile'], testvars.client_config.format(host, port))
self.write_config(self.args['actionfile'],
testvars.delete_period_proto.format(
'period', 'name', '-5', '-1', "'%Y.%m.%d'", 'days',
' ', ' ', ' ', 'monday'
' ', ' ', ' ', ' ', 'monday'
)
)
test = clicktest.CliRunner()
Expand All @@ -259,6 +259,48 @@ def test_delete_in_period(self):
)
self.assertEqual(0, result.exit_code)
self.assertEquals(5, len(curator.get_indices(self.client)))
def test_delete_in_period_intersect(self):
    """Check that ``intersect: true`` deletes only fully-contained indices.

    Two indices are created, each holding an "Earliest" and a "Latest"
    document. Both share the same latest ``@timestamp`` (2017-09-29) but
    differ in their earliest one (2017-09-25 vs. 2017-09-01). The period
    filter is run with ``source: field_stats``, ``unit: weeks``,
    ``range_from``/``range_to`` of 0 and a fixed ``epoch`` of 1506716040
    (2017-09-29T20:14:00Z). The test expects ``intersecting`` to be deleted
    and ``notintersecting`` to be the only index left.
    """
    # Placeholders of testvars.delete_period_proto:
    #   filtertype: {0}
    #   source: {1}
    #   range_from: {2}
    #   range_to: {3}
    #   timestring: {4}
    #   unit: {5}
    #   field: {6}
    #   stats_result: {7}
    #   intersect: {8}
    #   epoch: {9}
    #   week_starts_on: {10}
    #
    # Reference epoch values for the document timestamps used below:
    #   2017-09-01T01:00:00 = 1504227600
    #   2017-09-25T01:00:00 = 1506301200
    #   2017-09-29T01:00:00 = 1506646800
    self.create_index('intersecting')
    self.create_index('notintersecting')
    self.client.index(
        index='intersecting', doc_type='log', id='1',
        body={'@timestamp': '2017-09-25T01:00:00Z', 'doc': 'Earliest'})
    self.client.index(
        index='intersecting', doc_type='log', id='2',
        body={'@timestamp': '2017-09-29T01:00:00Z', 'doc': 'Latest'})
    self.client.index(
        index='notintersecting', doc_type='log', id='1',
        body={'@timestamp': '2017-09-01T01:00:00Z', 'doc': 'Earliest'})
    self.client.index(
        index='notintersecting', doc_type='log', id='2',
        body={'@timestamp': '2017-09-29T01:00:00Z', 'doc': 'Latest'})
    # Flush so the indexed documents are persisted before the filter runs.
    self.client.indices.flush(index='_all', force=True)
    self.write_config(
        self.args['configfile'], testvars.client_config.format(host, port))
    self.write_config(
        self.args['actionfile'],
        testvars.delete_period_proto.format(
            'period', 'field_stats', '0', '0', ' ', 'weeks',
            "'@timestamp'", 'min_value', 'true', 1506716040, 'sunday'
        )
    )
    test = clicktest.CliRunner()
    result = test.invoke(
        curator.cli,
        [
            '--config', self.args['configfile'],
            self.args['actionfile']
        ],
    )
    self.assertEqual(0, result.exit_code)
    indices = curator.get_indices(self.client)
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(1, len(indices))
    self.assertEqual('notintersecting', indices[0])
def test_empty_list(self):
self.create_indices(10)
self.write_config(
Expand Down
5 changes: 3 additions & 2 deletions test/integration/testvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,9 @@
' unit: {5}\n'
' field: {6}\n'
' stats_result: {7}\n'
' epoch: {8}\n'
' week_starts_on: {9}\n')
' intersect: {8}\n'
' epoch: {9}\n'
' week_starts_on: {10}\n')

delete_ignore_proto = ('---\n'
'actions:\n'
Expand Down

0 comments on commit 635d0ea

Please sign in to comment.