From b5a2cf2a7a538deecf46ece2252af1eb0afd6cf4 Mon Sep 17 00:00:00 2001 From: Hiroshi Hatake Date: Fri, 5 Apr 2024 11:40:13 +0900 Subject: [PATCH] in_tail: Expand glob capability for square brackets and one character matcher (#4401) Signed-off-by: Hiroshi Hatake --- lib/fluent/plugin/in_tail.rb | 36 ++++++- test/plugin/data/log_numeric/01.log | 0 test/plugin/data/log_numeric/02.log | 0 test/plugin/data/log_numeric/12.log | 0 test/plugin/data/log_numeric/14.log | 0 test/plugin/test_in_tail.rb | 141 ++++++++++++++++++++++++++++ 6 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 test/plugin/data/log_numeric/01.log create mode 100644 test/plugin/data/log_numeric/02.log create mode 100644 test/plugin/data/log_numeric/12.log create mode 100644 test/plugin/data/log_numeric/14.log diff --git a/lib/fluent/plugin/in_tail.rb b/lib/fluent/plugin/in_tail.rb index 7e0289f65b..fc5761f917 100644 --- a/lib/fluent/plugin/in_tail.rb +++ b/lib/fluent/plugin/in_tail.rb @@ -65,6 +65,8 @@ def initialize config_param :path, :string desc 'path delimiter used for spliting path config' config_param :path_delimiter, :string, default: ',' + desc 'Choose using glob patterns. Adding capabilities to handle [] and ?, and {}.' + config_param :glob_policy, :enum, list: [:backward_compatible, :extended, :always], default: :backward_compatible desc 'The tag of the event.' config_param :tag, :string desc 'The paths to exclude the files from watcher list.' @@ -141,6 +143,14 @@ def configure(conf) raise Fluent::ConfigError, "either of enable_watch_timer or enable_stat_watcher must be true" end + if @glob_policy == :always && @path_delimiter == ',' + raise Fluent::ConfigError, "cannot use glob_policy as always with the default path_delimitor: `,\"" + end + + if @glob_policy == :extended && /\{.*,.*\}/.match(@path) && extended_glob_pattern(@path) + raise Fluent::ConfigError, "cannot include curly braces with glob patterns in `#{@path}\". Use glob_policy always instead." 
+ end + if RESERVED_CHARS.include?(@path_delimiter) rc = RESERVED_CHARS.join(', ') raise Fluent::ConfigError, "#{rc} are reserved words: #{@path_delimiter}" @@ -288,6 +298,28 @@ def have_read_capability? @capability.have_capability?(:effective, :dac_override) end + def extended_glob_pattern(path) + path.include?('*') || path.include?('?') || /\[.*\]/.match(path) + end + + # Curly braces are not supported with the default path_delimiter + # because the default path delimiter is ",". + # That delimiter collides with the commas of a curly-brace pattern, and + # such configurations are handled as errors in #configure. + def use_glob?(path) + if @glob_policy == :always + # For future extensions, we chose the term `always' so that as many + # patterns as possible can be handled under this policy. + # This is why `true' is not simply used as the return value + # when :always is chosen here. + extended_glob_pattern(path) || /\{.*,.*\}/.match(path) + elsif @glob_policy == :extended + extended_glob_pattern(path) + elsif @glob_policy == :backward_compatible + path.include?('*') + end + end + def expand_paths date = Fluent::EventTime.now paths = [] @@ -297,7 +329,7 @@ def expand_paths else date.to_time.strftime(path) end - if path.include?('*') + if use_glob?(path) paths += Dir.glob(path).select { |p| begin is_file = !File.directory?(p) @@ -332,7 +364,7 @@ def expand_paths else date.to_time.strftime(path) end - path.include?('*') ? Dir.glob(path) : path + use_glob?(path) ? 
Dir.glob(path) : path }.flatten.uniq # filter out non existing files, so in case pattern is without '*' we don't do unnecessary work hash = {} diff --git a/test/plugin/data/log_numeric/01.log b/test/plugin/data/log_numeric/01.log new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/plugin/data/log_numeric/02.log b/test/plugin/data/log_numeric/02.log new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/plugin/data/log_numeric/12.log b/test/plugin/data/log_numeric/12.log new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/plugin/data/log_numeric/14.log b/test/plugin/data/log_numeric/14.log new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/plugin/test_in_tail.rb b/test/plugin/test_in_tail.rb index 58006c0b98..d30595ce9a 100644 --- a/test/plugin/test_in_tail.rb +++ b/test/plugin/test_in_tail.rb @@ -1538,6 +1538,147 @@ def test_expand_paths assert_equal(ex_paths - [ex_paths.last], plugin.expand_paths.values.sort_by { |path_ino| path_ino.path }) end + sub_test_case "expand_paths with glob" do |data| + sub_test_case "extended_glob" do + data("curly braces" => [true, "always", "test/plugin/data/log_numeric/{0,1}*.log"], + "square brackets" => [true, "always", "test/plugin/data/log_numeric/[0-1][2-4].log"], + "asterisk" => [true, "always", "test/plugin/data/log/*.log"], + "one character matcher" => [true, "always", "test/plugin/data/log/tes?.log"], + ) + def test_expand_paths_with_use_glob_p_and_almost_set_of_patterns + result, option, path = data + config = config_element("", "", { + "tag" => "tail", + "path" => path, + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + "read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => option, + "path_delimiter" => "|", + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + plugin = create_driver(config, false).instance + assert_equal(result, !!plugin.use_glob?(path)) + end + + data("curly braces" => [true, 
false, "extended", "test/plugin/data/log_numeric/{0,1}*.log"], + "square brackets" => [false, true, "extended", "test/plugin/data/log_numeric/[0-1][2-4].log"], + "asterisk" => [false, true, "extended", "test/plugin/data/log/*.log"], + "one character matcher" => [false, true, "extended", "test/plugin/data/log/tes?.log"], + ) + def test_expand_paths_with_use_glob_p + emit_exception_p, result, option, path = data + config = config_element("", "", { + "tag" => "tail", + "path" => path, + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + "read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => option, + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + if emit_exception_p + assert_raise(Fluent::ConfigError) do + plugin = create_driver(config, false).instance + end + else + plugin = create_driver(config, false).instance + assert_equal(result, !!plugin.use_glob?(path)) + end + end + end + + sub_test_case "only_use_backward_compatible" do + data("square brackets" => [false, "backward_compatible", "test/plugin/data/log_numeric/[0-1][2-4].log"], + "asterisk" => [true, "backward_compatible", "test/plugin/data/log/*.log"], + "one character matcher" => [false, "backward_compatible", "test/plugin/data/log/tes?.log"], + ) + def test_expand_paths_with_use_glob_p + result, option, path = data + config = config_element("", "", { + "tag" => "tail", + "path" => path, + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + "read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => option, + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + plugin = create_driver(config, false).instance + assert_equal(result, !!plugin.use_glob?(path)) + end + end + end + + def ex_config_with_brackets + config_element("", "", { + "tag" => "tail", + "path" => "test/plugin/data/log_numeric/[0-1][2-4].log", + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + 
"read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => "extended", + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + end + + def test_config_with_always_with_default_delimiter + assert_raise(Fluent::ConfigError) do + config = config_element("", "", { + "tag" => "tail", + "path" => "test/plugin/data/log_numeric/[0-1][2-4].log", + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + "read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => "always", + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + + create_driver(config, false).instance + end + end + + def test_config_with_always_with_custom_delimiter + assert_nothing_raised do + config = config_element("", "", { + "tag" => "tail", + "path" => "test/plugin/data/log_numeric/[0-1][2-4].log", + "format" => "none", + "pos_file" => "#{@tmp_dir}/tail.pos", + "read_from_head" => true, + "refresh_interval" => 30, + "glob_policy" => "always", + "path_delimiter" => "|", + "rotate_wait" => "#{EX_ROTATE_WAIT}s", + "follow_inodes" => "#{EX_FOLLOW_INODES}", + }) + + create_driver(config, false).instance + end + end + + def test_expand_paths_with_brackets + expanded_paths = [ + create_target_info('test/plugin/data/log_numeric/01.log'), + create_target_info('test/plugin/data/log_numeric/02.log'), + create_target_info('test/plugin/data/log_numeric/12.log'), + create_target_info('test/plugin/data/log_numeric/14.log'), + ] + + plugin = create_driver(ex_config_with_brackets, false).instance + assert_equal(expanded_paths - [expanded_paths.first], plugin.expand_paths.values.sort_by { |path_ino| path_ino.path }) + end + def test_expand_paths_with_duplicate_configuration expanded_paths = [ create_target_info('test/plugin/data/log/foo/bar.log'),