From 7c902744d5488892f8ecd23469f917a8c2c818fe Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 10 Aug 2018 11:47:02 +0900 Subject: [PATCH 1/5] Add grok_name_key If grok_name_key is specified, parsed records will include the key which is name (defined by grok.name) or index. Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/grok.rb | 8 ++++---- lib/fluent/plugin/parser_grok.rb | 7 ++++++- lib/fluent/plugin/parser_multiline_grok.rb | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/fluent/plugin/grok.rb b/lib/fluent/plugin/grok.rb index c6c35a0..e02ad25 100644 --- a/lib/fluent/plugin/grok.rb +++ b/lib/fluent/plugin/grok.rb @@ -23,7 +23,7 @@ class GrokPatternNotFoundError < StandardError def initialize(plugin, conf) @pattern_map = {} - @parsers = [] + @parsers = {} @multiline_mode = false @conf = conf @plugin = plugin @@ -45,10 +45,10 @@ def add_patterns_from_file(path) def setup if @plugin.grok_pattern - @parsers << expand_pattern_expression(@plugin.grok_pattern, @conf) + @parsers[:grok_pattern] = expand_pattern_expression(@plugin.grok_pattern, @conf) else - @plugin.grok_confs.each do |grok_conf| - @parsers << expand_pattern_expression(grok_conf.pattern, grok_conf) + @plugin.grok_confs.each.with_index do |grok_conf, index| + @parsers[grok_conf.name || index] = expand_pattern_expression(grok_conf.pattern, grok_conf) end end @parsers.compact! 
diff --git a/lib/fluent/plugin/parser_grok.rb b/lib/fluent/plugin/parser_grok.rb index b2ef4fb..b41dd90 100644 --- a/lib/fluent/plugin/parser_grok.rb +++ b/lib/fluent/plugin/parser_grok.rb @@ -14,8 +14,12 @@ class GrokParser < Parser config_param :custom_pattern_path, :string, default: nil desc "The key has grok failure reason" config_param :grok_failure_key, :string, default: nil + desc "The key name to store grok section's name" + config_param :grok_name_key, :string, default: nil config_section :grok, param_name: "grok_confs", multi: true do + desc "The name of this grok section" + config_param :name, :string, default: nil desc "The pattern of grok" config_param :pattern, :string end @@ -49,9 +53,10 @@ def configure(conf={}) end def parse(text) - @grok.parsers.each do |parser| + @grok.parsers.each do |name_or_index, parser| parser.parse(text) do |time, record| if time and record + record[@grok_name_key] = name_or_index if @grok_name_key yield time, record return end diff --git a/lib/fluent/plugin/parser_multiline_grok.rb b/lib/fluent/plugin/parser_multiline_grok.rb index 29671d0..a4e29fe 100644 --- a/lib/fluent/plugin/parser_multiline_grok.rb +++ b/lib/fluent/plugin/parser_multiline_grok.rb @@ -17,9 +17,10 @@ def firstline?(text) end def parse(text) - @grok.parsers.each do |parser| + @grok.parsers.each do |name_or_index, parser| parser.parse(text) do |time, record| if time and record + record[@grok_name_key] = name_or_index if @grok_name_key yield time, record return end From 901249aeda6274752b42aaa73afa2cb5d45d69bc Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 10 Aug 2018 12:31:06 +0900 Subject: [PATCH 2/5] Add test for grok_name_key Signed-off-by: Kenji Okimoto --- test/test_grok_parser.rb | 110 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/test/test_grok_parser.rb b/test/test_grok_parser.rb index 16243bb..2d80e5b 100644 --- a/test/test_grok_parser.rb +++ b/test/test_grok_parser.rb @@ -191,6 +191,116 @@ class 
GrokParserTest < ::Test::Unit::TestCase assert_equal("unknown value conversion for key:'path', type:'foo'", error_message) end + sub_test_case "grok_name_key" do + test "one grok section with name" do + d = create_driver(%[ + grok_name_key grok_name + + name path + pattern %{PATH:path} + + ]) + expected = { + "path" => "/", + "grok_name" => "path" + } + d.instance.parse("/") do |time, record| + assert_equal(expected, record) + end + end + + test "one grok section without name" do + d = create_driver(%[ + grok_name_key grok_name + + pattern %{PATH:path} + + ]) + expected = { + "path" => "/", + "grok_name" => 0 + } + d.instance.parse("/") do |time, record| + assert_equal(expected, record) + end + end + + test "multiple grok sections with name" do + d = create_driver(%[ + grok_name_key grok_name + + name path + pattern %{PATH:path} + + + name ip + pattern %{IP:ip_address} + + ]) + expected = [ + { "path" => "/", "grok_name" => "path" }, + { "ip_address" => "127.0.0.1", "grok_name" => "ip" }, + ] + records = [] + d.instance.parse("/") do |time, record| + records << record + end + d.instance.parse("127.0.0.1") do |time, record| + records << record + end + assert_equal(expected, records) + end + + test "multiple grok sections without name" do + d = create_driver(%[ + grok_name_key grok_name + + pattern %{PATH:path} + + + pattern %{IP:ip_address} + + ]) + expected = [ + { "path" => "/", "grok_name" => 0 }, + { "ip_address" => "127.0.0.1", "grok_name" => 1 }, + ] + records = [] + d.instance.parse("/") do |time, record| + records << record + end + d.instance.parse("127.0.0.1") do |time, record| + records << record + end + assert_equal(expected, records) + end + + test "multiple grok sections with both name and index" do + d = create_driver(%[ + grok_name_key grok_name + + name path + pattern %{PATH:path} + + + pattern %{IP:ip_address} + + ]) + expected = [ + { "path" => "/", "grok_name" => "path" }, + { "ip_address" => "127.0.0.1", "grok_name" => 1 }, + ] + records = [] + 
d.instance.parse("/") do |time, record| + records << record + end + d.instance.parse("127.0.0.1") do |time, record| + records << record + end + assert_equal(expected, records) + end + end + private def create_driver(conf) From 29c5d29a69b2f8ab6b1387d2b27c31b60388ecce Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 10 Aug 2018 12:50:24 +0900 Subject: [PATCH 3/5] Update README.md Signed-off-by: Kenji Okimoto --- README.md | 63 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index adc352d..2998794 100644 --- a/README.md +++ b/README.md @@ -95,21 +95,19 @@ You can use this parser without `multiline_start_regexp` when you know your data ## Configurations -**time_format** +* See also: [TimeParameters Plugin Overview](https://docs.fluentd.org/v1.0/articles/timeparameters-plugin-overview) +* See also: [Parser Plugin Overview](https://docs.fluentd.org/v1.0/articles/parser-plugin-overview) -The format of the time field. +* **time_format** (string) (optional): The format of the time field. +* **grok_pattern** (string) (optional): The pattern of grok. You cannot specify multiple grok patterns with this. +* **custom_pattern_path** (string) (optional): Path to the file that includes custom grok patterns. +* **grok_failure_key** (string) (optional): The key that stores the grok failure reason. +* **grok_name_key** (string) (optional): The key name to store the grok section's name. +* **multiline_start_regexp** (string) (optional): The regexp to match beginning of multiline. This is only for "multiline_grok". -**grok_pattern** +## Examples -The pattern of grok. You cannot specify multiple grok pattern with this. - -**custom_pattern_path** - -Path to the file that includes custom grok patterns - -**grok_failure_key** - -The key has grok failure reason. Default is `nil`. 
+### Using grok\_failure\_key ```aconf @@ -149,21 +147,42 @@ This generates following events: 2016-11-28 13:07:09.010400923 +0900 dummy.log: {"message1":"/","prog":"bar","path":"/"} ``` - -**grok/pattern** - -Section for grok patterns. You can use multiple grok patterns with -multiple `` sections. +### Using grok\_name\_key ```aconf - - pattern %{IP:ipaddress} - + + @type tail + path /path/to/log + tag grokked_log + grok_name_key grok_name + grok_failure_key grokfailure + + @type grok + + name apache_log + pattern %{COMBINEDAPACHELOG} + time_format "%d/%b/%Y:%H:%M:%S %z" + + + name ip_address + pattern %{IP:ip_address} + + + name rest_message + pattern %{GREEDYDATA:message} + + + ``` -**multiline_start_regexp** +This will add keys like the following: + +* Add `grok_name: "apache_log"` if the record matches `COMBINEDAPACHELOG` +* Add `grok_name: "ip_address"` if the record matches `IP` +* Add `grok_name: "rest_message"` if the record matches `GREEDYDATA` -The regexp to match beginning of multiline. This is only for "multiline_grok". +Add the `grokfailure` key to the record if the record does not match any grok pattern. +See also the test code for more details. ## How to write Grok patterns From 75a3b3ac52091350e768d1249c60e378efa3d0cc Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 10 Aug 2018 12:53:44 +0900 Subject: [PATCH 4/5] Use Hash#reject! instead of Hash#compact! Because Hash#compact! has been added since Ruby 2.4.0 Signed-off-by: Kenji Okimoto --- lib/fluent/plugin/grok.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/fluent/plugin/grok.rb b/lib/fluent/plugin/grok.rb index e02ad25..eef8277 100644 --- a/lib/fluent/plugin/grok.rb +++ b/lib/fluent/plugin/grok.rb @@ -51,7 +51,9 @@ def setup @parsers[grok_conf.name || index] = expand_pattern_expression(grok_conf.pattern, grok_conf) end end - @parsers.compact! + @parsers.reject! do |key, parser| + parser.nil? + end if @parsers.empty? raise Fluent::ConfigError, 'no grok patterns. 
Check configuration, e.g. typo, configuration syntax, etc' end From ffec54c15012778ad4225b0af5c1eb6897d1d13a Mon Sep 17 00:00:00 2001 From: Kenji Okimoto Date: Fri, 10 Aug 2018 12:56:24 +0900 Subject: [PATCH 5/5] travis: Organize Ruby versions Signed-off-by: Kenji Okimoto --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 00147ee..a4ed7d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,8 @@ sudo: false language: ruby rvm: - - 2.1 - - 2.2 - - 2.3.3 - - 2.4.0 + - 2.2.10 + - 2.3.7 + - 2.4.4 + - 2.5.1