8 changes: 5 additions & 3 deletions README.md
@@ -46,7 +46,7 @@ extracts the first IP address that matches in the log.
<parse>
@type grok
<grok>
-pattern %{COMBINEDAPACHELOG}
+pattern %{HTTPD_COMBINEDLOG}
time_format "%d/%b/%Y:%H:%M:%S %z"
</grok>
<grok>
@@ -106,6 +106,8 @@ You can use this parser without `multiline_start_regexp` when you know your data
* **grok_failure_key** (string) (optional): The key to store the grok failure reason in.
* **grok_name_key** (string) (optional): The key name to store the grok section's name in.
* **multi_line_start_regexp** (string) (optional): The regexp that matches the beginning of a multiline block. This is only for "multiline_grok".
+* **grok_pattern_series** (enum) (optional): Specify the grok pattern series set to use; see the example below.
+  * Default value: `legacy`.
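
For reference, a minimal configuration sketch using the new option (the `tail` source and its path are hypothetical; only `grok_pattern_series` and the pattern come from this change):

    <source>
      @type tail
      path /var/log/httpd/access_log
      tag apache.access
      <parse>
        @type grok
        grok_pattern_series ecs-v1
        <grok>
          pattern %{HTTPD_COMBINEDLOG}
          time_format "%d/%b/%Y:%H:%M:%S %z"
        </grok>
      </parse>
    </source>

With `ecs-v1`, captures carry ECS-style names such as `url.original` and `http.response.status_code` instead of the legacy field names.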

### \<grok\> section (optional) (multiple)

@@ -173,7 +175,7 @@ This generates the following events:
grok_failure_key grokfailure
<grok>
name apache_log
-pattern %{COMBINEDAPACHELOG}
+pattern %{HTTPD_COMBINEDLOG}
time_format "%d/%b/%Y:%H:%M:%S %z"
</grok>
<grok>
@@ -190,7 +192,7 @@ This generates the following events:

This will add keys like the following:

-* Add `grok_name: "apache_log"` if the record matches `COMBINEDAPACHELOG`
+* Add `grok_name: "apache_log"` if the record matches `HTTPD_COMBINEDLOG`
* Add `grok_name: "ip_address"` if the record matches `IP`
* Add `grok_name: "rest_message"` if the record matches `GREEDYDATA`

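As a hedged illustration (field values invented), a record matching the first section would come through with the section name attached:

    {"clientip":"127.0.0.1","verb":"GET","response":"200", ..., "grok_name":"apache_log"}
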
47 changes: 25 additions & 22 deletions Rakefile
@@ -15,9 +15,11 @@ end

desc "Import patterns from submodules"
task "patterns:import" do
-  `git submodule --quiet foreach pwd`.split($\).each do |submodule_path|
-    Dir.glob(File.join(submodule_path, "patterns/*")) do |pattern|
-      cp(pattern, "patterns/", verbose: true)
+  ["legacy", "ecs-v1"].each do |series|
+    `git submodule --quiet foreach pwd`.split($\).each do |submodule_path|
+      Dir.glob(File.join(submodule_path, "patterns/#{series}/*")) do |pattern|
+        cp(pattern, "patterns/#{series}", verbose: true)
+      end
    end
  end

@@ -32,29 +34,30 @@ task "patterns:import" do
array(?::.)?)))?)?
)
\}/x

-  Dir.glob("patterns/*") do |pattern_file|
-    new_lines = ""
-    File.readlines(pattern_file).each do |line|
-      case
-      when line.strip.empty?
-        new_lines << line
-      when line.start_with?("#")
-        new_lines << line
-      else
-        name, pattern = line.split(/\s+/, 2)
-        new_pattern = pattern.gsub(pattern_re) do |m|
-          matched = $~
-          if matched[:type] == "int"
-            "%{#{matched[:pattern]}:#{matched[:subname]}:integer}"
-          else
-            m
-          end
-        end
-        new_lines << "#{name} #{new_pattern}"
-      end
-    end
-    File.write(pattern_file, new_lines)
-  end
+  ["legacy", "ecs-v1"].each do |series|
+    Dir.glob("patterns/#{series}/*") do |pattern_file|
+      new_lines = ""
+      File.readlines(pattern_file).each do |line|
+        case
+        when line.strip.empty?
+          new_lines << line
+        when line.start_with?("#")
+          new_lines << line
+        else
+          name, pattern = line.split(/\s+/, 2)
+          new_pattern = pattern.gsub(pattern_re) do |m|
+            matched = $~
+            if matched[:type] == "int"
+              "%{#{matched[:pattern]}:#{matched[:subname]}:integer}"
+            else
+              m
+            end
+          end
+          new_lines << "#{name} #{new_pattern}"
+        end
+      end
+      File.write(pattern_file, new_lines)
+    end
+  end
end

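To make the import rewrite concrete, here is a standalone Ruby sketch with a simplified stand-in for `pattern_re` (the real regexp above handles more syntax). It renames grok `:int` type hints to the `:integer` spelling this plugin uses; comment lines are passed through untouched, which is why the `# :long - ...:int` comments in the imported files below keep their upstream spelling:

    # Simplified stand-in for pattern_re: matches %{PATTERN:subname:type}.
    simple_re = /%\{(?<pattern>\w+):(?<subname>[^:}]+):(?<type>\w+)\}/

    line = "ELB_URIHOST %{IPORHOST:[url][domain]}(?::%{POSINT:[url][port]:int})?"
    name, pattern = line.split(/\s+/, 2)
    new_pattern = pattern.gsub(simple_re) do |m|
      matched = $~
      if matched[:type] == "int"
        "%{#{matched[:pattern]}:#{matched[:subname]}:integer}"
      else
        m
      end
    end
    puts "#{name} #{new_pattern}"
    # => ELB_URIHOST %{IPORHOST:[url][domain]}(?::%{POSINT:[url][port]:integer})?
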
11 changes: 9 additions & 2 deletions lib/fluent/plugin/grok.rb
@@ -134,8 +134,15 @@ def expand_pattern(pattern)
curr_pattern = @pattern_map[m["pattern"]]
raise GrokPatternNotFoundError, "grok pattern not found: #{pattern}" unless curr_pattern
if m["subname"]
-        replacement_pattern = "(?<#{m["subname"]}>#{curr_pattern})"
-        type_map[m["subname"]] = m["type"] || "string"
+        ecs = /(?<ecs-key>(^\[.*\]$))/.match(m["subname"])
+        subname = if ecs
+                    # remove starting "[" and trailing "]" on matched data
+                    ecs["ecs-key"][1..-2].split("][").join('.')
+                  else
+                    m["subname"]
+                  end
+        replacement_pattern = "(?<#{subname}>#{curr_pattern})"
+        type_map[subname] = m["type"] || "string"
else
replacement_pattern = "(?:#{curr_pattern})"
end
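The new branch converts Elastic's bracketed ECS capture names into dot-separated record keys. A standalone sketch of the same transformation (a plain regexp without the named group, but the same slicing and joining as the diff):

    # Convert "[http][response][status_code]" to "http.response.status_code";
    # names without the surrounding brackets pass through unchanged.
    def ecs_subname(subname)
      return subname unless /^\[.*\]$/.match?(subname)
      # remove the starting "[" and trailing "]", then join segments with "."
      subname[1..-2].split("][").join(".")
    end

    p ecs_subname("[http][response][status_code]")  # => "http.response.status_code"
    p ecs_subname("timestamp")                      # => "timestamp"
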
4 changes: 3 additions & 1 deletion lib/fluent/plugin/parser_grok.rb
@@ -16,6 +16,8 @@ class GrokParser < Parser
config_param :grok_failure_key, :string, default: nil
desc "The key name to store grok section's name"
config_param :grok_name_key, :string, default: nil
+  desc "Specify grok pattern series set"
+  config_param :grok_pattern_series, :enum, list: [:legacy, :"ecs-v1"], default: :legacy

config_section :grok, param_name: "grok_confs", multi: true do
desc "The name of this grok section"
@@ -42,7 +44,7 @@ def configure(conf={})

@grok = Grok.new(self, conf)

-    default_pattern_dir = File.expand_path("../../../../patterns/*", __FILE__)
+    default_pattern_dir = File.expand_path("../../../../patterns/#{@grok_pattern_series}/*", __FILE__)
Dir.glob(default_pattern_dir) do |pattern_file_path|
@grok.add_patterns_from_file(pattern_file_path)
end
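With the new parameter, `configure` globs only the selected series' directory under the gem root. A quick sketch of the effect, assuming it runs from a checkout of this repository after `rake patterns:import`:

    ["legacy", "ecs-v1"].each do |series|
      files = Dir.glob("patterns/#{series}/*")
      puts "#{series}: #{files.size} pattern files"
    end
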
3 changes: 0 additions & 3 deletions patterns/bind

This file was deleted.

28 changes: 28 additions & 0 deletions patterns/ecs-v1/aws
@@ -0,0 +1,28 @@
S3_REQUEST_LINE (?:%{WORD:[http][request][method]} %{NOTSPACE:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?)

S3_ACCESS_LOG %{WORD:[aws][s3access][bucket_owner]} %{NOTSPACE:[aws][s3access][bucket]} \[%{HTTPDATE:timestamp}\] (?:-|%{IP:[client][ip]}) (?:-|%{NOTSPACE:[client][user][id]}) %{NOTSPACE:[aws][s3access][request_id]} %{NOTSPACE:[aws][s3access][operation]} (?:-|%{NOTSPACE:[aws][s3access][key]}) (?:-|"%{S3_REQUEST_LINE:[aws][s3access][request_uri]}") (?:-|%{INT:[http][response][status_code]:integer}) (?:-|%{NOTSPACE:[aws][s3access][error_code]}) (?:-|%{INT:[aws][s3access][bytes_sent]:integer}) (?:-|%{INT:[aws][s3access][object_size]:integer}) (?:-|%{INT:[aws][s3access][total_time]:integer}) (?:-|%{INT:[aws][s3access][turn_around_time]:integer}) "(?:-|%{DATA:[http][request][referrer]})" "(?:-|%{DATA:[user_agent][original]})" (?:-|%{NOTSPACE:[aws][s3access][version_id]})(?: (?:-|%{NOTSPACE:[aws][s3access][host_id]}) (?:-|%{NOTSPACE:[aws][s3access][signature_version]}) (?:-|%{NOTSPACE:[tls][cipher]}) (?:-|%{NOTSPACE:[aws][s3access][authentication_type]}) (?:-|%{NOTSPACE:[aws][s3access][host_header]}) (?:-|%{NOTSPACE:[aws][s3access][tls_version]}))?
# :long - %{INT:[aws][s3access][bytes_sent]:int}
# :long - %{INT:[aws][s3access][object_size]:int}

ELB_URIHOST %{IPORHOST:[url][domain]}(?::%{POSINT:[url][port]:integer})?
ELB_URIPATHQUERY %{URIPATH:[url][path]}(?:\?%{URIQUERY:[url][query]})?
# deprecated - old name:
ELB_URIPATHPARAM %{ELB_URIPATHQUERY}
ELB_URI %{URIPROTO:[url][scheme]}://(?:%{USER:[url][username]}(?::[^@]*)?@)?(?:%{ELB_URIHOST})?(?:%{ELB_URIPATHQUERY})?

ELB_REQUEST_LINE (?:%{WORD:[http][request][method]} %{ELB_URI:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?)

# pattern supports 'regular' HTTP ELB format
ELB_V1_HTTP_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:[aws][elb][name]} %{IP:[source][ip]}:%{INT:[source][port]:integer} (?:-|(?:%{IP:[aws][elb][backend][ip]}:%{INT:[aws][elb][backend][port]:integer})) (?:-1|%{NUMBER:[aws][elb][request_processing_time][sec]:float}) (?:-1|%{NUMBER:[aws][elb][backend_processing_time][sec]:float}) (?:-1|%{NUMBER:[aws][elb][response_processing_time][sec]:float}) %{INT:[http][response][status_code]:integer} (?:-|%{INT:[aws][elb][backend][http][response][status_code]:integer}) %{INT:[http][request][body][bytes]:integer} %{INT:[http][response][body][bytes]:integer} "%{ELB_REQUEST_LINE}"(?: "(?:-|%{DATA:[user_agent][original]})" (?:-|%{NOTSPACE:[tls][cipher]}) (?:-|%{NOTSPACE:[aws][elb][ssl_protocol]}))?
# :long - %{INT:[http][request][body][bytes]:int}
# :long - %{INT:[http][response][body][bytes]:int}

ELB_ACCESS_LOG %{ELB_V1_HTTP_LOG}

# pattern used to match a shortened format, that's why we have the optional part (starting with *http.version*) at the end
CLOUDFRONT_ACCESS_LOG (?<timestamp>%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:[aws][cloudfront][x_edge_location]}\t(?:-|%{INT:[destination][bytes]:integer})\t%{IPORHOST:[source][ip]}\t%{WORD:[http][request][method]}\t%{HOSTNAME:[url][domain]}\t%{NOTSPACE:[url][path]}\t(?:(?:000)|%{INT:[http][response][status_code]:integer})\t(?:-|%{DATA:[http][request][referrer]})\t%{DATA:[user_agent][original]}\t(?:-|%{DATA:[url][query]})\t(?:-|%{DATA:[aws][cloudfront][http][request][cookie]})\t%{WORD:[aws][cloudfront][x_edge_result_type]}\t%{NOTSPACE:[aws][cloudfront][x_edge_request_id]}\t%{HOSTNAME:[aws][cloudfront][http][request][host]}\t%{URIPROTO:[network][protocol]}\t(?:-|%{INT:[source][bytes]:integer})\t%{NUMBER:[aws][cloudfront][time_taken]:float}\t(?:-|%{IP:[network][forwarded_ip]})\t(?:-|%{DATA:[aws][cloudfront][ssl_protocol]})\t(?:-|%{NOTSPACE:[tls][cipher]})\t%{WORD:[aws][cloudfront][x_edge_response_result_type]}(?:\t(?:-|HTTP/%{NUMBER:[http][version]})\t(?:-|%{DATA:[aws][cloudfront][fle_status]})\t(?:-|%{DATA:[aws][cloudfront][fle_encrypted_fields]})\t%{INT:[source][port]:integer}\t%{NUMBER:[aws][cloudfront][time_to_first_byte]:float}\t(?:-|%{DATA:[aws][cloudfront][x_edge_detailed_result_type]})\t(?:-|%{NOTSPACE:[http][request][mime_type]})\t(?:-|%{INT:[aws][cloudfront][http][request][size]:integer})\t(?:-|%{INT:[aws][cloudfront][http][request][range][start]:integer})\t(?:-|%{INT:[aws][cloudfront][http][request][range][end]:integer}))?
# :long - %{INT:[destination][bytes]:int}
# :long - %{INT:[source][bytes]:int}
# :long - %{INT:[aws][cloudfront][http][request][size]:int}
# :long - %{INT:[aws][cloudfront][http][request][range][start]:int}
# :long - %{INT:[aws][cloudfront][http][request][range][end]:int}
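
For orientation, a hypothetical request line matched by `S3_REQUEST_LINE`, such as `GET /mybucket/puppy.jpg HTTP/1.1`, would yield `http.request.method: GET`, `url.original: /mybucket/puppy.jpg`, and `http.version: 1.1` once the parser flattens the bracketed names into dotted keys.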
53 changes: 53 additions & 0 deletions patterns/ecs-v1/bacula
@@ -0,0 +1,53 @@
BACULA_TIMESTAMP %{MONTHDAY}-%{MONTH}(?:-%{YEAR})? %{HOUR}:%{MINUTE}
BACULA_HOST %{HOSTNAME}
BACULA_VOLUME %{USER}
BACULA_DEVICE %{USER}
BACULA_DEVICEPATH %{UNIXPATH}
BACULA_CAPACITY %{INT}{1,3}(,%{INT}{3})*
BACULA_VERSION %{USER}
BACULA_JOB %{USER}

BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY:[bacula][volume][max_capacity]} exceeded on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\).?
BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" Bytes=%{BACULA_CAPACITY:[bacula][volume][bytes]} Blocks=%{BACULA_CAPACITY:[bacula][volume][blocks]} at %{BACULA_TIMESTAMP:[bacula][timestamp]}.
BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" in catalog.
BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" on (?:file )?device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\).
BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\)
BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" mounted on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\) at %{BACULA_TIMESTAMP:[bacula][timestamp]}.
BACULA_LOG_NOOPEN \s*Cannot open %{DATA}: ERR=%{GREEDYDATA:[error][message]}
BACULA_LOG_NOOPENDIR \s*Could not open directory \"?%{DATA:[file][path]}\"?: ERR=%{GREEDYDATA:[error][message]}
BACULA_LOG_NOSTAT \s*Could not stat %{DATA:[file][path]}: ERR=%{GREEDYDATA:[error][message]}
BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\". Marking it purged.
BACULA_LOG_ALL_RECORDS_PRUNED .*?All records pruned from Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\"; marking it \"Purged\"
BACULA_LOG_BEGIN_PRUNE_JOBS Begin pruning Jobs older than %{INT} month %{INT} days .
BACULA_LOG_BEGIN_PRUNE_FILES Begin pruning Files.
BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:[bacula][client][name]} from catalog.
BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:[bacula][client][name]} from catalog.
BACULA_LOG_ENDPRUNE End auto prune.
BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:[bacula][job][name]}
BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:[bacula][job][name]}
BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:[bacula][volume][device]}\"
BACULA_LOG_DIFF_FS \s*%{UNIXPATH} is a different filesystem. Will not descend from %{UNIXPATH} into it.
BACULA_LOG_JOBEND Job write elapsed time = %{DATA:[bacula][job][elapsed_time]}, Transfer rate = %{NUMBER} (K|M|G)? Bytes/second
BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune.
BACULA_LOG_NOPRUNE_FILES No Files found to prune.
BACULA_LOG_VOLUME_PREVWRITTEN Volume \"?%{BACULA_VOLUME:[bacula][volume][name]}\"? previously written, moving to end of data.
BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" size=%{INT:[bacula][volume][size]:integer}
# :long - %{INT:[bacula][volume][size]:int}
BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT:[bacula][job][other_id]}.
BACULA_LOG_MARKCANCEL JobId %{INT:[bacula][job][id]}, Job %{BACULA_JOB:[bacula][job][name]} marked to be canceled.
BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:[bacula][job][client_run_before_command]}\"
BACULA_LOG_VSS (Generate )?VSS (Writer)?
BACULA_LOG_MAXSTART Fatal [eE]rror: Job canceled because max start delay time exceeded.
BACULA_LOG_DUPLICATE Fatal [eE]rror: JobId %{INT:[bacula][job][other_id]} already running. Duplicate job not allowed.
BACULA_LOG_NOJOBSTAT Fatal [eE]rror: No Job status returned from FD.
BACULA_LOG_FATAL_CONN Fatal [eE]rror: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:[bacula][client][name]}|Storage daemon) on %{IPORHOST:[client][address]}:%{POSINT:[client][port]:integer}. ERR=%{GREEDYDATA:[error][message]}
BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:[bacula][client][name]}|Storage daemon) on %{IPORHOST:[client][address]}:%{POSINT:[client][port]:integer}. ERR=%{GREEDYDATA:[error][message]}
BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at \"?%{IPORHOST:[client][address]}(?::%{POSINT:[client][port]:integer})?\"?. Possible causes:
BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup.
BACULA_LOG_NOPRIOR No prior Full backup Job record found.

BACULA_LOG_JOB (Error: )?Bacula %{BACULA_HOST} %{BACULA_VERSION} \(%{BACULA_VERSION}\):

BACULA_LOG %{BACULA_TIMESTAMP:timestamp} %{BACULA_HOST:[host][hostname]}(?: JobId %{INT:[bacula][job][id]})?:? (%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR})
# old (deprecated) name :
BACULA_LOGLINE %{BACULA_LOG}
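
For orientation, a hypothetical line for `BACULA_LOG_STARTJOB` via `BACULA_LOG`, such as `17-Apr 01:05 backup-dir JobId 7: Start Backup JobId 7, Job=NightlySave.2024-04-17_01.05.00_03`, would yield `host.hostname: backup-dir`, `bacula.job.id: 7`, and `bacula.job.name: NightlySave.2024-04-17_01.05.00_03`.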
13 changes: 13 additions & 0 deletions patterns/ecs-v1/bind
@@ -0,0 +1,13 @@
BIND9_TIMESTAMP %{MONTHDAY}[-]%{MONTH}[-]%{YEAR} %{TIME}

BIND9_DNSTYPE (?:A|AAAA|CAA|CDNSKEY|CDS|CERT|CNAME|CSYNC|DLV|DNAME|DNSKEY|DS|HINFO|LOC|MX|NAPTR|NS|NSEC|NSEC3|OPENPGPKEY|PTR|RRSIG|RP|SIG|SMIMEA|SOA|SRV|TSIG|TXT|URI)
BIND9_CATEGORY (?:queries)

# dns.question.class is static - only 'IN' is supported by Bind9
# bind.log.question.name is expected to be a 'duplicate' (same as the dns.question.name capture)
BIND9_QUERYLOGBASE client(:? @0x(?:[0-9A-Fa-f]+))? %{IP:[client][ip]}#%{POSINT:[client][port]:integer} \(%{GREEDYDATA:[bind][log][question][name]}\): query: %{GREEDYDATA:[dns][question][name]} (?<[dns][question][class]>IN) %{BIND9_DNSTYPE:[dns][question][type]}(:? %{DATA:[bind][log][question][flags]})? \(%{IP:[server][ip]}\)

# for query-logging category and severity are always fixed as "queries: info: "
BIND9_QUERYLOG %{BIND9_TIMESTAMP:timestamp} %{BIND9_CATEGORY:[bind][log][category]}: %{LOGLEVEL:[log][level]}: %{BIND9_QUERYLOGBASE}

BIND9 %{BIND9_QUERYLOG}
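
For orientation, a hypothetical query-log line that `BIND9_QUERYLOG` targets:

    17-Apr-2024 13:20:01.123 queries: info: client @0x7f6410003ff0 192.168.1.10#53525 (example.com): query: example.com IN A +E(0)K (192.168.1.1)

This would yield, among others, `client.ip: 192.168.1.10`, `client.port: 53525`, `dns.question.name: example.com`, `dns.question.type: A`, and `server.ip: 192.168.1.1`.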