Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

- split the ruby grok bits into a separate repo

  • Loading branch information...
commit f8892b5067a354897ccf3e9297943959223bcb6e 1 parent a615285
Jordan Sissel authored
Showing with 42,734 additions and 0 deletions.
  1. +10 −0 Rakefile
  2. +131 −0 examples/grok-web.rb
  3. +39 −0 examples/pattern-discovery.rb
  4. +30 −0 examples/test.rb
  5. +25 −0 grok.gemspec
  6. +3 −0  lib/Grok.rb
  7. +145 −0 lib/grok-pure.rb
  8. +133 −0 lib/grok.rb
  9. +80 −0 lib/grok/c-ext/match.rb
  10. +56 −0 lib/grok/c-ext/pile.rb
  11. +45 −0 lib/grok/pure/match.rb
  12. +71 −0 lib/grok/pure/pile.rb
  13. +90 −0 patterns/pure-ruby/base
  14. +3 −0  patterns/pure-ruby/java
  15. +3 −0  patterns/pure-ruby/ruby
  16. +32 −0 sample.rb
  17. +19 −0 test/Makefile
  18. +17 −0 test/ffi-ruby/Makefile
  19. +8 −0 test/ffi-ruby/alltests.rb
  20. +58 −0 test/ffi-ruby/general/basic_test.rb
  21. +105 −0 test/ffi-ruby/general/captures_test.rb
  22. +23 −0 test/ffi-ruby/patterns/day.rb
  23. +31 −0 test/ffi-ruby/patterns/host.rb
  24. +10,000 −0 test/ffi-ruby/patterns/ip.input
  25. +32 −0 test/ffi-ruby/patterns/ip.rb
  26. +69 −0 test/ffi-ruby/patterns/iso8601.rb
  27. +25 −0 test/ffi-ruby/patterns/month.rb
  28. +70 −0 test/ffi-ruby/patterns/number.rb
  29. +32 −0 test/ffi-ruby/patterns/path.rb
  30. +21 −0 test/ffi-ruby/patterns/prog.rb
  31. +54 −0 test/ffi-ruby/patterns/quotedstring.rb
  32. +46 −0 test/ffi-ruby/patterns/uri.rb
  33. +65 −0 test/ffi-ruby/regression/grokmatch-subject-garbagecollected-early.rb
  34. +3 −0  test/ffi-ruby/run.sh
  35. +48 −0 test/ffi-ruby/speedtest.rb
  36. +17 −0 test/pure-ruby/Makefile
  37. +9 −0 test/pure-ruby/alltests.rb
  38. +58 −0 test/pure-ruby/general/basic_test.rb
  39. +118 −0 test/pure-ruby/general/captures_test.rb
  40. +20,465 −0 test/pure-ruby/logfile
  41. +23 −0 test/pure-ruby/patterns/day.rb
  42. +22 −0 test/pure-ruby/patterns/host.rb
  43. +10,000 −0 test/pure-ruby/patterns/ip.input
  44. +32 −0 test/pure-ruby/patterns/ip.rb
  45. +69 −0 test/pure-ruby/patterns/iso8601.rb
  46. +25 −0 test/pure-ruby/patterns/month.rb
  47. +70 −0 test/pure-ruby/patterns/number.rb
  48. +32 −0 test/pure-ruby/patterns/path.rb
  49. +21 −0 test/pure-ruby/patterns/prog.rb
  50. +54 −0 test/pure-ruby/patterns/quotedstring.rb
  51. +46 −0 test/pure-ruby/patterns/uri.rb
  52. +3 −0  test/pure-ruby/run.sh
  53. +48 −0 test/pure-ruby/speedtest.rb
10 Rakefile
View
@@ -0,0 +1,10 @@
+task :default => [:package]
+
+task :package do
+ system("gem build grok.gemspec")
+end
+
+task :publish do
+ latest_gem = %x{ls -t jls-grok*.gem}.split("\n").first
+ system("gem push #{latest_gem}")
+end
131 examples/grok-web.rb
View
@@ -0,0 +1,131 @@
+#!/usr/bin/env ruby
+#
+# Simple web application that will let you feed grok's discovery feature
+# a bunch of data, and grok will show you patterns found and the results
+# of that pattern as matched against the same input.
+
+require 'rubygems'
+require 'sinatra'
+require 'grok'
+
+get '/' do
+ redirect "/demo/grok-discover/index"
+end
+
+get "/demo/grok-discover/index" do
+ haml :index
+end
+
+post "/demo/grok-discover/grok" do
+ grok = Grok.new
+ grok.add_patterns_from_file("/usr/local/share/grok/patterns/base")
+ @results = []
+ params[:data].split("\n").each do |line|
+ pattern = grok.discover(line)
+ grok.compile(pattern)
+ match = grok.match(line)
+ puts "Got input: #{line}"
+ puts " => pattern: (#{match != false}) #{pattern}"
+ @results << {
+ :input => line,
+ :pattern => grok.pattern.gsub(/\\Q|\\E/, ""),
+ :full_pattern => grok.expanded_pattern,
+ :match => (match and match.captures or false),
+ }
+ end
+ haml :grok
+end
+
+get "/demo/grok-discover/style.css" do
+ sass :style
+end
+
+__END__
+@@ style
+h1
+ color: red
+.original
+.regexp
+ display: block
+ border: 1px solid grey
+ padding: 1em
+
+.results
+ width: 80%
+ margin-left: auto
+ th
+ text-align: left
+ td
+ border-top: 1px solid black
+@@ layout
+%html
+ %head
+ %title Grok Web
+ %link{:rel => "stylesheet", :href => "/demo/grok-discover/style.css"}
+ %body
+ =yield
+
+@@ index
+#header
+ %h1 Grok Web
+#content
+ Paste some log data below. I'll do my best to have grok generate a pattern for you.
+
+ %p
+ Learn more about grok here:
+ %a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} Grok
+
+ %p
+ This is running off of my cable modem for now, so if it's sluggish, that's
+ why. Be gentle.
+ %form{:action => "/demo/grok-discover/grok", :method => "post"}
+ %textarea{:name => "data", :rows => 10, :cols => 80}
+ %br
+ %input{:type => "submit", :value=>"submit"}
+
+@@ grok
+#header
+ %h1 Grok Results
+ %h3
+ %a{:href => "/demo/grok-discover/index"} Try more?
+#content
+ %p
+ Below is grok's analysis of the data you provided. Each line is analyzed
+ separately. It uses grok's standard library of known patterns to give you a
+ pattern that grok can use to match more logs like the lines you provided.
+ %p
+ The results may not be perfect, but it gives you a head start on coming up with
+ log patterns for
+ %a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} grok
+ and
+ %a{:href => "http://code.google.com/p/logstash/"} logstash
+ %ol
+ - @results.each do |result|
+ %li
+ %p.original
+ %b Original:
+ %br= result[:input]
+ %p
+ %b Pattern:
+ %br
+ %span.pattern= result[:pattern]
+ %p
+ %b
+ Generated Regular Expression
+ %small
+ %i You could have written this by hand, be glad you didn't have to.
+ %code.regexp= result[:full_pattern].gsub("<", "&lt;")
+ %p
+ If you wanted to test this, you can paste the above expression into
+ pcretest(1) and it should match your input.
+ %p
+ %b Capture Results
+ %table.results
+ %tr
+ %th Name
+ %th Value
+ - result[:match].each do |key,val|
+ - val.each do |v|
+ %tr
+ %td= key
+ %td= v
39 examples/pattern-discovery.rb
View
@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+#
+
+require "rubygems"
+require "grok"
+require "pp"
+
+grok = Grok.new
+
+# Load some default patterns that ship with grok.
+# See also:
+# http://code.google.com/p/semicomplete/source/browse/grok/patterns/base
+grok.add_patterns_from_file("/usr/local/share/grok/patterns/base")
+
+# Using the patterns we know, try to build a grok pattern that best matches
+# a string we give. Let's try Time.now.to_s, which has this format;
+# => Fri Apr 16 19:15:27 -0700 2010
+input = "Time is #{Time.now}"
+pattern = grok.discover(input)
+
+puts "Input: #{input}"
+puts "Pattern: #{pattern}"
+grok.compile(pattern)
+
+# Sleep to change time.
+puts "Sleeping so time changes and we can test against another input."
+sleep(2)
+match = grok.match("Time is #{Time.now.to_s}")
+puts "Resulting capture:"
+pp match.captures
+
+# When run, the output should look something like this:
+# % ruby pattern-discovery.rb
+# Pattern: Time is Fri %{SYSLOGDATE} %{BASE10NUM} 2010
+# {"BASE10NUM"=>["-0700"],
+# "SYSLOGDATE"=>["Apr 16 19:17:38"],
+# "TIME"=>["19:17:38"],
+# "MONTH"=>["Apr"],
+# "MONTHDAY"=>["16"]}
30 examples/test.rb
View
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+#
+
+require "rubygems"
+require "grok"
+require "pp"
+
+grok = Grok.new
+
+# Load some default patterns that ship with grok.
+# See also:
+# http://code.google.com/p/semicomplete/source/browse/grok/patterns/base
+grok.add_patterns_from_file("../..//patterns/base")
+
+# Using the patterns we know, try to build a grok pattern that best matches
+# a string we give. Let's try Time.now.to_s, which has this format;
+# => Fri Apr 16 19:15:27 -0700 2010
+input = "2010-04-18T15:06:02Z"
+pattern = "%{TIMESTAMP_ISO8601}"
+grok.compile(pattern)
+grok.compile(pattern)
+puts "Input: #{input}"
+puts "Pattern: #{pattern}"
+puts "Full: #{grok.expanded_pattern}"
+
+match = grok.match(input)
+if match
+ puts "Resulting capture:"
+ pp match.captures
+end
25 grok.gemspec
View
@@ -0,0 +1,25 @@
+Gem::Specification.new do |spec|
+ files = Dir.glob("lib/**/*.rb")
+ files + Dir.glob("patterns/**")
+ files + Dir.glob("test/")
+
+ #svnrev = %x{svn info}.split("\n").grep(/Revision:/).first.split(" ").last.to_i
+ spec.name = "jls-grok"
+ spec.version = "0.9.2"
+
+ spec.summary = "grok bindings for ruby"
+ spec.description = "Grok ruby bindings - pattern match/extraction tool"
+ spec.files = files
+
+ # TODO(sissel): ffi is now optional, get rid of it?
+ #spec.add_dependency("ffi", "> 0.6.3")
+ spec.require_paths << "lib"
+
+ # Cabin for logging.
+ spec.add_dependency("cabin")
+
+ spec.authors = ["Jordan Sissel", "Pete Fritchman"]
+ spec.email = ["jls@semicomplete.com", "petef@databits.net"]
+ spec.homepage = "http://code.google.com/p/semicomplete/wiki/Grok"
+end
+
3  lib/Grok.rb
View
@@ -0,0 +1,3 @@
+require "grok.rb"
+
+# compat for when grok was Grok.so
145 lib/grok-pure.rb
View
@@ -0,0 +1,145 @@
+require "rubygems"
+require "logger"
+require "cabin"
+
+# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
+class Grok
+ attr_accessor :pattern
+ attr_accessor :expanded_pattern
+ attr_accessor :logger
+
+ # Matches one grok reference: %{PATTERN}, %{PATTERN:subname} or
+ # %{PATTERN=definition}. Named groups: "name" (PATTERN plus the optional
+ # ":subname"), "pattern", "subname" and "definition".
+ # NOTE(review): the class [A-z0-9] spans every ASCII character from 'A' to
+ # 'z', which also covers [ \ ] ^ _ and `. Pattern names containing '_'
+ # (e.g. TIMESTAMP_ISO8601) only match because of that, so do not narrow it
+ # to [A-Za-z0-9] without adding '_' explicitly.
+ # NOTE(review): despite the first inline comment, nothing in this regexp
+ # rejects a '%{' that is preceded by a backslash.
+ PATTERN_RE = \
+ /%\{ # match '%{' not prefixed with '\'
+ (?<name> # match the pattern name
+ (?<pattern>[A-z0-9]+)
+ (?::(?<subname>[A-z0-9_:]+))?
+ )
+ (?:=(?<definition>
+ (?:
+ (?:[^{}\\]+|\\.+)+
+ |
+ (?<curly>\{(?:(?>[^{}]+|(?>\\[{}])+)|(\g<curly>))*\})+
+ )+
+ ))?
+ [^}]*
+ \}/x
+
+ GROK_OK = 0
+ GROK_ERROR_FILE_NOT_ACCESSIBLE = 1
+ GROK_ERROR_PATTERN_NOT_FOUND = 2
+ GROK_ERROR_UNEXPECTED_READ_SIZE = 3
+ GROK_ERROR_COMPILE_FAILED = 4
+ GROK_ERROR_UNINITIALIZED = 5
+ GROK_ERROR_PCRE_ERROR = 6
+ GROK_ERROR_NOMATCH = 7
+
+ public
+ def initialize
+ @patterns = {}
+ @logger = Cabin::Channel.new
+ @logger.subscribe(Logger.new(STDOUT))
+ @logger.level = :warn
+
+ # TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
+ end # def initialize
+
+ public
+ def add_pattern(name, pattern)
+ @logger.info("Adding pattern", name => pattern)
+ @patterns[name] = pattern
+ return nil
+ end
+
+ public
+ def add_patterns_from_file(path)
+ file = File.new(path, "r")
+ file.each do |line|
+ next if line =~ /^\s*#/
+ name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
+ next if pattern.nil?
+ add_pattern(name, pattern.chomp)
+ end
+ return nil
+ end # def add_patterns_from_file
+
+ public
+ def compile(pattern)
+ @capture_map = {}
+
+ iterations_left = 100
+ @pattern = pattern
+ @expanded_pattern = pattern
+ index = 0
+
+ # Replace any instances of '%{FOO}' with that pattern.
+ loop do
+ if iterations_left == 0
+ raise "Deep recursion pattern compilation of #{pattern.inspect} - expanded: #{@expanded_pattern.inspect}"
+ end
+ iterations_left -= 1
+ m = PATTERN_RE.match(@expanded_pattern)
+ break if !m
+
+ if m["definition"]
+ add_pattern(m["pattern"], m["definition"])
+ end
+
+ if @patterns.include?(m["pattern"])
+ # create a named capture index that we can push later as the named
+ # pattern. We do this because ruby regexp can't capture something
+ # by the same name twice.
+ p = @patterns[m["pattern"]]
+
+ capture = "a#{index}" # named captures have to start with letters?
+ #capture = "%04d" % "#{index}" # named captures have to start with letters?
+ replacement_pattern = "(?<#{capture}>#{p})"
+ #p(:input => m[0], :pattern => replacement_pattern)
+ @capture_map[capture] = m["name"]
+ @expanded_pattern.sub!(m[0], replacement_pattern)
+ index += 1
+ end
+ end
+
+ @regexp = Regexp.new(@expanded_pattern)
+ @logger.debug("Grok compiled OK", :pattern => pattern,
+ :expanded_pattern => @expanded_pattern)
+ end # def compile
+
+ public
+ def match(text)
+ match = @regexp.match(text)
+
+ if match
+ grokmatch = Grok::Match.new
+ grokmatch.subject = text
+ grokmatch.start, grokmatch.end = match.offset(0)
+ grokmatch.grok = self
+ grokmatch.match = match
+ @logger.debug("Regexp match object", :names => match.names, :captures => match.captures)
+ return grokmatch
+ else
+ return false
+ end
+ end # def match
+
+ public
+ def discover(input)
+ init_discover if @discover == nil
+
+ return @discover.discover(input)
+ end # def discover
+
+ private
+ # Builds the discovery helper used by #discover and hands it this
+ # object's logmask.
+ # NOTE(review): neither GrokDiscover nor a `logmask` method is defined in
+ # this class, and the files required here (logger, cabin, grok/pure/match,
+ # grok/pure/pile) do not visibly provide them. This looks carried over from
+ # the FFI implementation; confirm #discover works with pure-Ruby Grok.
+ def init_discover
+ @discover = GrokDiscover.new(self)
+ @discover.logmask = logmask
+ end # def init_discover
+
+ public
+ def capture_name(id)
+ return @capture_map[id]
+ end # def capture_name
+end # Grok
+
+require "grok/pure/match"
+require "grok/pure/pile"
133 lib/grok.rb
View
@@ -0,0 +1,133 @@
+require "rubygems"
+require "ffi"
+
+# TODO(sissel): Check if 'grok-pure' has been loaded and abort?
+class Grok < FFI::Struct
+ module CGrok
+ extend FFI::Library
+ ffi_lib "libgrok"
+
+ attach_function :grok_new, [], :pointer
+ attach_function :grok_compilen, [:pointer, :pointer, :int], :int
+ attach_function :grok_pattern_add,
+ [:pointer, :pointer, :int, :pointer, :int], :int
+ attach_function :grok_patterns_import_from_file, [:pointer, :pointer], :int
+ attach_function :grok_execn, [:pointer, :pointer, :int, :pointer], :int
+ end
+
+ include CGrok
+ layout :pattern, :string,
+ :pattern_len, :int,
+ :full_pattern, :string,
+ :full_pattern_len, :int,
+ :__patterns, :pointer, # TCTREE*, technically
+ :__re, :pointer, # pcre*
+ :__pcre_capture_vector, :pointer, # int*
+ :__pcre_num_captures, :int,
+ :__captures_by_id, :pointer, # TCTREE*
+ :__captures_by_name, :pointer, # TCTREE*
+ :__captures_by_subname, :pointer, # TCTREE*
+ :__captures_by_capture_number, :pointer, # TCTREE*
+ :__max_capture_num, :int,
+ :pcre_errptr, :string,
+ :pcre_erroffset, :int,
+ :pcre_errno, :int,
+ :logmask, :uint,
+ :logdepth, :uint,
+ :errstr, :string
+
+ GROK_OK = 0
+ GROK_ERROR_FILE_NOT_ACCESSIBLE = 1
+ GROK_ERROR_PATTERN_NOT_FOUND = 2
+ GROK_ERROR_UNEXPECTED_READ_SIZE = 3
+ GROK_ERROR_COMPILE_FAILED = 4
+ GROK_ERROR_UNINITIALIZED = 5
+ GROK_ERROR_PCRE_ERROR = 6
+ GROK_ERROR_NOMATCH = 7
+
+ public
+ def initialize
+ super(grok_new)
+ end
+
+ public
+ def add_pattern(name, pattern)
+ name_c = FFI::MemoryPointer.from_string(name)
+ pattern_c = FFI::MemoryPointer.from_string(pattern)
+ grok_pattern_add(self, name_c, name.length, pattern_c, pattern.length)
+ return nil
+ end
+
+ public
+ def add_patterns_from_file(path)
+ path_c = FFI::MemoryPointer.from_string(path)
+ ret = grok_patterns_import_from_file(self, path_c)
+ if ret != GROK_OK
+ raise ArgumentError, "Failed to add patterns from file #{path}"
+ end
+ return nil
+ end
+
+ public
+ def pattern
+ return self[:pattern]
+ end
+
+ public
+ def expanded_pattern
+ return self[:full_pattern]
+ end
+
+ public
+ def compile(pattern)
+ pattern_c = FFI::MemoryPointer.from_string(pattern)
+ ret = grok_compilen(self, pattern_c, pattern.length)
+ if ret != GROK_OK
+ raise ArgumentError, "Compile failed: #{self[:errstr]})"
+ end
+ return ret
+ end
+
+ public
+ def match(text)
+ match = Grok::Match.new
+ text_c = FFI::MemoryPointer.from_string(text)
+ rc = grok_execn(self, text_c, text.size, match)
+ case rc
+ when GROK_OK
+ # Give the Grok::Match object a reference to the 'text_c'
+ # object which is also Grok::Match#subject string;
+ # this will prevent Ruby from garbage collecting it until
+ # the match object is garbage collectd.
+ #
+ # If we don't do this, then 'text_c' will fall out of
+ # scope at the end of this function and become a candidate
+ # for garbage collection, causing Grok::Match#subject to become
+ # corrupt and any captures to point to those corrupt portions.
+ # http://code.google.com/p/logstash/issues/detail?id=47
+ match.subject_memorypointer = text_c
+
+ return match
+ when GROK_ERROR_NOMATCH
+ return false
+ end
+
+ raise ValueError, "unknown return from grok_execn: #{rc}"
+ end
+
+ public
+ def discover(input)
+ init_discover if @discover == nil
+
+ return @discover.discover(input)
+ end
+
+ private
+ # Builds the discovery helper used by #discover and hands it this
+ # object's logmask.
+ # NOTE(review): no `logmask` method is defined in this class; other struct
+ # fields are read as self[:field] (see #pattern), so this presumably should
+ # be self[:logmask]. GrokDiscover is also not defined or required in this
+ # file. Confirm before relying on #discover.
+ def init_discover
+ @discover = GrokDiscover.new(self)
+ @discover.logmask = logmask
+ end
+end # Grok
+
+require "grok/c-ext/match"
+require "grok/c-ext/pile"
80 lib/grok/c-ext/match.rb
View
@@ -0,0 +1,80 @@
+require "rubygems"
+require "ffi"
+require "grok"
+
+class Grok::Match < FFI::Struct
+ module CGrokMatch
+ extend FFI::Library
+ ffi_lib "libgrok"
+
+ attach_function :grok_match_get_named_substring,
+ [:pointer, :pointer], :pointer
+ attach_function :grok_match_walk_init, [:pointer], :void
+ attach_function :grok_match_walk_next,
+ [:pointer, :pointer, :pointer, :pointer, :pointer], :int
+ attach_function :grok_match_walk_end, [:pointer], :void
+ end
+
+ include CGrokMatch
+ layout :grok, :pointer,
+ :subject, :string,
+ :start, :int,
+ :end, :int
+
+ # Placeholder for the FFI::MemoryPointer that we pass to
+ # grok_execn() during Grok#match; this should prevent ruby from
+ # garbage collecting us until the GrokMatch goes out of scope.
+ # http://code.google.com/p/logstash/issues/detail?id=47
+ attr_accessor :subject_memorypointer
+
+ public
+ def initialize
+ super
+
+ @captures = nil
+ end
+
+ public
+ def each_capture
+ @captures = Hash.new { |h, k| h[k] = Array.new }
+ grok_match_walk_init(self)
+ name_ptr = FFI::MemoryPointer.new(:pointer)
+ namelen_ptr = FFI::MemoryPointer.new(:int)
+ data_ptr = FFI::MemoryPointer.new(:pointer)
+ datalen_ptr = FFI::MemoryPointer.new(:int)
+ while grok_match_walk_next(self, name_ptr, namelen_ptr, data_ptr, datalen_ptr) == Grok::GROK_OK
+ namelen = namelen_ptr.read_int
+ name = name_ptr.get_pointer(0).get_string(0, namelen)
+ datalen = datalen_ptr.read_int
+ data = data_ptr.get_pointer(0).get_string(0, datalen)
+ yield name, data
+ end
+ grok_match_walk_end(self)
+ end # def each_capture
+
+ public
+ def captures
+ if @captures.nil?
+ @captures = Hash.new { |h,k| h[k] = [] }
+ each_capture do |key, val|
+ @captures[key] << val
+ end
+ end
+ return @captures
+ end # def captures
+
+ public
+ def start
+ return self[:start]
+ end
+
+ public
+ def end
+ return self[:end]
+ end
+
+ public
+ def subject
+ return self[:subject]
+ end
+end # Grok::Match
56 lib/grok/c-ext/pile.rb
View
@@ -0,0 +1,56 @@
+require "grok"
+
+# A grok pile is an easy way to have multiple patterns together so
+# that you can try to match against each one.
+# The API provided should be similar to the normal Grok
+# interface, but you can compile multiple patterns and match will
+# try each one until a match is found.
+class Grok
+ class Pile
+ def initialize
+ @groks = []
+ @patterns = {}
+ @pattern_files = []
+ end # def initialize
+
+ # see Grok#add_pattern
+ def add_pattern(name, string)
+ @patterns[name] = string
+ end # def add_pattern
+
+ # see Grok#add_patterns_from_file
+ def add_patterns_from_file(path)
+ if !File.exists?(path)
+ raise "File does not exist: #{path}"
+ end
+ @pattern_files << path
+ end # def add_patterns_from_file
+
+ # see Grok#compile
+ def compile(pattern)
+ grok = Grok.new
+ @patterns.each do |name, value|
+ grok.add_pattern(name, value)
+ end
+ @pattern_files.each do |path|
+ grok.add_patterns_from_file(path)
+ end
+ grok.compile(pattern)
+ @groks << grok
+ end # def compile
+
+ # Slight difference from Grok#match in that it returns
+ # the Grok instance that matched successfully in addition
+ # to the GrokMatch result.
+ # See also: Grok#match
+ def match(string)
+ @groks.each do |grok|
+ match = grok.match(string)
+ if match
+ return [grok, match]
+ end
+ end
+ return false
+ end # def match
+ end # class Pile
+end # class Grok
45 lib/grok/pure/match.rb
View
@@ -0,0 +1,45 @@
+require "grok-pure"
+
+class Grok::Match
+ attr_accessor :subject
+ attr_accessor :start
+ attr_accessor :end
+ attr_accessor :grok
+ attr_accessor :match
+
+ public
+ def initialize
+ @captures = nil
+ end
+
+ public
+ def each_capture
+ @captures = Hash.new { |h, k| h[k] = Array.new }
+
+ #p :expanded => @grok.expanded_pattern
+ #p :map => @grok.capture_map
+ @match.names.zip(@match.captures).each do |id, value|
+ #p :match => id, :value => value
+ name = @grok.capture_name(id)
+ #next if value == nil
+ yield name, value
+ end
+
+ end # def each_capture
+
+ public
+ def captures
+ if @captures.nil?
+ @captures = Hash.new { |h,k| h[k] = [] }
+ each_capture do |key, val|
+ @captures[key] << val
+ end
+ end
+ return @captures
+ end # def captures
+
+ public
+ def [](name)
+ return captures[name]
+ end # def []
+end # Grok::Match
71 lib/grok/pure/pile.rb
View
@@ -0,0 +1,71 @@
+require "grok-pure"
+require "logger"
+require "cabin"
+
+# A grok pile is an easy way to have multiple patterns together so
+# that you can try to match against each one.
+# The API provided should be similar to the normal Grok
+# interface, but you can compile multiple patterns and match will
+# try each one until a match is found.
+class Grok
+ class Pile
+ attr_accessor :logger
+
+ def initialize
+ @groks = []
+ @patterns = {}
+ @pattern_files = []
+ @logger = Cabin::Channel.new
+ @logger.subscribe(Logger.new(STDOUT))
+ @logger.level = :warn
+ end # def initialize
+
+ def logger=(logger)
+ @logger = logger
+ @groks.each { |g| g.logger = logger }
+ end # def logger=
+
+ # see Grok#add_pattern
+ def add_pattern(name, string)
+ @patterns[name] = string
+ end # def add_pattern
+
+ # see Grok#add_patterns_from_file
+ def add_patterns_from_file(path)
+ if !File.exists?(path)
+ raise "File does not exist: #{path}"
+ end
+ @pattern_files << path
+ end # def add_patterns_from_file
+
+ # see Grok#compile
+ def compile(pattern)
+ grok = Grok.new
+ grok.logger = @logger unless @logger.nil?
+ @patterns.each do |name, value|
+ grok.add_pattern(name, value)
+ end
+ @pattern_files.each do |path|
+ grok.add_patterns_from_file(path)
+ end
+ grok.compile(pattern)
+ @logger.info("Pile compiled new grok", :pattern => pattern,
+ :expanded_pattern => grok.expanded_pattern)
+ @groks << grok
+ end # def compile
+
+ # Slight difference from Grok#match in that it returns
+ # the Grok instance that matched successfully in addition
+ # to the GrokMatch result.
+ # See also: Grok#match
+ def match(string)
+ @groks.each do |grok|
+ match = grok.match(string)
+ if match
+ return [grok, match]
+ end
+ end
+ return false
+ end # def match
+ end # class Pile
+end # class Grok
90 patterns/pure-ruby/base
View
@@ -0,0 +1,90 @@
+USERNAME [a-zA-Z0-9_-]+
+USER %{USERNAME}
+INT (?:[+-]?(?:[0-9]+))
+BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
+NUMBER (?:%{BASE10NUM})
+BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
+BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
+
+POSINT \b(?:[0-9]+)\b
+WORD \b\w+\b
+NOTSPACE \S+
+DATA .*?
+GREEDYDATA .*
+QUOTEDSTRING (?:(?<!\\\\)(?:"(?:\\\\.|[^\\\\"])*"|(?:'(?:\\\\.|[^\\\\'])*')|(?:`(?:\\\\.|[^\\\\`])*`)))
+
+# Networking
+MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
+CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
+WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
+COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
+IP (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9])
+HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
+HOST %{HOSTNAME}
+IPORHOST (?:%{HOSTNAME}|%{IP})
+# The "=~/\./" predicate is not supported by the pure-Ruby compiler: its
+# PATTERN_RE reads everything after '=' as an inline definition, which would
+# redefine IPORHOST as the literal text "~/\./". Use the plain reference.
+HOSTPORT (?:%{IPORHOST}:%{POSINT})
+
+# paths
+PATH (?:%{UNIXPATH}|%{WINPATH})
+UNIXPATH (?:/(?:[\w_%!$@:.,-]+|\\.)*)+
+#UNIXPATH (?<![\w\/])(?:/[^\/\s?*]*)+
+LINUXTTY (?:/dev/pts/%{POSINT})
+BSDTTY (?:/dev/tty[pq][a-z0-9])
+TTY (?:%{BSDTTY}|%{LINUXTTY})
+WINPATH (?:[A-Za-z]+:|\\\\)(?:\\\\[^\\\\?*]*)+
+URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
+URIHOST %{IPORHOST}(?::%{POSINT:port})?
+# uripath comes loosely from RFC1738, but mostly from what Firefox
+# doesn't turn into %XX
+URIPATH (?:/[A-Za-z0-9$.+!*'(),~:#%_-]*)+
+#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
+URIPARAM \?[A-Za-z0-9$.+!*'(),~#%&/=:;_-]*
+URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
+URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
+
+# Months: January, Feb, 3, 03, 12, December
+MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b
+MONTHNUM (?:0?[1-9]|1[0-2])
+MONTHDAY (?:3[01]|[1-2]?[0-9]|0?[1-9])
+
+# Days: Monday, Tue, Thu, etc...
+DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
+
+# Years?
+YEAR [0-9]+
+# Time: HH:MM:SS
+#TIME \d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?
+# I'm still on the fence about using grok to perform the time match,
+# since it's probably slower.
+# TIME %{POSINT<24}:%{POSINT<60}(?::%{POSINT<60}(?:\.%{POSINT})?)?
+HOUR (?:2[0123]|[01][0-9])
+MINUTE (?:[0-5][0-9])
+# '60' is a leap second in most time standards and thus is valid.
+SECOND (?:(?:[0-5][0-9]|60)(?:[.,][0-9]+)?)
+# (?<![0-9]) is a negative lookbehind; the previous "(?!<[0-9])" was a
+# lookahead for a literal '<' and never guarded against a leading digit.
+TIME (?<![0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
+# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
+DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
+DATE_EU %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY}
+ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
+ISO8601_SECOND (?:%{SECOND}|60)
+TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
+DATE %{DATE_US}|%{DATE_EU}
+DATESTAMP %{DATE}[- ]%{TIME}
+TZ (?:[PMCE][SD]T)
+DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
+DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
+
+# Syslog Dates: Month Day HH:MM:SS
+SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
+PROG (?:[\w._/-]+)
+SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
+SYSLOGHOST %{IPORHOST}
+SYSLOGFACILITY <%{POSINT:facility}.%{POSINT:priority}>
+HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT:ZONE}
+
+# Shortcuts
+QS %{QUOTEDSTRING}
+
+# Log formats
+SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
+COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{URIPATHPARAM:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} (?:%{NUMBER:bytes}|-) "(?:%{URI:referrer}|-)" %{QS:agent}
3  patterns/pure-ruby/java
View
@@ -0,0 +1,3 @@
+# Fully qualified class name: one or more dot-terminated package segments
+# followed by the class name.
+JAVACLASS (?:[a-z-]+\.)+[A-Za-z0-9]+
+# Source file name (the stray '}' replaced by the intended '+' quantifier).
+JAVAFILE (?:[A-Za-z0-9_.-]+)
+# One "at package.Class.method(File.java:123)" stack frame (stray leading
+# double quote removed).
+JAVASTACKTRACEPART at %{JAVACLASS:class}\.%{WORD:method}\(%{JAVAFILE:file}:%{NUMBER:line}\)
3  patterns/pure-ruby/ruby
View
@@ -0,0 +1,3 @@
+RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO)
+# Ruby Logger lines look like "I, [<timestamp> #<pid>] INFO -- : <message>";
+# the pid reference needs the grok "%{" opener after the literal '#'.
+RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL} -- : %{DATA:message}
+
32 sample.rb
View
@@ -0,0 +1,32 @@
+$: << File.join(File.dirname(__FILE__), "lib")
+require "grok-pure"
+
+patterns = {}
+
+matches = [
+ "%{FOO=\\d+}",
+ #"%{FOO=foo}",
+]
+
+pile = Grok::Pile.new
+pile.add_patterns_from_file("patterns/pure-ruby/base")
+matches.collect do |m|
+ pile.compile(m)
+end
+
+bytes = 0
+time_start = Time.now.to_f
+$stdin.each do |line|
+ grok, m = pile.match(line)
+ if m
+ m.each_capture do |key, value|
+ p key => value
+ end
+
+ #bytes += line.length
+ break
+ end
+end
+
+#time_end = Time.now.to_f
+#puts "parse rate: #{ (bytes / 1024) / (time_end - time_start) }"
19 test/Makefile
View
@@ -0,0 +1,19 @@
+
+PLATFORM=$(shell (uname -o || uname -s) 2> /dev/null)
+
+ifeq ($(PLATFORM), Darwin)
+LIBSUFFIX=dylib
+else
+LIBSUFFIX=so
+endif
+
+../../libgrok.$(LIBSUFFIX):
+ $(MAKE) -C ../../ libgrok.$(LIBSUFFIX)
+
+# test-pure and test-ffi are commands, not files; declare them phony so a
+# stray file with the same name cannot make them look up to date.
+.PHONY: test test-pure test-ffi
+test-pure:
+ JRUBY_OPTS=--1.9 LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" ruby pure-ruby/alltests.rb
+
+
+test-ffi: ../../libgrok.$(LIBSUFFIX)
+ JRUBY_OPTS=--1.9 LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" ruby ffi-ruby/alltests.rb
17 test/ffi-ruby/Makefile
View
@@ -0,0 +1,17 @@
+
+PLATFORM=$(shell (uname -o || uname -s) 2> /dev/null)
+
+ifeq ($(PLATFORM), Darwin)
+LIBSUFFIX=dylib
+else
+LIBSUFFIX=so
+endif
+
+.PHONY: test
+test:
+ $(MAKE) -C ../../ libgrok.$(LIBSUFFIX)
+ LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" ruby alltests.rb
+
+test_jruby:
+ $(MAKE) -C ../../ libgrok.$(LIBSUFFIX)
+ LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" jruby alltests.rb
8 test/ffi-ruby/alltests.rb
View
@@ -0,0 +1,8 @@
+require 'test/unit'
+$: << File.join(File.dirname(__FILE__), "..", "..", "lib")
+
+Dir["#{File.dirname(__FILE__)}/*/**/*.rb"].each do |file|
+ puts "Loading tests: #{file}"
+ load file
+end
+
58 test/ffi-ruby/general/basic_test.rb
View
@@ -0,0 +1,58 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+class GrokBasicTests < Test::Unit::TestCase
+ def setup
+ @grok = Grok.new
+ end
+
+ def test_grok_methods
+ assert_respond_to(@grok, :compile)
+ assert_respond_to(@grok, :match)
+ assert_respond_to(@grok, :expanded_pattern)
+ assert_respond_to(@grok, :pattern)
+ end
+
+ def test_grok_compile_fails_on_invalid_expressions
+ bad_regexps = ["[", "[foo", "?", "foo????", "(?-"]
+ bad_regexps.each do |regexp|
+ assert_raise ArgumentError do
+ @grok.compile(regexp)
+ end
+ end
+ end
+
+ def test_grok_compile_succeeds_on_valid_expressions
+ good_regexps = ["[hello]", "(test)", "(?:hello)", "(?=testing)"]
+ good_regexps.each do |regexp|
+ assert_nothing_raised do
+ @grok.compile(regexp)
+ end
+ end
+ end
+
+ def test_grok_pattern_is_same_as_compile_pattern
+ pattern = "Hello world"
+ @grok.compile(pattern)
+ assert_equal(pattern, @grok.pattern)
+ end
+
+ # TODO(sissel): Move this test to a separate test suite aimed
+ # at testing grok internals
+ def test_grok_expanded_pattern_works_correctly
+ @grok.add_pattern("test", "hello world")
+ @grok.compile("%{test}")
+ assert_equal("(?<0000>hello world)", @grok.expanded_pattern)
+ end
+
+ def test_grok_load_patterns_from_file
+ require 'tempfile'
+ fd = Tempfile.new("grok_test_patterns.XXXXX")
+ fd.puts "TEST \\d+"
+ fd.close
+ @grok.add_patterns_from_file(fd.path)
+ @grok.compile("%{TEST}")
+ assert_equal("(?<0000>\\d+)", @grok.expanded_pattern)
+ end
+end
105 test/ffi-ruby/general/captures_test.rb
View
@@ -0,0 +1,105 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+class GrokPatternCapturingTests < Test::Unit::TestCase
+ def setup
+ @grok = Grok.new
+ end
+
+ def test_capture_methods
+ @grok.add_pattern("foo", ".*")
+ @grok.compile("%{foo}")
+ match = @grok.match("hello world")
+ assert_respond_to(match, :captures)
+ assert_respond_to(match, :start)
+ assert_respond_to(match, :end)
+ assert_respond_to(match, :subject)
+ assert_respond_to(match, :each_capture)
+ end
+
+ def test_basic_capture
+ @grok.add_pattern("foo", ".*")
+ @grok.compile("%{foo}")
+ input = "hello world"
+ match = @grok.match(input)
+ assert_equal("(?<0000>.*)", @grok.expanded_pattern)
+ assert_kind_of(Grok::Match, match)
+ assert_kind_of(Hash, match.captures)
+ assert_equal(match.captures.length, 1)
+ assert_kind_of(Array, match.captures["foo"])
+ assert_equal(1, match.captures["foo"].length)
+ assert_kind_of(String, match.captures["foo"][0])
+ assert_equal(input, match.captures["foo"][0])
+
+ match.each_capture do |key, val|
+ assert(key.is_a?(String), "Grok::Match::each_capture should yield string,string, got #{key.class.name} as first argument.")
+ assert(val.is_a?(String), "Grok::Match::each_capture should yield string,string, got #{key.class.name} as first argument.")
+ end
+
+ assert_kind_of(Fixnum, match.start)
+ assert_kind_of(Fixnum, match.end)
+ assert_kind_of(String, match.subject)
+ assert_equal(0, match.start,
+ "Match of /.*/, start should equal 0")
+ assert_equal(input.length, match.end,
+ "Match of /.*/, end should equal input string length")
+ assert_equal(input, match.subject)
+ end
+
+ def test_multiple_captures_with_same_name
+ @grok.add_pattern("foo", "\\w+")
+ @grok.compile("%{foo} %{foo}")
+ match = @grok.match("hello world")
+ assert_not_equal(false, match)
+ assert_equal(1, match.captures.length)
+ assert_equal(2, match.captures["foo"].length)
+ assert_equal("hello", match.captures["foo"][0])
+ assert_equal("world", match.captures["foo"][1])
+ end
+
+ def test_multiple_captures
+ @grok.add_pattern("foo", "\\w+")
+ @grok.add_pattern("bar", "\\w+")
+ @grok.compile("%{foo} %{bar}")
+ match = @grok.match("hello world")
+ assert_not_equal(false, match)
+ assert_equal(2, match.captures.length)
+ assert_equal(1, match.captures["foo"].length)
+ assert_equal(1, match.captures["bar"].length)
+ assert_equal("hello", match.captures["foo"][0])
+ assert_equal("world", match.captures["bar"][0])
+ end
+
+ def test_nested_captures
+ @grok.add_pattern("foo", "\\w+ %{bar}")
+ @grok.add_pattern("bar", "\\w+")
+ @grok.compile("%{foo}")
+ match = @grok.match("hello world")
+ assert_not_equal(false, match)
+ assert_equal(2, match.captures.length)
+ assert_equal(1, match.captures["foo"].length)
+ assert_equal(1, match.captures["bar"].length)
+ assert_equal("hello world", match.captures["foo"][0])
+ assert_equal("world", match.captures["bar"][0])
+ end
+
+ def test_nesting_recursion
+ @grok.add_pattern("foo", "%{foo}")
+ assert_raises(ArgumentError) do
+ @grok.compile("%{foo}")
+ end
+ end
+
+ def test_valid_capture_subnames
+ name = "foo"
+ @grok.add_pattern(name, "\\w+")
+ subname = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_abc:def"
+ @grok.compile("%{#{name}:#{subname}}")
+ match = @grok.match("hello")
+ assert_not_equal(false, match)
+ assert_equal(1, match.captures.length)
+ assert_equal(1, match.captures["#{name}:#{subname}"].length)
+ assert_equal("hello", match.captures["#{name}:#{subname}"][0])
+ end
+end
23 test/ffi-ruby/patterns/day.rb
View
@@ -0,0 +1,23 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Checks that the DAY pattern from the base pattern file matches both the
+# abbreviated and the full English weekday names.
+class DayPatternsTest < Test::Unit::TestCase
+  # Fresh Grok loaded with the base patterns and compiled for %{DAY}.
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../../patterns/base"
+    @grok.add_patterns_from_file(path)
+    @grok.compile("%{DAY}")
+  end
+
+  def test_days
+    days = %w{Mon Monday Tue Tuesday Wed Wednesday Thu Thursday Fri Friday
+              Sat Saturday Sun Sunday}
+    days.each do |day|
+      match = @grok.match(day)
+      # Check the match result, not the input: a String never equals false,
+      # so asserting on `day` could not fail.
+      assert_not_equal(false, match, "Expected #{day} to match.")
+      assert_equal(day, match.captures["DAY"][0])
+    end
+  end
+end
31 test/ffi-ruby/patterns/host.rb
View
@@ -0,0 +1,31 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Exercises the HOSTNAME pattern from the base pattern file.
+class HostPattternTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+    @grok.compile("%{HOSTNAME}")
+  end
+
+  # Each sample must match and be captured in full.
+  def test_hosts
+    samples = ["www.google.com", "foo-234.14.AAc5-2.foobar.net",
+               "192-455.a.b.c.d."]
+    samples.each do |hostname|
+      result = @grok.match(hostname)
+      assert_not_equal(false, result, "Expected this to match: #{hostname}")
+      assert_equal(hostname, result.captures["HOSTNAME"][0])
+    end
+  end
+
+  # Recompiles with a "=~ /\./" predicate on HOSTNAME and expects the dotted
+  # host name to be picked out of a longer line.
+  def test_hosts_in_string
+    @grok.compile("%{HOSTNAME =~ /\\./}")
+    hostname = "www.google.com"
+    text = "1 2 3 4 #{hostname} test"
+    result = @grok.match(text)
+    assert_not_equal(false, result, "Expected this to match: #{text}")
+    assert_equal(hostname, result.captures["HOSTNAME"][0])
+  end
+end
10,000 test/ffi-ruby/patterns/ip.input
View
10,000 additions, 0 deletions not shown
32 test/ffi-ruby/patterns/ip.rb
View
@@ -0,0 +1,32 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Exercises the IP pattern against a fixture of addresses (ip.input, one per
+# line) and against a list of strings that must be rejected.
+class IPPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../../patterns/base"
+    @grok.add_patterns_from_file(path)
+  end
+
+  # Every line of ip.input must match and be captured whole.
+  def test_ips
+    @grok.compile("%{IP}")
+    # File.foreach closes the fixture when the block finishes or raises;
+    # File.open(...).each left the handle open.
+    File.foreach("#{File.dirname(__FILE__)}/ip.input") do |line|
+      line.chomp!
+      match = @grok.match(line)
+      assert_not_equal(false, match, "Expected #{line} to match IP")
+      assert_equal(line, match.captures["IP"][0])
+    end
+  end
+
+  # None of these strings may match.
+  def test_non_ips
+    @grok.compile("%{IP}")
+    # %w splits on whitespace, so "hello" and "world" are separate entries;
+    # the two-word string is appended explicitly below.
+    nonips = %w{255.255.255.256 0.1.a.33 300.1.2.3 300 400.4.3.a 1.2.3.b
+                1..3.4.5 hello world}
+    nonips << "hello world"
+    nonips.each do |input|
+      match = @grok.match(input)
+      assert_equal(false, match, "Expected #{input.inspect} to not match IP")
+    end
+  end
+end
69 test/ffi-ruby/patterns/iso8601.rb
View
@@ -0,0 +1,69 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Exercises TIMESTAMP_ISO8601, anchored at both ends, against accepted and
+# rejected timestamps.
+class ISO8601PatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+    @grok.compile("^%{TIMESTAMP_ISO8601}$")
+  end
+
+  # Each timestamp must match and be captured in full.
+  def test_iso8601
+    accepted = [
+      "2001-01-01T00:00:00",
+      "1974-03-02T04:09:09",
+      "2010-05-03T08:18:18+00:00",
+      "2004-07-04T12:27:27-00:00",
+      "2001-09-05T16:36:36+0000",
+      "2001-11-06T20:45:45-0000",
+      "2001-12-07T23:54:54Z",
+      "2001-01-01T00:00:00.123456",
+      "1974-03-02T04:09:09.123456",
+      "2010-05-03T08:18:18.123456+00:00",
+      "2004-07-04T12:27:27.123456-00:00",
+      "2001-09-05T16:36:36.123456+0000",
+      "2001-11-06T20:45:45.123456-0000",
+      "2001-12-07T23:54:54.123456Z",
+      "2001-12-07T23:54:60.123456Z", # '60' second is a leap second.
+    ]
+    accepted.each do |stamp|
+      result = @grok.match(stamp)
+      assert_not_equal(false, result, "Expected #{stamp} to match TIMESTAMP_ISO8601")
+      assert_equal(stamp, result.captures["TIMESTAMP_ISO8601"][0])
+    end
+  end
+
+  # Each timestamp has one invalid field and must be rejected.
+  def test_iso8601_nomatch
+    rejected = [
+      "2001-13-01T00:00:00", # invalid month
+      "2001-00-01T00:00:00", # invalid month
+      "2001-01-00T00:00:00", # invalid day
+      "2001-01-32T00:00:00", # invalid day
+      "2001-01-aT00:00:00", # invalid day
+      "2001-01-1aT00:00:00", # invalid day
+      "2001-01-01Ta0:00:00", # invalid hour
+      "2001-01-01T0:00:00", # invalid hour
+      "2001-01-01T25:00:00", # invalid hour
+      "2001-01-01T01:60:00", # invalid minute
+      "2001-01-01T00:aa:00", # invalid minute
+      "2001-01-01T00:00:aa", # invalid second
+      "2001-01-01T00:00:-1", # invalid second
+      "2001-01-01T00:00:61", # invalid second
+      "2001-01-01T00:00:00A", # invalid timezone
+      "2001-01-01T00:00:00+", # invalid timezone
+      "2001-01-01T00:00:00+25", # invalid timezone
+      "2001-01-01T00:00:00+2500", # invalid timezone
+      "2001-01-01T00:00:00+25:00", # invalid timezone
+      "2001-01-01T00:00:00-25", # invalid timezone
+      "2001-01-01T00:00:00-2500", # invalid timezone
+      "2001-01-01T00:00:00-00:61", # invalid timezone
+    ]
+    rejected.each do |stamp|
+      result = @grok.match(stamp)
+      assert_equal(false, result, "Expected #{stamp} to not match TIMESTAMP_ISO8601")
+    end
+  end
+end
25 test/ffi-ruby/patterns/month.rb
View
@@ -0,0 +1,25 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Checks that the MONTH pattern matches abbreviated and full English month
+# names.
+class MonthPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+    @grok.compile("%{MONTH}")
+  end
+
+  def test_months
+    names = %w[Jan January Feb February Mar March Apr April May Jun June
+               Jul July Aug August Sep September Oct October Nov November
+               Dec December]
+    names.each do |name|
+      result = @grok.match(name)
+      assert_not_equal(false, result, "Expected #{name} to match")
+      assert_equal(name, result.captures["MONTH"][0])
+    end
+  end
+end
70 test/ffi-ruby/patterns/number.rb
View
@@ -0,0 +1,70 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Exercises the NUMBER and BASE16NUM patterns from the base pattern file.
+class NumberPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+  end
+
+  # Integers from -100000 to 100000; the step of 97 (a prime near 100) keeps
+  # the run to about 2000 iterations.
+  def test_match_number
+    @grok.compile("%{NUMBER}")
+    -100000.step(100000, 97) do |integer|
+      text = integer.to_s
+      result = @grok.match(text)
+      assert_not_equal(false, result, "#{integer} should not match false")
+      assert_equal(text, result.captures["NUMBER"][0])
+    end
+  end
+
+  # 1001 pseudo-random floats; the fixed seed makes every run identical.
+  def test_match_number_float
+    srand(0)
+    @grok.compile("%{NUMBER}")
+    1001.times do
+      text = (rand * 100000 - 50000).to_s
+      result = @grok.match(text)
+      assert_not_equal(false, result)
+      assert_equal(text, result.captures["NUMBER"][0])
+    end
+  end
+
+  # A number embedded in other text is found and captured on its own.
+  def test_match_number_amid_things
+    @grok.compile("%{NUMBER}")
+    samples = [["hello 12345 world", "12345"],
+               ["Something costs $55.4!", "55.4"]]
+    samples.each do |text, expected|
+      result = @grok.match(text)
+      assert_not_equal(false, result)
+      assert_equal(expected, result.captures["NUMBER"][0])
+    end
+  end
+
+  # Strings without any digits must not match.
+  def test_no_match_number
+    @grok.compile("%{NUMBER}")
+    ["foo", "", " ", ".", "hello world", "-abcd"].each do |text|
+      assert_equal(false, @grok.match(text.to_s))
+    end
+  end
+
+  # Only non-negative values are formatted and tested here.
+  def test_match_base16num
+    @grok.compile("%{BASE16NUM}")
+    (0..1000).each do |number|
+      ["%x", "0x%08x", "%016x"].each do |template|
+        hexstr = template % number
+        result = @grok.match(hexstr)
+        assert_not_equal(false, result)
+        assert_equal(hexstr, result.captures["BASE16NUM"][0])
+      end
+    end
+  end
+end
32 test/ffi-ruby/patterns/path.rb
View
@@ -0,0 +1,32 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok'
+require 'test/unit'
+
+# Exercises the PATH pattern with Unix-style and Windows-style paths.
+class PathPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+    @grok.compile("%{PATH}")
+  end
+
+  def test_unix_paths
+    samples = %w{/ /usr /usr/bin /usr/bin/foo /etc/motd /home/.test
+                 /foo/bar//baz //testing /.test /%foo% /asdf/asdf,v}
+    samples.each do |sample|
+      result = @grok.match(sample)
+      assert_not_equal(false, result)
+      assert_equal(sample, result.captures["PATH"][0])
+    end
+  end
+
+  def test_windows_paths
+    # Inside %w a doubled backslash is one literal backslash, so "\\\\Foo"
+    # is the UNC form \\Foo.
+    samples = %w{C:\WINDOWS \\\\Foo\bar \\\\1.2.3.4\C$ \\\\some\path\here.exe}
+    # Contains spaces, so it cannot be written inside the %w list.
+    samples << "C:\\Documents and Settings\\"
+    samples.each do |sample|
+      result = @grok.match(sample)
+      assert_not_equal(false, result, "Expected #{sample} to match, but it didn't.")
+      assert_equal(sample, result.captures["PATH"][0])
+    end
+  end
+end
21 test/ffi-ruby/patterns/prog.rb
View
@@ -0,0 +1,21 @@
+require 'grok'
+require 'test/unit'
+
+# Checks that the PROG pattern, anchored at both ends, matches typical
+# program names.
+class ProgPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../../patterns/base"
+    @grok.add_patterns_from_file(path)
+    @grok.compile("^%{PROG}$")
+  end
+
+  def test_progs
+    progs = %w{kernel foo-bar foo_bar foo/bar/baz}
+    progs.each do |prog|
+      match = @grok.match(prog)
+      # Check the match result, not the input: a String never equals false,
+      # so asserting on `prog` could not fail.
+      assert_not_equal(false, match, "Expected #{prog} to match.")
+      assert_equal(prog, match.captures["PROG"][0], "Expected #{prog} to match capture.")
+    end
+  end
+end
54 test/ffi-ruby/patterns/quotedstring.rb
View
@@ -0,0 +1,54 @@
+#require 'rubygems'
+require 'grok'
+require 'test/unit'
+
+# Exercises the QUOTEDSTRING pattern with double, single and backtick quotes.
+class QuotedStringPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+  end
+
+  # A quoted word and an empty quoted string, for each quote character.
+  def test_quoted_string_common
+    @grok.compile("%{QUOTEDSTRING}")
+    ["hello", ""].each do |content|
+      %w{" ' `}.each do |quote|
+        quoted = "#{quote}#{content}#{quote}"
+        result = @grok.match(quoted)
+        assert_not_equal(false, result)
+        assert_equal(quoted, result.captures["QUOTEDSTRING"][0])
+      end
+    end
+  end
+
+  # Backslash-escaped quotes inside the string belong to the capture.
+  def test_quoted_string_inside_escape
+    @grok.compile("%{QUOTEDSTRING}")
+    %w{" ' `}.each do |quote|
+      quoted = "#{quote}hello \\#{quote}world\\#{quote}#{quote}"
+      result = @grok.match(quoted)
+      assert_not_equal(false, result)
+      assert_equal(quoted, result.captures["QUOTEDSTRING"][0])
+    end
+  end
+
+  # Text wrapped only in escaped quotes is not a quoted string.
+  def test_escaped_quotes_no_match_quoted_string
+    @grok.compile("%{QUOTEDSTRING}")
+    samples = ["\\\"testing\\\"", "\\\'testing\\\'", "\\\`testing\\\`",]
+    samples.each do |sample|
+      assert_equal(false, @grok.match(sample))
+    end
+  end
+
+  # Inputs with no matching pair of unescaped quotes must not match.
+  def test_non_quoted_strings_no_match
+    @grok.compile("%{QUOTEDSTRING}")
+    samples = ["\\\"testing", "testing", "hello world ' something ` foo"]
+    samples.each do |sample|
+      assert_equal(false, @grok.match(sample))
+    end
+  end
+end
46 test/ffi-ruby/patterns/uri.rb
View
@@ -0,0 +1,46 @@
+require 'grok'
+require 'test/unit'
+
+# Checks that the URI pattern matches, and fully captures, a range of URLs
+# with different schemes, credentials, ports, paths and query strings.
+class URIPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../../patterns/base")
+    @grok.compile("%{URI}")
+  end
+
+  def test_urls
+    samples = [
+      "http://www.google.com",
+      "telnet://helloworld",
+      "http://www.example.com/",
+      "http://www.example.com/test.html",
+      "http://www.example.com/test.html?foo=bar",
+      "http://www.example.com/test.html?foo=bar&fizzle=baz",
+      "http://www.example.com:80/test.html?foo=bar&fizzle=baz",
+      "https://www.example.com:443/test.html?foo=bar&fizzle=baz",
+      "https://user@www.example.com:443/test.html?foo=bar&fizzle=baz",
+      "https://user:pass@somehost/fetch.pl",
+      "puppet:///",
+      "http://www.foo.com",
+      "http://www.foo.com/",
+      "http://www.foo.com/?testing",
+      "http://www.foo.com/?one=two",
+      "http://www.foo.com/?one=two&foo=bar",
+      "foo://somehost.com:12345",
+      "foo://user@somehost.com:12345",
+      "foo://user@somehost.com:12345/",
+      "foo://user@somehost.com:12345/foo.bar/baz/fizz",
+      "foo://user@somehost.com:12345/foo.bar/baz/fizz?test",
+      "foo://user@somehost.com:12345/foo.bar/baz/fizz?test=1&sink&foo=4",
+      "http://www.google.com/search?hl=en&source=hp&q=hello+world+%5E%40%23%24&btnG=Google+Search",
+      "http://www.freebsd.org/cgi/url.cgi?ports/sysutils/grok/pkg-descr",
+      "http://www.google.com/search?q=CAPTCHA+ssh&start=0&ie=utf-8&oe=utf-8&client=firefox-a&rls=org.mozilla:en-US:official",
+      "svn+ssh://somehost:12345/testing",
+    ]
+
+    samples.each do |sample|
+      result = @grok.match(sample)
+      assert_not_equal(false, result, "Expected this to match: #{sample}")
+      assert_equal(sample, result.captures["URI"][0])
+    end
+  end
+end
65 test/ffi-ruby/regression/grokmatch-subject-garbagecollected-early.rb
View
@@ -0,0 +1,65 @@
+# Relevant bug: http://code.google.com/p/logstash/issues/detail?id=47
+#
+
+require "test/unit"
+require "grok"
+
+# Regression test for http://code.google.com/p/logstash/issues/detail?id=47.
+# Matches the same syslog line 10000 times and verifies the captures on
+# every iteration.
+class GrokRegressionIssue47 < Test::Unit::TestCase
+  def test_issue_47
+    grok = Grok.new
+
+    # Registered in this order; later entries reference earlier ones.
+    [
+      ["PRI", "(?:<(?:[0-9]{1,3})>)"],
+      ["MONTH", "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"],
+      # A single-digit day is written with a leading space.
+      ["DAY", "(?: [1-9]|[12][0-9]|3[01])"],
+      ["HOUR", "(?:[01][0-9]|2[0-4])"],
+      ["MINUTE", "(?:[0-5][0-9])"],
+      ["SECOND", "(?:[0-5][0-9])"],
+      ["TIME", "%{HOUR}:%{MINUTE}:%{SECOND}"],
+      ["TIMESTAMP", "%{MONTH} %{DAY} %{TIME}"],
+      ["HOSTNAME", "(?:[A-Za-z0-9_.:]+)"],
+      ["HEADER", "%{TIMESTAMP} %{HOSTNAME}"],
+      ["MESSAGE", "(?:[ -~]+)"],
+    ].each { |name, regex| grok.add_pattern(name, regex) }
+
+    grok.compile("%{PRI}%{HEADER} %{MESSAGE}")
+
+    count = 10000
+
+    # Two spaces between "Mar" and "1": TIMESTAMP puts one space after MONTH
+    # and DAY itself starts with a space for single-digit days. With a single
+    # space the pattern cannot match this line at all.
+    timestamp = "Mar  1 15:43:35"
+    input = "<12>#{timestamp} snack kernel: Kernel logging (proc) stopped."
+    count.times do |i|
+      begin
+        m = grok.match(input)
+        # Fail with an assertion, not a NoMethodError on false, when the
+        # pattern does not match.
+        assert_not_equal(false, m, "no match on iteration #{i}")
+        captures = m.captures
+        errmsg = "on iteration #{i}\nInput: #{m.end - m.start} length\nSubject: #{m.subject.inspect}\nCaptures: #{captures.inspect}"
+        assert_equal("<12>", captures["PRI"].first, "pri #{errmsg}")
+        assert_equal(timestamp, captures["TIMESTAMP"].first, "timestamp #{errmsg}")
+        assert_equal("snack", captures["HOSTNAME"].first, "hostname #{errmsg}")
+        assert_equal("kernel: Kernel logging (proc) stopped.", captures["MESSAGE"].first, "message #{errmsg}")
+      rescue => e
+        # Report which attempt failed, then re-raise the original error.
+        puts "Error on attempt #{i + 1}"
+        raise e
+      end
+    end
+  end # def test_issue_47
+end # class GrokRegressionIssue47
3  test/ffi-ruby/run.sh
View
@@ -0,0 +1,3 @@
+#!/bin/sh
+#
+# Runs ruby with the given arguments, with $PWD/../../ appended to
+# LD_LIBRARY_PATH and RUBYLIB set to $PWD/../ext and $PWD/../lib.
+# NOTE(review): every path is relative to the current directory, so this
+# presumably has to be invoked from the directory it lives in -- confirm
+# that ../ext and ../lib resolve to the intended directories from there.
+
+LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$PWD/../../" RUBYLIB="$PWD/../ext:$PWD/../lib" ruby "$@"
48 test/ffi-ruby/speedtest.rb
View
@@ -0,0 +1,48 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'grok'
+
+# Matches every line of the log file named by args[0] against
+# %{COMBINEDAPACHELOG} and prints the parse rate (lines per second of
+# matching time), the number of matching lines and the number of
+# non-matching lines.
+def main(args)
+  grok = Grok.new
+  grok.add_patterns_from_file("../../patterns/pure-ruby/base")
+  grok.compile("%{COMBINEDAPACHELOG}")
+
+  matches = 0
+  failures = 0
+  # File.readlines opens, reads and closes the file; File.new(...).readlines
+  # left the handle open until garbage collection.
+  lines = File.readlines(args[0])
+  iterations = lines.length
+
+  start = Time.now
+  lines.each do |line|
+    m = grok.match(line)
+    if m
+      matches += 1
+      # Result unused; presumably called so that capture extraction is part
+      # of the measured time.
+      m.captures
+    else
+      failures += 1
+    end
+  end
+  duration = Time.now - start
+
+  puts "Parse rate: #{iterations / duration}"
+  puts matches.inspect
+  puts failures.inspect
+end
+
+# An access log path is required: abort prints the usage line to stderr and
+# exits with status 1.
+abort "Usage: #{$0} access_log_path" if ARGV.empty?
+
+# Run main on a single worker thread and wait for it to finish.
+workers = Array.new(1) { Thread.new { main(ARGV) } }
+workers.each(&:join)
17 test/pure-ruby/Makefile
View
@@ -0,0 +1,17 @@
+
+# Platform name as reported by uname (-o, falling back to -s).
+PLATFORM=$(shell (uname -o || uname -s) 2> /dev/null)
+
+# Shared-library suffix: dylib on Darwin, so everywhere else.
+ifeq ($(PLATFORM), Darwin)
+LIBSUFFIX=dylib
+else
+LIBSUFFIX=so
+endif
+
+# NOTE(review): test_jruby is a command-only target too but is not listed in
+# .PHONY -- confirm whether it should be.
+.PHONY: test
+# Build libgrok in ../../, then run the whole suite with ruby.
+test:
+ $(MAKE) -C ../../ libgrok.$(LIBSUFFIX)
+ LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" ruby alltests.rb
+
+# Same as `test`, but runs the suite with jruby.
+test_jruby:
+ $(MAKE) -C ../../ libgrok.$(LIBSUFFIX)
+ LD_LIBRARY_PATH="$${LD_LIBRARY_PATH}:$$PWD/../../" RUBYLIB="$$PWD/../lib" jruby alltests.rb
9 test/pure-ruby/alltests.rb
View
@@ -0,0 +1,9 @@
+require 'test/unit'
+$: << File.join(File.dirname(__FILE__), "..", "..", "lib")
+$: << File.join(File.dirname(__FILE__))
+
+# Load every .rb file found in the subdirectories next to this script.
+Dir.glob("#{File.dirname(__FILE__)}/*/**/*.rb").each do |test_file|
+  puts "Loading tests: #{test_file}"
+  load test_file
+end
+
58 test/pure-ruby/general/basic_test.rb
View
@@ -0,0 +1,58 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Basic API checks for the pure-ruby Grok: public methods, compile errors,
+# pattern expansion and loading patterns from a file.
+class GrokBasicTests < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+  end
+
+  def test_grok_methods
+    assert_respond_to(@grok, :compile)
+    assert_respond_to(@grok, :match)
+    assert_respond_to(@grok, :expanded_pattern)
+    assert_respond_to(@grok, :pattern)
+  end
+
+  # Malformed regular expressions must raise RegexpError from compile.
+  def test_grok_compile_fails_on_invalid_expressions
+    bad_regexps = ["[", "[foo", "?", "(?-"]
+    bad_regexps.each do |regexp|
+      assert_raise(RegexpError, "Should fail: /#{regexp}/") do
+        @grok.compile(regexp)
+      end
+    end
+  end
+
+  def test_grok_compile_succeeds_on_valid_expressions
+    good_regexps = ["[hello]", "(test)", "(?:hello)", "(?=testing)"]
+    good_regexps.each do |regexp|
+      assert_nothing_raised do
+        @grok.compile(regexp)
+      end
+    end
+  end
+
+  # Grok#pattern returns the string that was passed to compile.
+  def test_grok_pattern_is_same_as_compile_pattern
+    pattern = "Hello world"
+    @grok.compile(pattern)
+    assert_equal(pattern, @grok.pattern)
+  end
+
+  # TODO(sissel): Move this test to a separate test suite aimed
+  # at testing grok internals
+  def test_grok_expanded_pattern_works_correctly
+    @grok.add_pattern("test", "hello world")
+    @grok.compile("%{test}")
+    assert_equal("(?<a0>hello world)", @grok.expanded_pattern)
+  end
+
+  # Patterns written to a file as "NAME regex" lines can be loaded and used.
+  def test_grok_load_patterns_from_file
+    require 'tempfile'
+    fd = Tempfile.new("grok_test_patterns.XXXXX")
+    begin
+      fd.puts "TEST \\d+"
+      # Close first so the contents are flushed before Grok reads the file.
+      fd.close
+      @grok.add_patterns_from_file(fd.path)
+      @grok.compile("%{TEST}")
+      assert_equal("(?<a0>\\d+)", @grok.expanded_pattern)
+    ensure
+      # Delete the temp file now, even when an assertion fails, instead of
+      # leaving it until the Tempfile object is garbage collected.
+      fd.close!
+    end
+  end
+end
118 test/pure-ruby/general/captures_test.rb
View
@@ -0,0 +1,118 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Capture behaviour of the pure-ruby Grok: the Match API, repeated and nested
+# captures, recursion detection, inline definitions and capture subnames.
+class GrokPatternCapturingTests < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+  end
+
+  def test_capture_methods
+    @grok.add_pattern("foo", ".*")
+    @grok.compile("%{foo}")
+    match = @grok.match("hello world")
+    assert_respond_to(match, :captures)
+    assert_respond_to(match, :start)
+    assert_respond_to(match, :end)
+    assert_respond_to(match, :subject)
+    assert_respond_to(match, :each_capture)
+  end
+
+  # One capture of /.*/ over the whole input: checks the types and values of
+  # captures, each_capture, start, end and subject.
+  def test_basic_capture
+    @grok.add_pattern("foo", ".*")
+    @grok.compile("%{foo}")
+    input = "hello world"
+    match = @grok.match(input)
+    assert_equal("(?<a0>.*)", @grok.expanded_pattern)
+    assert_kind_of(Grok::Match, match)
+    assert_kind_of(Hash, match.captures)
+    # Expected value first, so a failure message reads the right way round.
+    assert_equal(1, match.captures.length)
+    assert_kind_of(Array, match.captures["foo"])
+    assert_equal(1, match.captures["foo"].length)
+    assert_kind_of(String, match.captures["foo"][0])
+    assert_equal(input, match.captures["foo"][0])
+
+    match.each_capture do |key, val|
+      assert(key.is_a?(String), "Grok::Match::each_capture should yield string,string, got #{key.class.name} as first argument.")
+      # Report the value's class and position, not the key's.
+      assert(val.is_a?(String), "Grok::Match::each_capture should yield string,string, got #{val.class.name} as second argument.")
+    end
+
+    # Integer rather than Fixnum: Fixnum was folded into Integer and no
+    # longer exists in current Ruby; on older Rubies it is an Integer
+    # subclass, so this assertion holds there as well.
+    assert_kind_of(Integer, match.start)
+    assert_kind_of(Integer, match.end)
+    assert_kind_of(String, match.subject)
+    assert_equal(0, match.start,
+                 "Match of /.*/, start should equal 0")
+    assert_equal(input.length, match.end,
+                 "Match of /.*/, end should equal input string length")
+    assert_equal(input, match.subject)
+  end
+
+  # The same pattern used twice collects both values under one key.
+  def test_multiple_captures_with_same_name
+    @grok.add_pattern("foo", "\\w+")
+    @grok.compile("%{foo} %{foo}")
+    match = @grok.match("hello world")
+    assert_not_equal(false, match)
+    assert_equal(1, match.captures.length)
+    assert_equal(2, match.captures["foo"].length)
+    assert_equal("hello", match.captures["foo"][0])
+    assert_equal("world", match.captures["foo"][1])
+  end
+
+  def test_multiple_captures
+    @grok.add_pattern("foo", "\\w+")
+    @grok.add_pattern("bar", "\\w+")
+    @grok.compile("%{foo} %{bar}")
+    match = @grok.match("hello world")
+    assert_not_equal(false, match)
+    assert_equal(2, match.captures.length)
+    assert_equal(1, match.captures["foo"].length)
+    assert_equal(1, match.captures["bar"].length)
+    assert_equal("hello", match.captures["foo"][0])
+    assert_equal("world", match.captures["bar"][0])
+  end
+
+  # A pattern that references another exposes the outer and inner capture.
+  def test_nested_captures
+    @grok.add_pattern("foo", "\\w+ %{bar}")
+    @grok.add_pattern("bar", "\\w+")
+    @grok.compile("%{foo}")
+    match = @grok.match("hello world")
+    assert_not_equal(false, match)
+    assert_equal(2, match.captures.length)
+    assert_equal(1, match.captures["foo"].length)
+    assert_equal(1, match.captures["bar"].length)
+    assert_equal("hello world", match.captures["foo"][0])
+    assert_equal("world", match.captures["bar"][0])
+  end
+
+  # Compiling a self-referencing pattern must raise RuntimeError.
+  def test_nesting_recursion
+    @grok.add_pattern("foo", "%{foo}")
+    assert_raises(RuntimeError) do
+      @grok.compile("%{foo}")
+    end
+  end
+
+  # "%{name=...}" defines a pattern inline; the inline name and the patterns
+  # used inside it all appear among the captures.
+  def test_inline_define
+    path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+    @grok.add_patterns_from_file(path)
+    @grok.compile("%{foo=%{IP} %{BASE10NUM:fizz}}")
+    match = @grok.match("1.2.3.4 300.4425")
+    # Fail with an assertion, not a NoMethodError on false, if nothing matched.
+    assert_not_equal(false, match)
+    assert_equal(3, match.captures.length)
+    assert(match.captures.include?("foo"))
+    assert(match.captures.include?("IP"))
+    assert(match.captures.include?("BASE10NUM:fizz"))
+  end
+
+  # A "name:subname" capture is stored under the full "name:subname" key.
+  def test_valid_capture_subnames
+    name = "foo"
+    @grok.add_pattern(name, "\\w+")
+    subname = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_abc:def"
+    @grok.compile("%{#{name}:#{subname}}")
+    match = @grok.match("hello")
+    assert_not_equal(false, match)
+    assert_equal(1, match.captures.length)
+    assert_equal(1, match.captures["#{name}:#{subname}"].length)
+    assert_equal("hello", match.captures["#{name}:#{subname}"][0])
+  end
+end
20,465 test/pure-ruby/logfile
View
20,465 additions, 0 deletions not shown
23 test/pure-ruby/patterns/day.rb
View
@@ -0,0 +1,23 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Checks that the DAY pattern from the pure-ruby base pattern file matches
+# both the abbreviated and the full English weekday names.
+class DayPatternsTest < Test::Unit::TestCase
+  # Fresh Grok loaded with the base patterns and compiled for %{DAY}.
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+    @grok.add_patterns_from_file(path)
+    @grok.compile("%{DAY}")
+  end
+
+  def test_days
+    days = %w{Mon Monday Tue Tuesday Wed Wednesday Thu Thursday Fri Friday
+              Sat Saturday Sun Sunday}
+    days.each do |day|
+      match = @grok.match(day)
+      # Check the match result, not the input: a String never equals false,
+      # so asserting on `day` could not fail.
+      assert_not_equal(false, match, "Expected #{day} to match.")
+      assert_equal(day, match.captures["DAY"][0])
+    end
+  end
+end
22 test/pure-ruby/patterns/host.rb
View
@@ -0,0 +1,22 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Exercises the HOSTNAME pattern from the pure-ruby base pattern file.
+class HostPattternTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base")
+    @grok.compile("%{HOSTNAME}")
+  end
+
+  # Each sample must match and be captured in full.
+  def test_hosts
+    samples = ["www.google.com", "foo-234.14.AAc5-2.foobar.net",
+               "192-455.a.b.c.d."]
+    samples.each do |hostname|
+      result = @grok.match(hostname)
+      assert_not_equal(false, result, "Expected this to match: #{hostname}")
+      assert_equal(hostname, result.captures["HOSTNAME"][0])
+    end
+  end
+end
10,000 test/pure-ruby/patterns/ip.input
View
10,000 additions, 0 deletions not shown
32 test/pure-ruby/patterns/ip.rb
View
@@ -0,0 +1,32 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Exercises the IP pattern against a fixture of addresses (ip.input, one per
+# line) and against a list of strings that must be rejected.
+class IPPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+    @grok.add_patterns_from_file(path)
+  end
+
+  # Every line of ip.input must match and be captured whole.
+  def test_ips
+    @grok.compile("%{IP}")
+    # File.foreach closes the fixture when the block finishes or raises;
+    # File.open(...).each left the handle open.
+    File.foreach("#{File.dirname(__FILE__)}/ip.input") do |line|
+      line.chomp!
+      match = @grok.match(line)
+      assert_not_equal(false, match, "Expected #{line} to match IP")
+      assert_equal(line, match.captures["IP"][0])
+    end
+  end
+
+  # None of these strings may match.
+  def test_non_ips
+    @grok.compile("%{IP}")
+    # %w splits on whitespace, so "hello" and "world" are separate entries;
+    # the two-word string is appended explicitly below.
+    nonips = %w{255.255.255.256 0.1.a.33 300.1.2.3 300 400.4.3.a 1.2.3.b
+                1..3.4.5 hello world}
+    nonips << "hello world"
+    nonips.each do |input|
+      match = @grok.match(input)
+      assert_equal(false, match, "Expected #{input.inspect} to not match IP")
+    end
+  end
+end
69 test/pure-ruby/patterns/iso8601.rb
View
@@ -0,0 +1,69 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Exercises TIMESTAMP_ISO8601 from the pure-ruby base patterns, anchored at
+# both ends, against accepted and rejected timestamps.
+class ISO8601PatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base")
+    @grok.compile("^%{TIMESTAMP_ISO8601}$")
+  end
+
+  # Each timestamp must match and be captured in full.
+  def test_iso8601
+    accepted = [
+      "2001-01-01T00:00:00",
+      "1974-03-02T04:09:09",
+      "2010-05-03T08:18:18+00:00",
+      "2004-07-04T12:27:27-00:00",
+      "2001-09-05T16:36:36+0000",
+      "2001-11-06T20:45:45-0000",
+      "2001-12-07T23:54:54Z",
+      "2001-01-01T00:00:00.123456",
+      "1974-03-02T04:09:09.123456",
+      "2010-05-03T08:18:18.123456+00:00",
+      "2004-07-04T12:27:27.123456-00:00",
+      "2001-09-05T16:36:36.123456+0000",
+      "2001-11-06T20:45:45.123456-0000",
+      "2001-12-07T23:54:54.123456Z",
+      "2001-12-07T23:54:60.123456Z", # '60' second is a leap second.
+    ]
+    accepted.each do |stamp|
+      result = @grok.match(stamp)
+      assert_not_equal(false, result, "Expected #{stamp} to match TIMESTAMP_ISO8601")
+      assert_equal(stamp, result.captures["TIMESTAMP_ISO8601"][0])
+    end
+  end
+
+  # Each timestamp has one invalid field and must be rejected.
+  def test_iso8601_nomatch
+    rejected = [
+      "2001-13-01T00:00:00", # invalid month
+      "2001-00-01T00:00:00", # invalid month
+      "2001-01-00T00:00:00", # invalid day
+      "2001-01-32T00:00:00", # invalid day
+      "2001-01-aT00:00:00", # invalid day
+      "2001-01-1aT00:00:00", # invalid day
+      "2001-01-01Ta0:00:00", # invalid hour
+      "2001-01-01T0:00:00", # invalid hour
+      "2001-01-01T25:00:00", # invalid hour
+      "2001-01-01T01:60:00", # invalid minute
+      "2001-01-01T00:aa:00", # invalid minute
+      "2001-01-01T00:00:aa", # invalid second
+      "2001-01-01T00:00:-1", # invalid second
+      "2001-01-01T00:00:61", # invalid second
+      "2001-01-01T00:00:00A", # invalid timezone
+      "2001-01-01T00:00:00+", # invalid timezone
+      "2001-01-01T00:00:00+25", # invalid timezone
+      "2001-01-01T00:00:00+2500", # invalid timezone
+      "2001-01-01T00:00:00+25:00", # invalid timezone
+      "2001-01-01T00:00:00-25", # invalid timezone
+      "2001-01-01T00:00:00-2500", # invalid timezone
+      "2001-01-01T00:00:00-00:61", # invalid timezone
+    ]
+    rejected.each do |stamp|
+      result = @grok.match(stamp)
+      assert_equal(false, result, "Expected #{stamp} to not match TIMESTAMP_ISO8601")
+    end
+  end
+end
25 test/pure-ruby/patterns/month.rb
View
@@ -0,0 +1,25 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Checks that the MONTH pattern from the pure-ruby base patterns matches
+# abbreviated and full English month names.
+class MonthPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base")
+    @grok.compile("%{MONTH}")
+  end
+
+  def test_months
+    names = %w[Jan January Feb February Mar March Apr April May Jun June
+               Jul July Aug August Sep September Oct October Nov November
+               Dec December]
+    names.each do |name|
+      result = @grok.match(name)
+      assert_not_equal(false, result, "Expected #{name} to match")
+      assert_equal(name, result.captures["MONTH"][0])
+    end
+  end
+end
70 test/pure-ruby/patterns/number.rb
View
@@ -0,0 +1,70 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Exercises the NUMBER and BASE16NUM patterns from the pure-ruby base
+# pattern file.
+class NumberPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base")
+  end
+
+  # Integers from -100000 to 100000; the step of 97 (a prime near 100) keeps
+  # the run to about 2000 iterations.
+  def test_match_number
+    @grok.compile("%{NUMBER}")
+    -100000.step(100000, 97) do |integer|
+      text = integer.to_s
+      result = @grok.match(text)
+      assert_not_equal(false, result, "#{integer} should not match false")
+      assert_equal(text, result.captures["NUMBER"][0])
+    end
+  end
+
+  # 1001 pseudo-random floats; the fixed seed makes every run identical.
+  def test_match_number_float
+    srand(0)
+    @grok.compile("%{NUMBER}")
+    1001.times do
+      text = (rand * 100000 - 50000).to_s
+      result = @grok.match(text)
+      assert_not_equal(false, result)
+      assert_equal(text, result.captures["NUMBER"][0])
+    end
+  end
+
+  # A number embedded in other text is found and captured on its own.
+  def test_match_number_amid_things
+    @grok.compile("%{NUMBER}")
+    samples = [["hello 12345 world", "12345"],
+               ["Something costs $55.4!", "55.4"]]
+    samples.each do |text, expected|
+      result = @grok.match(text)
+      assert_not_equal(false, result)
+      assert_equal(expected, result.captures["NUMBER"][0])
+    end
+  end
+
+  # Strings without any digits must not match.
+  def test_no_match_number
+    @grok.compile("%{NUMBER}")
+    ["foo", "", " ", ".", "hello world", "-abcd"].each do |text|
+      assert_equal(false, @grok.match(text.to_s))
+    end
+  end
+
+  # Only non-negative values are formatted and tested here.
+  def test_match_base16num
+    @grok.compile("%{BASE16NUM}")
+    (0..1000).each do |number|
+      ["%x", "0x%08x", "%016x"].each do |template|
+        hexstr = template % number
+        result = @grok.match(hexstr)
+        assert_not_equal(false, result)
+        assert_equal(hexstr, result.captures["BASE16NUM"][0])
+      end
+    end
+  end
+end
32 test/pure-ruby/patterns/path.rb
View
@@ -0,0 +1,32 @@
+$: << File.join(File.dirname(__FILE__), "..", "..", "..", "lib")
+require 'grok-pure'
+require 'test/unit'
+
+# Exercises the PATH pattern from the pure-ruby base patterns with Unix-style
+# and Windows-style paths.
+class PathPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    @grok.add_patterns_from_file("#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base")
+    @grok.compile("%{PATH}")
+  end
+
+  def test_unix_paths
+    samples = %w{/ /usr /usr/bin /usr/bin/foo /etc/motd /home/.test
+                 /foo/bar//baz //testing /.test /%foo% /asdf/asdf,v}
+    samples.each do |sample|
+      result = @grok.match(sample)
+      assert_not_equal(false, result)
+      assert_equal(sample, result.captures["PATH"][0])
+    end
+  end
+
+  def test_windows_paths
+    # Inside %w a doubled backslash is one literal backslash, so "\\\\Foo"
+    # is the UNC form \\Foo.
+    samples = %w{C:\WINDOWS \\\\Foo\bar \\\\1.2.3.4\C$ \\\\some\path\here.exe}
+    # Contains spaces, so it cannot be written inside the %w list.
+    samples << "C:\\Documents and Settings\\"
+    samples.each do |sample|
+      result = @grok.match(sample)
+      assert_not_equal(false, result, "Expected #{sample} to match, but it didn't.")
+      assert_equal(sample, result.captures["PATH"][0])
+    end
+  end
+end
21 test/pure-ruby/patterns/prog.rb
View
@@ -0,0 +1,21 @@
+require 'grok-pure'
+require 'test/unit'
+
+# Checks that the PROG pattern from the pure-ruby base patterns, anchored at
+# both ends, matches typical program names.
+class ProgPatternsTest < Test::Unit::TestCase
+  def setup
+    @grok = Grok.new
+    path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+    @grok.add_patterns_from_file(path)
+    @grok.compile("^%{PROG}$")
+  end
+
+  def test_progs
+    progs = %w{kernel foo-bar foo_bar foo/bar/baz}
+    progs.each do |prog|
+      match = @grok.match(prog)
+      # Check the match result, not the input: a String never equals false,
+      # so asserting on `prog` could not fail.
+      assert_not_equal(false, match, "Expected #{prog} to match.")
+      assert_equal(prog, match.captures["PROG"][0], "Expected #{prog} to match capture.")
+    end
+  end
+end
54 test/pure-ruby/patterns/quotedstring.rb
View
@@ -0,0 +1,54 @@
+#require 'rubygems'
+require 'grok-pure'
+require 'test/unit'
+
+class QuotedStringPatternsTest < Test::Unit::TestCase
+ def setup
+ @grok = Grok.new
+ path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+ @grok.add_patterns_from_file(path)
+ end
+
+ def test_quoted_string_common
+ @grok.compile("%{QUOTEDSTRING}")
+ inputs = ["hello", ""]
+ quotes = %w{" ' `}
+ inputs.each do |value|
+ quotes.each do |quote|
+ str = "#{quote}#{value}#{quote}"
+ match = @grok.match(str)
+ assert_not_equal(false, match)
+ assert_equal(str, match.captures["QUOTEDSTRING"][0])
+ end
+ end
+ end
+
+ def test_quoted_string_inside_escape
+ @grok.compile("%{QUOTEDSTRING}")
+ quotes = %w{" ' `}
+ quotes.each do |quote|
+ str = "#{quote}hello \\#{quote}world\\#{quote}#{quote}"
+ match = @grok.match(str)
+ assert_not_equal(false, match)
+ assert_equal(str, match.captures["QUOTEDSTRING"][0])
+ end
+ end
+
+ def test_escaped_quotes_no_match_quoted_string
+ @grok.compile("%{QUOTEDSTRING}")
+ inputs = ["\\\"testing\\\"", "\\\'testing\\\'", "\\\`testing\\\`",]
+ inputs.each do |value|
+ match = @grok.match(value)
+ assert_equal(false, match)
+ end
+ end
+
+ def test_non_quoted_strings_no_match
+ @grok.compile("%{QUOTEDSTRING}")
+ inputs = ["\\\"testing", "testing", "hello world ' something ` foo"]
+ inputs.each do |value|
+ match = @grok.match(value)
+ assert_equal(false, match)
+ end
+ end
+end
46 test/pure-ruby/patterns/uri.rb
View
@@ -0,0 +1,46 @@
+require 'grok-pure'
+require 'test/unit'
+
+class URIPatternsTest < Test::Unit::TestCase
+ def setup
+ @grok = Grok.new
+ path = "#{File.dirname(__FILE__)}/../../../patterns/pure-ruby/base"
+ @grok.add_patterns_from_file(path)
+ @grok.compile("%{URI}")
+ end
+
+ def test_urls
+ urls = ["http://www.google.com", "telnet://helloworld",
+ "http://www.example.com/", "http://www.example.com/test.html",
+ "http://www.example.com/test.html?foo=bar",
+ "http://www.example.com/test.html?foo=bar&fizzle=baz",
+ "http://www.example.com:80/test.html?foo=bar&fizzle=baz",
+ "https://www.example.com:443/test.html?foo=bar&fizzle=baz",
+ "https://user@www.example.com:443/test.html?foo=bar&fizzle=baz",
+ "https://user:pass@somehost/fetch.pl",
+ "puppet:///",
+ "http://www.foo.com",
+ "http://www.foo.com/",
+ "http://www.foo.com/?testing",
+ "http://www.foo.com/?one=two",
+ "http://www.foo.com/?one=two&foo=bar",
+ "foo://somehost.com:12345",
+ "foo://user@somehost.com:12345",
+ "foo://user@somehost.com:12345/",
+ "foo://user@somehost.com:12345/foo.bar/baz/fizz",
+ "foo://user@somehost.com:12345/foo.bar/baz/fizz?test",
+ "foo://user@somehost.com:12345/foo.bar/baz/fizz?test=1&sink&foo=4",
+ "http://www.google.com/search?hl=en&source=hp&q=hello+world+%5E%40%23%24&btnG=Google+Search",
+ "http://www.freebsd.org/cgi/url.cgi?ports/sysutils/grok/pkg-descr",
+ "http://www.google.com/search?q=CAPTCHA+ssh&start=0&ie=utf-8&oe=utf-8&client=firefox-a&rls=org.mozilla:en-US:official",
+ "svn+ssh://somehost:12345/testing",
+ ]
+
+ urls.each do |url|
+ match = @grok.match(url)
+ assert_not_equal(false, match, "Expected this to match: #{url}")
+ assert_equal(url, match.captures["URI"][0])
+ end
+ end
+
+end