Permalink
Browse files

- split the ruby grok bits into a separate repo

  • Loading branch information...
1 parent a615285 commit f8892b5067a354897ccf3e9297943959223bcb6e @jordansissel committed Dec 7, 2011
Showing with 42,734 additions and 0 deletions.
  1. +10 −0 Rakefile
  2. +131 −0 examples/grok-web.rb
  3. +39 −0 examples/pattern-discovery.rb
  4. +30 −0 examples/test.rb
  5. +25 −0 grok.gemspec
  6. +3 −0 lib/Grok.rb
  7. +145 −0 lib/grok-pure.rb
  8. +133 −0 lib/grok.rb
  9. +80 −0 lib/grok/c-ext/match.rb
  10. +56 −0 lib/grok/c-ext/pile.rb
  11. +45 −0 lib/grok/pure/match.rb
  12. +71 −0 lib/grok/pure/pile.rb
  13. +90 −0 patterns/pure-ruby/base
  14. +3 −0 patterns/pure-ruby/java
  15. +3 −0 patterns/pure-ruby/ruby
  16. +32 −0 sample.rb
  17. +19 −0 test/Makefile
  18. +17 −0 test/ffi-ruby/Makefile
  19. +8 −0 test/ffi-ruby/alltests.rb
  20. +58 −0 test/ffi-ruby/general/basic_test.rb
  21. +105 −0 test/ffi-ruby/general/captures_test.rb
  22. +23 −0 test/ffi-ruby/patterns/day.rb
  23. +31 −0 test/ffi-ruby/patterns/host.rb
  24. +10,000 −0 test/ffi-ruby/patterns/ip.input
  25. +32 −0 test/ffi-ruby/patterns/ip.rb
  26. +69 −0 test/ffi-ruby/patterns/iso8601.rb
  27. +25 −0 test/ffi-ruby/patterns/month.rb
  28. +70 −0 test/ffi-ruby/patterns/number.rb
  29. +32 −0 test/ffi-ruby/patterns/path.rb
  30. +21 −0 test/ffi-ruby/patterns/prog.rb
  31. +54 −0 test/ffi-ruby/patterns/quotedstring.rb
  32. +46 −0 test/ffi-ruby/patterns/uri.rb
  33. +65 −0 test/ffi-ruby/regression/grokmatch-subject-garbagecollected-early.rb
  34. +3 −0 test/ffi-ruby/run.sh
  35. +48 −0 test/ffi-ruby/speedtest.rb
  36. +17 −0 test/pure-ruby/Makefile
  37. +9 −0 test/pure-ruby/alltests.rb
  38. +58 −0 test/pure-ruby/general/basic_test.rb
  39. +118 −0 test/pure-ruby/general/captures_test.rb
  40. +20,465 −0 test/pure-ruby/logfile
  41. +23 −0 test/pure-ruby/patterns/day.rb
  42. +22 −0 test/pure-ruby/patterns/host.rb
  43. +10,000 −0 test/pure-ruby/patterns/ip.input
  44. +32 −0 test/pure-ruby/patterns/ip.rb
  45. +69 −0 test/pure-ruby/patterns/iso8601.rb
  46. +25 −0 test/pure-ruby/patterns/month.rb
  47. +70 −0 test/pure-ruby/patterns/number.rb
  48. +32 −0 test/pure-ruby/patterns/path.rb
  49. +21 −0 test/pure-ruby/patterns/prog.rb
  50. +54 −0 test/pure-ruby/patterns/quotedstring.rb
  51. +46 −0 test/pure-ruby/patterns/uri.rb
  52. +3 −0 test/pure-ruby/run.sh
  53. +48 −0 test/pure-ruby/speedtest.rb
View
@@ -0,0 +1,10 @@
# Rake tasks for building and publishing the jls-grok gem.
task :default => [:package]

# Build the gem described by grok.gemspec in the current directory.
task :package do
  # Argument-vector form: no shell is involved, so nothing needs quoting.
  system("gem", "build", "grok.gemspec")
end

# Push the most recently modified jls-grok*.gem in the current directory.
task :publish do
  # Newest file by modification time (what `ls -t | head -1` selected before).
  latest_gem = Dir.glob("jls-grok*.gem").max_by { |path| File.mtime(path) }

  # Without this guard an empty build directory ran `gem push` with no file.
  abort("No jls-grok*.gem found; run `rake package` first.") if latest_gem.nil?

  system("gem", "push", latest_gem)
end
View
@@ -0,0 +1,131 @@
#!/usr/bin/env ruby
#
# Simple web application that will let you feed grok's discovery feature
# a bunch of data, and grok will show you patterns found and the results
# of that pattern as matched against the same input.

require 'rubygems'
require 'sinatra'
require 'grok'

# Send visitors of the bare root to the demo's index page.
get '/' do
  redirect "/demo/grok-discover/index"
end

# Input form (the :index template below __END__).
get "/demo/grok-discover/index" do
  haml :index
end

# Run pattern discovery on every submitted line and render the results
# with the :grok template.
post "/demo/grok-discover/grok" do
  grok = Grok.new
  grok.add_patterns_from_file("/usr/local/share/grok/patterns/base")
  @results = []

  # A missing "data" field is treated as empty input. Browsers submit
  # textarea content with CRLF line endings, so split on either form to
  # keep a stray "\r" out of the discovered pattern.
  params[:data].to_s.split(/\r?\n/).each do |line|
    pattern = grok.discover(line)
    grok.compile(pattern)
    match = grok.match(line)
    puts "Got input: #{line}"
    puts " => pattern: (#{match != false}) #{pattern}"
    @results << {
      :input => line,
      :pattern => grok.pattern.gsub(/\\Q|\\E/, ""),
      :full_pattern => grok.expanded_pattern,
      # The :grok template iterates over this value with #each, so use an
      # empty hash (not false) when the line did not match.
      :match => (match ? match.captures : {}),
    }
  end
  haml :grok
end

# Stylesheet (the :style template below __END__).
get "/demo/grok-discover/style.css" do
  sass :style
end
+
+__END__
+@@ style
+h1
+ color: red
+.original
+.regexp
+ display: block
+ border: 1px solid grey
+ padding: 1em
+
+.results
+ width: 80%
+ margin-left: auto
+ th
+ text-align: left
+ td
+ border-top: 1px solid black
+@@ layout
+%html
+ %head
+ %title Grok Web
+ %link{:rel => "stylesheet", :href => "/demo/grok-discover/style.css"}
+ %body
+ =yield
+
+@@ index
+#header
+ %h1 Grok Web
+#content
+ Paste some log data below. I'll do my best to have grok generate a pattern for you.
+
+ %p
+ Learn more about grok here:
+ %a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} Grok
+
+ %p
+ This is running off of my cable modem for now, so if it's sluggish, that's
+ why. Be gentle.
+ %form{:action => "/demo/grok-discover/grok", :method => "post"}
+ %textarea{:name => "data", :rows => 10, :cols => 80}
+ %br
+ %input{:type => "submit", :value=>"submit"}
+
+@@ grok
+#header
+ %h1 Grok Results
+ %h3
+ %a{:href => "/demo/grok-discover/index"} Try more?
+#content
+ %p
+ Below is grok's analysis of the data you provided. Each line is analyzed
+ separately. It uses grok's standard library of known patterns to give you a
+ pattern that grok can use to match more logs like the lines you provided.
+ %p
+ The results may not be perfect, but it gives you a head start on coming up with
+ log patterns for
+ %a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} grok
+ and
+ %a{:href => "http://code.google.com/p/logstash/"} logstash
+ %ol
+ - @results.each do |result|
+ %li
+ %p.original
+ %b Original:
+ %br= result[:input]
+ %p
+ %b Pattern:
+ %br
+ %span.pattern= result[:pattern]
+ %p
+ %b
+ Generated Regular Expression
+ %small
+ %i You could have written this by hand, be glad you didn't have to.
+ %code.regexp= result[:full_pattern].gsub("<", "&lt;")
+ %p
+ If you wanted to test this, you can paste the above expression into
+ pcretest(1) and it should match your input.
+ %p
+ %b Capture Results
+ %table.results
+ %tr
+ %th Name
+ %th Value
+ - result[:match].each do |key,val|
+ - val.each do |v|
+ %tr
+ %td= key
+ %td= v
@@ -0,0 +1,39 @@
#!/usr/bin/env ruby
#
# Example: let grok discover a pattern for one input, then reuse that
# pattern to capture fields from a second, slightly different input.

require "rubygems"
require "grok"
require "pp"

grok = Grok.new

# Load some default patterns that ship with grok.
# See also:
# http://code.google.com/p/semicomplete/source/browse/grok/patterns/base
grok.add_patterns_from_file("/usr/local/share/grok/patterns/base")

# Using the patterns we know, try to build a grok pattern that best matches
# a string we give. Let's try Time.now.to_s, which has this format;
# => Fri Apr 16 19:15:27 -0700 2010
input = "Time is #{Time.now}"
pattern = grok.discover(input)

puts "Input: #{input}"
puts "Pattern: #{pattern}"
grok.compile(pattern)

# Sleep to change time.
puts "Sleeping so time changes and we can test against another input."
sleep(2)
match = grok.match("Time is #{Time.now}")

# Grok#match returns false when the input does not match, so guard before
# asking for captures.
if match
  puts "Resulting capture:"
  pp match.captures
else
  puts "The discovered pattern did not match the second input."
end

# When run, the output should look something like this:
# % ruby pattern-discovery.rb
# Pattern: Time is Fri %{SYSLOGDATE} %{BASE10NUM} 2010
# {"BASE10NUM"=>["-0700"],
# "SYSLOGDATE"=>["Apr 16 19:17:38"],
# "TIME"=>["19:17:38"],
# "MONTH"=>["Apr"],
# "MONTHDAY"=>["16"]}
View
@@ -0,0 +1,30 @@
#!/usr/bin/env ruby
#
# Example: compile a single known pattern and show what it captures.

require "rubygems"
require "grok"
require "pp"

grok = Grok.new

# Load some default patterns that ship with grok.
# See also:
# http://code.google.com/p/semicomplete/source/browse/grok/patterns/base
# NOTE(review): this path is resolved against the current working
# directory - confirm it still points at a pattern file in this repository.
grok.add_patterns_from_file("../..//patterns/base")

# Match an ISO8601 timestamp with the TIMESTAMP_ISO8601 pattern.
input = "2010-04-18T15:06:02Z"
pattern = "%{TIMESTAMP_ISO8601}"

# Compile once; a second compile of the same pattern adds nothing.
grok.compile(pattern)
puts "Input: #{input}"
puts "Pattern: #{pattern}"
puts "Full: #{grok.expanded_pattern}"

match = grok.match(input)
if match
  puts "Resulting capture:"
  pp match.captures
end
View
@@ -0,0 +1,25 @@
# Gem specification for jls-grok.
Gem::Specification.new do |spec|
  # Everything that ships in the gem: library code, pattern files and tests.
  # Array#+ returns a new array, so the results must be assigned back
  # (the bare `files + ...` form silently dropped patterns and tests).
  files = Dir.glob("lib/**/*.rb")
  files += Dir.glob("patterns/**/*")
  files += Dir.glob("test/**/*")
  # The recursive globs also return directories; keep regular files only.
  files = files.select { |path| File.file?(path) }

  #svnrev = %x{svn info}.split("\n").grep(/Revision:/).first.split(" ").last.to_i
  spec.name = "jls-grok"
  spec.version = "0.9.2"

  spec.summary = "grok bindings for ruby"
  spec.description = "Grok ruby bindings - pattern match/extraction tool"
  spec.files = files

  # TODO(sissel): ffi is now optional, get rid of it?
  #spec.add_dependency("ffi", "> 0.6.3")
  # Assign rather than append so "lib" is not listed twice.
  spec.require_paths = ["lib"]

  # Cabin for logging.
  spec.add_dependency("cabin")

  spec.authors = ["Jordan Sissel", "Pete Fritchman"]
  spec.email = ["jls@semicomplete.com", "petef@databits.net"]
  spec.homepage = "http://code.google.com/p/semicomplete/wiki/Grok"
end
+
View
@@ -0,0 +1,3 @@
+require "grok.rb"
+
+# compat for when grok was Grok.so
View
@@ -0,0 +1,145 @@
+require "rubygems"
+require "logger"
+require "cabin"
+
+# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
# Pure-Ruby grok: expands "%{NAME}" / "%{NAME:subname}" references in a
# pattern into nested named-capture regular expressions and matches text
# against the result.
class Grok
  # The pattern string most recently passed to #compile.
  attr_accessor :pattern
  # The regular expression source produced by #compile, with every %{...}
  # reference replaced by a named capture group.
  attr_accessor :expanded_pattern
  # Object that receives #info / #debug calls (a Cabin::Channel by default).
  attr_accessor :logger

  # Matches one "%{...}" reference. Named groups:
  #   name       - the full reference name ("PATTERN" or "PATTERN:subname")
  #   pattern    - the pattern name looked up in the pattern table
  #   subname    - optional part after the first ':'
  #   definition - optional inline definition ("%{NAME=regex}")
  # NOTE: the character range A-z also covers the ASCII punctuation between
  # 'Z' and 'a' (including '_'), which is what lets names such as
  # TIMESTAMP_ISO8601 match.
  PATTERN_RE = \
    /%\{ # match '%{' not prefixed with '\'
      (?<name> # match the pattern name
        (?<pattern>[A-z0-9]+)
        (?::(?<subname>[A-z0-9_:]+))?
      )
      (?:=(?<definition>
        (?:
          (?:[^{}\\]+|\\.+)+
          |
          (?<curly>\{(?:(?>[^{}]+|(?>\\[{}])+)|(\g<curly>))*\})+
        )+
      ))?
      [^}]*
    \}/x

  GROK_OK = 0
  GROK_ERROR_FILE_NOT_ACCESSIBLE = 1
  GROK_ERROR_PATTERN_NOT_FOUND = 2
  GROK_ERROR_UNEXPECTED_READ_SIZE = 3
  GROK_ERROR_COMPILE_FAILED = 4
  GROK_ERROR_UNINITIALIZED = 5
  GROK_ERROR_PCRE_ERROR = 6
  GROK_ERROR_NOMATCH = 7

  # Create an instance with an empty pattern table and a Cabin channel that
  # logs to STDOUT at :warn level.
  def initialize
    @patterns = {}
    @logger = Cabin::Channel.new
    @logger.subscribe(Logger.new(STDOUT))
    @logger.level = :warn

    # TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
  end # def initialize

  # Register (or replace) a named pattern.
  #
  # name    - pattern name as used inside %{...}
  # pattern - regular expression source; may itself contain %{...}
  #
  # Returns nil.
  def add_pattern(name, pattern)
    @logger.info("Adding pattern", name => pattern)
    @patterns[name] = pattern
    return nil
  end

  # Load patterns from a text file with one "NAME pattern" entry per line.
  # Lines starting with '#' (after optional whitespace) and lines without a
  # pattern part are skipped.
  #
  # path - path of the file to read
  #
  # Returns nil.
  def add_patterns_from_file(path)
    # File.foreach closes the file when iteration ends (or raises); the
    # previous File.new handle was never closed.
    File.foreach(path) do |line|
      next if line =~ /^\s*#/
      name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
      next if pattern.nil?
      add_pattern(name, pattern.chomp)
    end
    return nil
  end # def add_patterns_from_file

  # Expand every %{...} reference in +pattern+ and compile the result into
  # the regular expression used by #match.
  #
  # pattern - grok pattern string; it is not modified
  #
  # Raises RuntimeError when a referenced pattern is not defined, or when
  # expansion has not finished after 100 steps.
  def compile(pattern)
    @capture_map = {}

    iterations_left = 100
    @pattern = pattern
    # Expand a private copy. Editing the caller's string in place used to
    # change both the caller's variable and @pattern, and fails outright on
    # a frozen string.
    @expanded_pattern = pattern.dup
    index = 0

    # Replace any instances of '%{FOO}' with that pattern.
    loop do
      if iterations_left == 0
        raise "Deep recursion pattern compilation of #{pattern.inspect} - expanded: #{@expanded_pattern.inspect}"
      end
      iterations_left -= 1
      m = PATTERN_RE.match(@expanded_pattern)
      break unless m

      # Inline definition ("%{NAME=regex}"): register it before the lookup.
      add_pattern(m["pattern"], m["definition"]) if m["definition"]

      # An unknown name can never be expanded; fail right away instead of
      # retrying the same reference until the iteration budget runs out.
      unless @patterns.include?(m["pattern"])
        raise "pattern #{m[0]} not defined"
      end

      # create a named capture index that we can push later as the named
      # pattern. We do this because ruby regexp can't capture something
      # by the same name twice.
      capture = "a#{index}" # named captures have to start with letters?
      replacement_pattern = "(?<#{capture}>#{@patterns[m["pattern"]]})"
      @capture_map[capture] = m["name"]
      # Block form: the replacement is inserted verbatim. A replacement
      # *string* would have its backslash sequences interpreted, corrupting
      # patterns that contain e.g. an escaped backslash.
      @expanded_pattern.sub!(m[0]) { replacement_pattern }
      index += 1
    end

    @regexp = Regexp.new(@expanded_pattern)
    @logger.debug("Grok compiled OK", :pattern => pattern,
                  :expanded_pattern => @expanded_pattern)
  end # def compile

  # Match +text+ against the pattern compiled by #compile.
  #
  # Returns a Grok::Match describing the match, or false when the text does
  # not match.
  def match(text)
    match = @regexp.match(text)
    return false unless match

    grokmatch = Grok::Match.new
    grokmatch.subject = text
    grokmatch.start, grokmatch.end = match.offset(0)
    grokmatch.grok = self
    grokmatch.match = match
    @logger.debug("Regexp match object", :names => match.names, :captures => match.captures)
    return grokmatch
  end # def match

  # Delegate to a lazily created GrokDiscover and return its result for
  # +input+.
  def discover(input)
    init_discover if @discover.nil?

    return @discover.discover(input)
  end # def discover

  # Map an internal capture id ("a0", "a1", ...) assigned by #compile back
  # to the reference name it stands for (e.g. "NUM:val").
  def capture_name(id)
    return @capture_map[id]
  end # def capture_name

  private

  # Create the GrokDiscover helper used by #discover.
  # NOTE(review): neither GrokDiscover nor a `logmask` method is defined in
  # the code visible here - confirm they exist before relying on #discover.
  def init_discover
    @discover = GrokDiscover.new(self)
    @discover.logmask = logmask
  end # def init_discover
end # Grok
+
+require "grok/pure/match"
+require "grok/pure/pile"
Oops, something went wrong.

0 comments on commit f8892b5

Please sign in to comment.