From eb9eabad8ad718bb031ff492254125ab90d73ad9 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Mon, 1 Aug 2016 12:14:28 -0500 Subject: [PATCH] Add new execute method for optimized use by outside libs Bump benchmark pattern test to use 10m matches to even out inconsistencies --- CHANGELIST | 3 +++ grok.gemspec | 2 +- lib/grok-pure.rb | 23 ++++++++++++++++++----- lib/grok/pure/match.rb | 2 +- test/pure-ruby/benchmark_pattern.rb | 8 ++++---- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CHANGELIST b/CHANGELIST index e07929b..f4941db 100644 --- a/CHANGELIST +++ b/CHANGELIST @@ -1,3 +1,6 @@ +* 0.11.3 + - Add optimized execute method + * 0.10.7 - add Grok::PatternError exception class - raise PatternError when a pattern contains an unknown %{expansion} diff --git a/grok.gemspec b/grok.gemspec index 99c25e3..0fdb11d 100644 --- a/grok.gemspec +++ b/grok.gemspec @@ -5,7 +5,7 @@ Gem::Specification.new do |spec| #svnrev = %x{svn info}.split("\n").grep(/Revision:/).first.split(" ").last.to_i spec.name = "jls-grok" - spec.version = "0.11.2" + spec.version = "0.11.3" spec.summary = "grok bindings for ruby" spec.description = "Grok ruby bindings - pattern match/extraction tool" diff --git a/lib/grok-pure.rb b/lib/grok-pure.rb index 116eaff..8edfccd 100644 --- a/lib/grok-pure.rb +++ b/lib/grok-pure.rb @@ -1,4 +1,4 @@ -require "rubygems" + require "rubygems" require "logger" require "cabin" require "grok/pure/discovery" @@ -172,21 +172,34 @@ def match(text) end end # def match + # Returns the raw regexp match object directly for performance at the + # cost of usability. + # + # Returns MatchData on success, nil on failure. + # + # Can be used with #capture + def execute(text) + @regexp.match(text) + end + # Optimized match and capture instead of calling them separately + # This could be DRYed up by using #match and #capture directly + # but there is some concern that doing so may lower performance. + # This should be benchmarked! 
def match_and_capture(text) - match = @regexp.match(text) + match = execute(text) if match @logger.debug? and @logger.debug("Regexp match object", :names => match.names, :captures => match.captures) - @captures_func.call(match) { |k,v| yield k,v } + capture(match) {|k,v| yield k,v} return true else return false end end # def match_and_capture - def capture(match, block) - @captures_func.call(match) { |k,v| block.call k,v } + def capture(match, &block) + @captures_func.call(match,&block) end # def capture public diff --git a/lib/grok/pure/match.rb b/lib/grok/pure/match.rb index c85a027..14c663d 100644 --- a/lib/grok/pure/match.rb +++ b/lib/grok/pure/match.rb @@ -12,7 +12,7 @@ def initialize public def each_capture(&block) - @grok.capture(@match, block) + @grok.capture(@match, &block) end # def each_capture public diff --git a/test/pure-ruby/benchmark_pattern.rb b/test/pure-ruby/benchmark_pattern.rb index 47115fe..5a2ae9d 100644 --- a/test/pure-ruby/benchmark_pattern.rb +++ b/test/pure-ruby/benchmark_pattern.rb @@ -14,16 +14,16 @@ def init_grok(named_captures_only) end Benchmark.bmbm(10) do |bm| - bm.report("100k Named Captures On") do + bm.report("10m Named Captures On") do grok = init_grok(true) - (1..100000).each do + (1..10_000_000).each do match = grok.match(@log_line) match.each_capture { |name, val| } end end - bm.report("100k Named Captures Off") do + bm.report("10m Named Captures Off") do grok = init_grok(false) - (1..100000).each do + (1..10_000_000).each do match = grok.match(@log_line) match.each_capture { |name, val| } end