Skip to content
Browse files

fuzzy matching support

  • Loading branch information...
1 parent e729c26 commit 613a629c8110f4f0b3ee4bbc1a46a78e823647c1 @igrigorik igrigorik committed Dec 30, 2009
Showing with 45 additions and 6 deletions.
  1. +8 −0 README.rdoc
  2. +11 −1 lib/textquery/textquery.rb
  3. +26 −5 spec/textquery_spec.rb
View
8 README.rdoc
@@ -9,6 +9,7 @@ Textquery is a simple PEG grammar with support for:
- OR
- NOT (- is an alias)
- 'quoted strings'
+- fuzzy matching
== Example
@@ -25,6 +26,13 @@ Textquery is a simple PEG grammar with support for:
q.match?("b") # => false
q.match?("a b cdefg") # => true
+ TextQuery.new("a~").match?("adf") # => true
+ TextQuery.new("~a").match?("dfa") # => true
+ TextQuery.new("~a~").match?("daf") # => true
+ TextQuery.new("2~a~1").match?("edaf") # => true
+ TextQuery.new("2~a~2").match?("edaf") # => false
+
+
== License
(The MIT License)
View
12 lib/textquery/textquery.rb
@@ -8,7 +8,17 @@
class WordMatch < Treetop::Runtime::SyntaxNode
def eval(text, opt)
- not text.match("^#{query}#{opt[:delim]}|#{opt[:delim]}#{query}#{opt[:delim]}|#{opt[:delim]}#{query}$|^#{query}$").nil?
+ fuzzy = query.match(/(\d)*(~)?([^~]+)(~)?(\d)*$/)
+
+ q = []
+ q.push "." if fuzzy[2]
+ q.push fuzzy[1].nil? ? "*" : "{#{fuzzy[1]}}" if fuzzy[2]
+ q.push fuzzy[3]
+ q.push "." if fuzzy[4]
+ q.push fuzzy[5].nil? ? "*" : "{#{fuzzy[5]}}" if fuzzy[4]
+ q = q.join
+
+ not text.match("^#{q}#{opt[:delim]}|#{opt[:delim]}#{q}#{opt[:delim]}|#{opt[:delim]}#{q}$|^#{q}$").nil?
end
def query
View
31 spec/textquery_spec.rb
@@ -160,14 +160,35 @@ def parse(input)
q.eval("a b cdefg").should be_true
end
+ it "should support fuzzy matching" do
+ parse("a~").eval("adf").should be_true
+ parse("~a").eval("dfa").should be_true
+ parse("~a~").eval("daf").should be_true
+
+ parse("1~a~1").eval("daf").should be_true
+ parse("2~a~1").eval("daf").should be_false
+ parse("1~a~2").eval("daf").should be_false
+
+ parse("~a~3").eval("daffy").should be_true
+ parse("a~1").eval("adf").should be_false
+
+ parse("a~1 AND b").eval("adf b").should be_false
+ parse("a~2 AND b").eval("adf b").should be_true
+ parse("a~3 AND b").eval("adf b").should be_false
+ end
+
it "should work on CJK text" do
- JP = "に入れるわけにはいかないので、プラグインの出力が同一であることでもって同一性を判定する"
+ JP = "仕様変更は出し尽くしてしまいß"
- q = TextQuery.new("に入".mb_chars, :delim => '')
+ q = TextQuery.new("変更", :delim => '')
q.eval(JP).should be_true
- q.eval("けにはい").should be_false
+ q.eval("変ま").should be_false
+ q.parse("は出").eval(JP).should be_true
- q.parse("れるわ AND が同".mb_chars).eval(JP).should be_true
- q.parse("れるわ AND NOT す".mb_chars).eval(JP).should be_false
+ q = TextQuery.new
+ q.parse("~出~").eval(JP).should be_true
+ q.parse("~出~ AND NOT ~尽~").eval(JP).should be_false
+ q.parse("~更は出~ OR ~尽く~").eval(JP).should be_true
end
+
end

0 comments on commit 613a629

Please sign in to comment.
Something went wrong with that request. Please try again.