Skip to content
Newer
Older
100644 285 lines (223 sloc) 10.9 KB
1d92a72 @igrigorik 1.9 friendly.. and ~55% faster
authored Sep 4, 2010
1 # encoding: UTF-8
2
0ba21d1 @igrigorik use rspec
authored Dec 4, 2010
3 require "rspec"
8af2e09 @igrigorik wrapper for grammar API
authored Dec 28, 2009
4 require "pp"
5
4f3376a @igrigorik release 0.1.8
authored Jun 28, 2011
6 require "textquery"
64793e0 @igrigorik basic word matching
authored Dec 27, 2009
7
8 # Resources:
9 # - http://github.com/nathansobo/treetop
10 # - http://github.com/nathansobo/treetop/blob/master/examples/lambda_calculus/arithmetic.treetop
11 # - http://snippets.dzone.com/tag/Treetop
12 # - http://treetop.rubyforge.org/index.html
8af2e09 @igrigorik wrapper for grammar API
authored Dec 28, 2009
13 # - http://en.wikipedia.org/wiki/Parsing_expression_grammar
64793e0 @igrigorik basic word matching
authored Dec 27, 2009
14 #
15
8af2e09 @igrigorik wrapper for grammar API
authored Dec 28, 2009
# Behavioural specification for TextQuery.
#
# Most examples go through the +parse+ helper below: it parses a query string
# with a fresh TextQuery instance and the result is then asked to +eval+ a
# piece of text. The remaining examples exercise the convenience wrapper
# (+TextQuery.new(query, options).match?(text)+) and the +accept+ traversal.
describe TextQuery do
  # A fresh parser per example keeps examples independent of each other.
  before(:each) do
    @parser = TextQuery.new
  end

  # Parses +input+ as a query using the per-example parser.
  #
  # Returns whatever TextQuery#parse returns, so callers can chain +eval+.
  def parse(input)
    @parser.parse(input)
  end

  # A term must not match when it only appears as part of a longer token.
  it "should look for exact word boundary match" do
    parse("text").eval("textstring").should be_false
    parse("text").eval("stringtext").should be_false
    parse("text").eval("some textstring").should be_false
    parse("text").eval("string of texts stuff").should be_false
    parse("$^").eval("string of $^* stuff").should be_false
  end

  it "should accept logical AND" do
    parse("a AND b").eval("c").should be_false
    parse("a AND b").eval("a").should be_false
    parse("a AND b").eval("b").should be_false

    parse("a AND b").eval("a b").should be_true
    parse("a AND b").eval("a c b").should be_true
  end

  it "should accept logical OR" do
    parse("a OR b").eval("c").should be_false
    parse("a OR b").eval("a").should be_true
    parse("a OR b").eval("b").should be_true

    parse("a OR b").eval("a b").should be_true
    parse("a OR b").eval("a c b").should be_true
  end

  # The expectations below pin the grouping of an unbracketed mixed query:
  # "a AND b OR c" behaves like "a AND (b OR c)" -- text containing only "c"
  # does not match, while "a c" does.
  it "should give precedence to AND" do
    # a AND (b OR c) == a AND b OR c
    parse("a AND b OR c").eval("a b c").should be_true
    parse("a AND b OR c").eval("a b").should be_true
    parse("a AND b OR c").eval("a c").should be_true

    parse("a AND b OR c").eval("b c").should be_false
    parse("a AND b OR c").eval("c").should be_false
    parse("a AND b OR c").eval("b").should be_false
  end

  # Both spellings of negation ("-" and "NOT") must behave identically.
  it "should accept logical NOT" do
    %w[- NOT].each do |operator|
      parse("#{operator} a").eval("a").should be_false
      parse("#{operator} #{operator} a").eval("a").should be_true

      parse("#{operator} a OR a").eval("a").should be_true
      parse("a OR #{operator} a").eval("a").should be_true

      parse("b AND #{operator} a").eval("b").should be_true
      parse("b AND #{operator} a").eval("a").should be_false
    end
  end

  it "should evaluate sub expressions" do
    parse("(a AND b)").eval("a b").should be_true
    parse("(a OR b)").eval("a b").should be_true
    parse("(a AND NOT b)").eval("a b").should be_false

    parse("(a AND b) OR c").eval("a b c").should be_true
    parse("(a AND b) OR c").eval("a b").should be_true
    parse("(a AND b) OR c").eval("a c").should be_true

    parse("(a AND b) OR c").eval("c").should be_true
    parse("a AND (b OR c)").eval("c").should be_false

    # for the win...
    parse("a AND (b AND (c OR d))").eval("d a b").should be_true
  end

  # Negation written inside or outside the brackets must give the same answer.
  it "should not trip up on placement of brackets" do
    parse("a AND (-b)").eval("a b").should == parse("a AND -(b)").eval("a b")
    parse("(-a) AND b").eval("a b").should == parse("-(a) AND b").eval("a b")
    parse("-(a) AND -(b)").eval("a b").should == parse("(-a) AND (-b)").eval("a b")

    parse("a OR (-b)").eval("a b").should == parse("a OR -(b)").eval("a b")
    parse("(-a) OR b").eval("a b").should == parse("-(a) OR b").eval("a b")
    parse("(-a) OR (-b)").eval("a b").should == parse("-(a) OR -(b)").eval("a b")

    parse("a AND (b OR c)").eval("a b c").should be_true
    parse("a AND (b OR c)").eval("a b").should be_true
    parse("a AND (b OR c)").eval("a c").should be_true

    parse("(NOT a) OR a").eval("a").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("b").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b d").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b c").should be_true
    parse("a AND (b AND (c OR NOT d))").eval("a b e").should be_true

    parse("a AND (b AND NOT (c OR d))").eval("a b").should be_true
    parse("a AND (b AND NOT (c OR d))").eval("a b c").should be_false
    parse("a AND (b AND NOT (c OR d))").eval("a b d").should be_false

    parse("-a AND -b AND -c").eval("e").should be_true
    parse("(-a) AND (-b) AND (-c)").eval("e").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("e").should be_true
    parse("NOT a AND NOT b AND NOT c").eval("e").should be_true
  end

  # A quoted phrase must appear contiguously in the text to match.
  it "should accept quoted strings" do
    parse("'some text'").eval("some text").should be_true
    parse("'some text string'").eval("some text").should be_false

    parse("'some text string'").eval("some text 1 string").should be_false
    parse("-'some text string'").eval("some text 1 string").should be_true

    parse("a AND -'a b'").eval("a b c").should be_false
    parse("a AND -'a b'").eval("a c b").should be_true

    parse("(a OR b) AND (-'a b c')").eval("a b c").should be_false
    parse("(a OR b) AND (-'a b c')").eval("a c b").should be_true
    parse("(a AND b) AND (-'a b c')").eval("a c b").should be_true

    # shakespeare got nothin' on ruby...
    parse("'to be' OR NOT 'to be'").eval("to be").should be_true
    parse('"to be" OR NOT "to be"').eval("to be").should be_true
  end

  # With an empty delimiter the leading/trailing spaces of the quoted phrase
  # are significant: "some text" at the very end of the input has no trailing
  # space and therefore must not match.
  it "should not swallow spaces inside quoted strings" do
    parse('" some text "').eval("this is some text", :delim => '').should be_false
    parse('" some text "').eval("this is some text that should match", :delim => '').should be_true
  end

  # An apostrophe inside a word must not be treated as an opening quote.
  it "should accept unbalanced quotes" do
    parse("awesome").eval("M&M's are awesome").should be_true
    parse("M&M's").eval("M&M's are awesome").should be_true
    parse("M&M's AND awesome").eval("M&M's are awesome").should be_true
    parse("M&M's AND fail").eval("M&M's are awesome").should be_false
  end

  it "should accept mixed quotes inside the exact match queries" do
    parse("seattle's best").eval("seattle's best").should be_true

    parse("peets OR \"seattle's best\"").eval("peets").should be_true
    parse("peets OR \"seattle's best\"").eval("seattle's").should be_false

    parse("\"seattle's best\"").eval("seattle's best coffee").should be_true
    parse('"seattle\'s best"').eval("seattle's best coffee").should be_true
  end

  it "should treat spaces as implicit ANDs" do
    parse("a b").eval("a c b").should be_true
    parse("b a c").eval("a c b").should be_true
    parse("b a c").eval("a c").should be_false

    parse("some text AND 'exact match'").eval("some exact match text").should be_true
    parse("some text AND 'exact match'").eval("some exact text match").should be_false

    parse("some text AND -'exact match'").eval("some exact text match").should be_true
    parse("some text AND -'exact match'").eval("some exact match").should be_false
  end

  # TextQuery.new(query) followed by match?/eval is the convenience wrapper
  # around the parse-then-eval flow used elsewhere in this spec.
  it "should wrap the grammar API" do
    TextQuery.new("'to be' OR NOT 'to_be'").match?("to be").should be_true
    TextQuery.new("-test").match?("some string of text").should be_true
    TextQuery.new("NOT test").match?("some string of text").should be_true
    TextQuery.new("a AND b").match?("b a").should be_true
    TextQuery.new("a AND b").match?("a c").should be_false

    q = TextQuery.new("a AND (b AND NOT (c OR d))")
    q.match?("d a b").should be_false
    q.match?("b").should be_false
    q.match?("a b cdefg").should be_true
    q.eval("a b cdefg").should be_true
  end

  # Judging by the expectations below, a bare "~" allows any number of extra
  # characters on that side of the term, while a digit next to the "~" fixes
  # the exact number of extra characters allowed on that side.
  it "should support fuzzy matching" do
    parse("a~").eval("adf").should be_true
    parse("~a").eval("dfa").should be_true
    parse("~a~").eval("daf").should be_true

    parse("1~a~1").eval("daf").should be_true
    parse("2~a~1").eval("daf").should be_false
    parse("1~a~2").eval("daf").should be_false

    parse("~a~3").eval("daffy").should be_true
    parse("a~1").eval("adf").should be_false

    parse("a~1 AND b").eval("adf b").should be_false
    parse("a~2 AND b").eval("adf b").should be_true
    parse("a~3 AND b").eval("adf b").should be_false
  end

  it "should work on CJK text" do
    # A local variable (rather than a constant) keeps the fixture scoped to
    # this example and avoids "already initialized constant" warnings.
    jp = "仕様変更は出し尽くしてしまいß"

    # The sample text has no spaces, so an empty delimiter is used here.
    q = TextQuery.new("変更", :delim => '')
    q.eval(jp).should be_true
    q.eval("変ま").should be_false
    q.parse("は出").eval(jp).should be_true

    # With the default delimiter, fuzzy markers are used to match inside
    # the unbroken run of characters.
    q = TextQuery.new
    q.parse("~出~").eval(jp).should be_true
    q.parse("~出~ AND NOT ~尽~").eval(jp).should be_false
    q.parse("~更は出~ OR ~尽く~").eval(jp).should be_true
  end

  # Leading digits in a plain query must not be mistaken for a fuzzy-match
  # character count. The text is matched against the '3827' query; calling
  # +parse+ here would replace the query with the text instead of matching.
  it "should work with queries starting with numbers" do
    q = TextQuery.new('3827')
    q.match?('abc 123 3827 9382').should be_true
  end

  it "should be case insensitive" do
    TextQuery.new("a", :ignorecase => true).match?("A b cD").should be_true
    TextQuery.new("a AND CD", :ignorecase => true).match?("A b cD").should be_true

    TextQuery.new("a", :ignorecase => false).match?("A b cD").should be_false
    TextQuery.new("a AND CD", :ignorecase => false).match?("A b cD").should be_false
  end

  # The :delim option controls what separates words in the searched text.
  # It may be a String, a Regexp, or an Array mixing both.
  context 'delimiters' do
    it 'should default to space delimiter' do
      TextQuery.new("a").match?("a b").should be_true
      TextQuery.new("a").match?("a*b").should be_false
    end

    it 'should accept a string delimiter' do
      TextQuery.new("a", :delim => "*").match?("a*b").should be_true
      TextQuery.new("a", :delim => "*").match?("a b").should be_false
    end

    it 'should accept an array of delimiters' do
      TextQuery.new("a", :delim => ["*"]).match?("a*b").should be_true
    end

    it 'should OR delimiters together when provided as an array' do
      TextQuery.new("a", :delim => ["*", "|"]).match?("a|b").should be_true
      TextQuery.new("a", :delim => ["*", "|"]).match?("a*b").should be_true
      TextQuery.new("a", :delim => ["*", "|"]).match?("a b").should be_false
    end

    it 'should not match just the delimiter' do
      TextQuery.new("a*b", :delim => ["*", "<"]).match?("over<under").should be_false
    end

    it 'should accept a Regexp as a delimiter' do
      TextQuery.new("a", :delim => [%r{\b}]).match?("a.b").should be_true
      TextQuery.new("a b", :delim => [%r{\b}]).match?("a.b").should be_true
      TextQuery.new("a b", :delim => [%r{\b}]).match?("a.c").should be_false
    end

    it 'should OR multiple Regexp delimiters and match on all words' do
      TextQuery.new("cd", :delim => [%r{\d}, %r{\.\.}]).match?("ab2cd..ef").should be_true
      TextQuery.new("ef", :delim => [%r{\d}, %r{\.\.}]).match?("ab2cd..ef").should be_true
      TextQuery.new("ab2cd", :delim => [%r{\d}, %r{\.\.}]).match?("ab2cd..ef").should be_true
    end

    it 'should accept mixed Strings and Regexps as delimiters' do
      TextQuery.new("a", :delim => [%r{a{2,3}}]).match?("aab").should be_false
      TextQuery.new("a", :delim => [%r{a{2,3}}, 'b']).match?("aab").should be_false
      TextQuery.new("b", :delim => [%r{a{2,3}}, 'a']).match?("aab").should be_true
    end

    # +accept+ yields each node of the parsed query to the block; collecting
    # the block arguments reproduces the query tree as nested arrays.
    it 'should allow query to be traversed' do
      TextQuery.new("a b").accept { |*a| a }.should == [ :and, [ :value, 'a' ], [ :value, 'b' ] ]
      TextQuery.new("a OR b").accept { |*a| a }.should == [ :or, [ :value, 'a' ], [ :value, 'b' ] ]
    end

    it 'should not swallow spaces in quoted strings when traversed' do
      TextQuery.new('" a "').accept { |*a| a }.should == [ :value, ' a ' ]
    end
  end
end
Something went wrong with that request. Please try again.