Skip to content

Commit

Permalink
First implementation of wild character '*'
Browse files Browse the repository at this point in the history
  • Loading branch information
daitangio committed May 16, 2012
1 parent 5f19cca commit 76ffe8b
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 3 deletions.
40 changes: 38 additions & 2 deletions lib/code_zauker.rb
Expand Up @@ -160,7 +160,12 @@ def initialize(redisConnection=nil)


# Close the connection held in @redis.
#
# quit is attempted exactly once, inside the guarded section, so that an
# Errno::EAGAIN raised while closing is reported on standard output and
# then ignored instead of escaping to the caller. Any other exception
# still propagates.
def disconnect()
  begin
    @redis.quit
  rescue Errno::EAGAIN
    # Nothing to do: the connection is being dropped anyway.
    puts "Ignored EAGAIN ERROR during disconnect..."
  end
end


Expand Down Expand Up @@ -292,7 +297,7 @@ def split_in_trigrams(term, prefix)
trigramInAnd=Set.new()
# Search=> Sea AND ear AND arc AND rch
for j in 0...term.length
currentTrigram=term[j,GRAM_SIZE]
currentTrigram=term[j,GRAM_SIZE]
if currentTrigram.length <GRAM_SIZE
# We are at the end...
break
Expand Down Expand Up @@ -330,6 +335,37 @@ def isearch(term)
return map_ids_to_files(fileIds)
end

# = wild cards search
# Search for several fragments at once, separated by '*', e.g.
#   public*class*Apple
# will match a java declaration of MyApple but not
# YourAppManager.
#
# The term is split on '*', every fragment is turned into trigram keys via
# split_in_trigrams(fragment, "trigram"), and the files returned are those
# in the intersection (@redis.sinter) of all the keys. Because the keys are
# collected in a Set and intersected, the order of the fragments is NOT
# enforced: "West*Wild" selects the same files as "Wild*West".
#
# term::           search string; '*' separates the fragments.
# case_sensitive:: accepted for interface compatibility but currently not
#                  used by this method.
#
# Returns the value of map_ids_to_files for the matching ids, [] when no
# fragment produced a trigram key, or the result of search(term) when the
# split yields no fragment at all (empty or wildcard-only term).
#
# Diagnostics are written with warn (standard error) so that standard
# output only ever carries what the caller decides to print.
def wsearch(term,case_sensitive=true)
  warn "Wild Search request:#{term}"
  fragments = term.split("*")

  # String#split drops empty trailing pieces, so this is only reached for
  # an empty term or one made of '*' only: hand it to the plain search.
  if fragments.empty?
    warn "Warn no Wild!"
    return search(term)
  end

  warn "*= Found:#{fragments.length}"
  trigram_keys = fragments.each_with_object(Set.new) do |fragment, keys|
    warn "Splitting #{fragment}"
    keys.merge(split_in_trigrams(fragment, "trigram"))
  end
  warn "Trigrams: #{trigram_keys.length}"

  # Nothing to intersect: avoid calling sinter without any key.
  return [] if trigram_keys.empty?

  file_ids = @redis.sinter(*trigram_keys)
  file_names = map_ids_to_files(file_ids)
  warn "DEBUG #{file_ids} #{file_names}"
  file_names
end


# = search
# Find a list of file candidates to a search string
Expand Down
8 changes: 7 additions & 1 deletion lib/code_zauker/constants.rb
Expand Up @@ -13,6 +13,12 @@ module CodeZauker
".zip",".7z","rar",
# MS Office zip-like files...
".pptx",".docx",".xlsx",
# MS Visual Studio big bad files"
".scc",".datasource",".pdb","vspscc",".settings",
#"Telerik.Web.UI.xml",
".Web.UI.xml",
# Auto-generated stuff...is suggested to be avoided
".designer.cs",
# Avoid slurping text document too...
".doc",
".ppt",".xls",".rtf",".vsd", ".odf",
Expand All @@ -27,7 +33,7 @@ module CodeZauker
".tar",
".gz",".Z",
".dropbox",
".svn-base",".pdb",".cache",
".svn-base",".cache",
#IDE STUFF
".wlwLock",
# Music exclusion
Expand Down
1 change: 1 addition & 0 deletions test/fixture/wildtest.txt
@@ -0,0 +1 @@
Wild Wild West Movie
40 changes: 40 additions & 0 deletions test/test_wild_search.rb
@@ -0,0 +1,40 @@
# -*- encoding: utf-8 -*-
# To test use
# rake TEST=test/test_wild_search.rb
require 'test/unit'
require 'code_zauker'

# See ri Test::Unit::Assertions
# for assertion documentation
# Wildcard search tests: the one-line fixture "Wild Wild West Movie" is
# indexed and then queried through FileScanner#wsearch.
class FileScannerBasicSearch < Test::Unit::TestCase
  # File indexed by every test in this class.
  WILD_FIXTURE = "./test/fixture/wildtest.txt"

  # This test can search very unique things...
  def test_foolish_wild1
    files = wild_search("Wild*West")
    assert(files.include?(WILD_FIXTURE),
           "Expected file not found. Files found:#{files}")
    assert_equal(1, files.length)
  end

  def test_foolish_wild2
    files = wild_search("Wild*West*Movie")
    assert(files.include?(WILD_FIXTURE),
           "Expected file not found. Files found:#{files}")
    assert_equal(1, files.length)
  end

  # Also unordered match will work
  # So the negative match is difficult
  def test_foolish_wild3
    files = wild_search("West*Wild*NotOnTheSameLineForSure")
    assert(!files.include?(WILD_FIXTURE),
           "Expected not matching wildtest.txt file. Matches:#{files}")
  end

  private

  # Index the fixture with a fresh scanner and run a wildcard search on it.
  def wild_search(pattern)
    fs = CodeZauker::FileScanner.new()
    fs.load(WILD_FIXTURE)
    fs.wsearch(pattern)
  end
end

0 comments on commit 76ffe8b

Please sign in to comment.