From 76ffe8bed83a32860e35923fcd172e408f092a84 Mon Sep 17 00:00:00 2001 From: Giovanni Giorgi Date: Wed, 16 May 2012 14:45:06 +0200 Subject: [PATCH] First implementation of wild character '*' --- lib/code_zauker.rb | 40 ++++++++++++++++++++++++++++++++++-- lib/code_zauker/constants.rb | 8 +++++++- test/fixture/wildtest.txt | 1 + test/test_wild_search.rb | 40 ++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 test/fixture/wildtest.txt create mode 100644 test/test_wild_search.rb diff --git a/lib/code_zauker.rb b/lib/code_zauker.rb index 10157b1..cf6cf27 100644 --- a/lib/code_zauker.rb +++ b/lib/code_zauker.rb @@ -160,7 +160,12 @@ def initialize(redisConnection=nil) def disconnect() - @redis.quit + begin + @redis.quit + rescue Errno::EAGAIN =>e + # Nothing to do... + puts "Ignored EAGAIN ERROR during disconnect..." + end end @@ -292,7 +297,7 @@ def split_in_trigrams(term, prefix) trigramInAnd=Set.new() # Search=> Sea AND ear AND arc AND rch for j in 0...term.length - currentTrigram=term[j,GRAM_SIZE] + currentTrigram=term[j,GRAM_SIZE] if currentTrigram.length 0 + trigramInAnd=Set.new() + puts "*= Found:#{m.length}" + m.each do | wt | + puts "Splitting #{wt}" + trigSet=split_in_trigrams(wt,"trigram") + trigramInAnd=trigramInAnd.merge(trigSet) + end + puts "Trigrams: #{trigramInAnd.length}" + if trigramInAnd.length==0 + return [] + end + fileIds= @redis.sinter(*trigramInAnd) + fileNames=map_ids_to_files(fileIds) + puts "DEBUG #{fileIds} #{fileNames}" + return fileNames + else + puts "Warn no Wild!" + return search(term) + end + end + # = search # Find a list of file candidates to a search string diff --git a/lib/code_zauker/constants.rb b/lib/code_zauker/constants.rb index bae3458..68d596e 100644 --- a/lib/code_zauker/constants.rb +++ b/lib/code_zauker/constants.rb @@ -13,6 +13,12 @@ module CodeZauker ".zip",".7z","rar", # MS Office zip-like files... 
".pptx",".docx",".xlsx", + # MS Visual Studio big bad files + ".scc",".datasource",".pdb","vspscc",".settings", + #"Telerik.Web.UI.xml", + ".Web.UI.xml", + # Auto-generated stuff... best avoided + ".designer.cs", # Avoid slurping text document too... ".doc", ".ppt",".xls",".rtf",".vsd", ".odf", @@ -27,7 +33,7 @@ module CodeZauker ".tar", ".gz",".Z", ".dropbox", - ".svn-base",".pdb",".cache", + ".svn-base",".cache", #IDE STUFF ".wlwLock", # Music exclusion diff --git a/test/fixture/wildtest.txt b/test/fixture/wildtest.txt new file mode 100644 index 0000000..e6ea4b7 --- /dev/null +++ b/test/fixture/wildtest.txt @@ -0,0 +1 @@ +Wild Wild West Movie diff --git a/test/test_wild_search.rb b/test/test_wild_search.rb new file mode 100644 index 0000000..5f83b2f --- /dev/null +++ b/test/test_wild_search.rb @@ -0,0 +1,40 @@ +# -*- encoding: utf-8 -*- +# To test use +# rake TEST=test/test_wild_search.rb +require 'test/unit' +require 'code_zauker' + +# See ri Test::Unit::Assertions +# for assertion documentation +class FileScannerBasicSearch < Test::Unit::TestCase + # This test can search very unique things... + def test_foolish_wild1 + fs=CodeZauker::FileScanner.new() + fs.load("./test/fixture/wildtest.txt") + files=fs.wsearch("Wild*West") + assert(files.include?("./test/fixture/wildtest.txt")== true, + "Expected file not found. Files found:#{files}") + assert(files.length==1) + end + + def test_foolish_wild2 + fs=CodeZauker::FileScanner.new() + fs.load("./test/fixture/wildtest.txt") + files=fs.wsearch("Wild*West*Movie") + assert(files.include?("./test/fixture/wildtest.txt")== true, + "Expected file not found. 
Files found:#{files}") + assert(files.length==1) + end + + # An unordered match also works, + # so a negative match is difficult to build + def test_foolish_wild3 + fs=CodeZauker::FileScanner.new() + fs.load("./test/fixture/wildtest.txt") + files=fs.wsearch("West*Wild*NotOnTheSameLineForSure") + assert(files.include?("./test/fixture/wildtest.txt")== false, + "Expected not matching wildtest.txt file. Matches:#{files}") + end + + +end