Skip to content

Commit

Permalink
Fuzzy -> FuzzyTools
Browse files Browse the repository at this point in the history
  • Loading branch information
brianhempel committed Jul 17, 2012
1 parent 1cd3b2a commit be6f73a
Show file tree
Hide file tree
Showing 19 changed files with 99 additions and 99 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Expand Up @@ -5,5 +5,5 @@ gem 'nokogiri'
gem 'perftools.rb', :require => false
gem 'rake'

# Specify your gem's dependencies in fuzzy.gemspec
# Specify your gem's dependencies in fuzzy_tools.gemspec
gemspec
2 changes: 1 addition & 1 deletion Rakefile
Expand Up @@ -16,7 +16,7 @@ desc "Launch an IRB session with the gem required"
task :console do
$:.unshift(File.dirname(__FILE__) + '/../lib')

require 'fuzzy'
require 'fuzzy_tools'
require 'irb'

IRB.setup(nil)
Expand Down
6 changes: 3 additions & 3 deletions accuracy/test_accuracy.rb
@@ -1,7 +1,7 @@
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift File.expand_path("../support", __FILE__)

require 'fuzzy'
require 'fuzzy_tools'
require 'accuracy_test_case'
require 'histogram'
require 'rubygems'
Expand All @@ -19,10 +19,10 @@ class Failure < Struct.new(:given, :expected, :actual)
ENV['CPUPROFILE_REALTIME'] = "1"
ENV['CPUPROFILE_FREQUENCY=500'] = "50" # default is 100
require 'perftools'
PerfTools::CpuProfiler.start("/tmp/fuzzy_ruby_profile")
PerfTools::CpuProfiler.start("/tmp/fuzzy_tools_ruby_profile")
at_exit do
PerfTools::CpuProfiler.stop
puts `pprof.rb --text /tmp/fuzzy_ruby_profile`
puts `pprof.rb --text /tmp/fuzzy_tools_ruby_profile`
end

total_time = Benchmark.realtime do
Expand Down
8 changes: 4 additions & 4 deletions fuzzy.gemspec → fuzzy_tools.gemspec
@@ -1,14 +1,14 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "fuzzy/version"
require "fuzzy_tools/version"

Gem::Specification.new do |s|
s.name = "fuzzy"
s.version = Fuzzy::VERSION
s.name = "fuzzy_tools_tools"
s.version = FuzzyTools::VERSION
s.platform = Gem::Platform::RUBY
s.authors = ["Brian Hempel"]
s.email = ["plasticchicken@gmail.com"]
s.homepage = ""
s.homepage = "https://github.com/brianhempel/fuzzy_tools"
s.summary = %q{High quality fuzzy search and string matching in Ruby.}
s.description = %q{High quality fuzzy search and string matching in Ruby.}

Expand Down
4 changes: 0 additions & 4 deletions lib/fuzzy.rb

This file was deleted.

30 changes: 0 additions & 30 deletions lib/fuzzy/tokenizers.rb

This file was deleted.

4 changes: 4 additions & 0 deletions lib/fuzzy_tools.rb
@@ -0,0 +1,4 @@
require 'fuzzy_tools/helpers'
require 'fuzzy_tools/index'
require 'fuzzy_tools/tf_idf_index'
require 'fuzzy_tools/core_ext/enumerable'
@@ -1,4 +1,4 @@
require 'fuzzy/index'
require 'fuzzy_tools/index'

module Enumerable
def fuzzy_find(*args)
Expand All @@ -13,7 +13,7 @@ def fuzzy_find_all(*args)

def fuzzy_index(options = {})
options = options.merge(:source => self)
Fuzzy::TfIdfIndex.new(options)
FuzzyTools::TfIdfIndex.new(options)
end

private
Expand Down
2 changes: 1 addition & 1 deletion lib/fuzzy/helpers.rb → lib/fuzzy_tools/helpers.rb
@@ -1,6 +1,6 @@
require 'inline'

module Fuzzy
module FuzzyTools
module Helpers
extend self

Expand Down
6 changes: 3 additions & 3 deletions lib/fuzzy/index.rb → lib/fuzzy_tools/index.rb
@@ -1,7 +1,7 @@
require 'fuzzy/helpers'
require 'fuzzy/tokenizers'
require 'fuzzy_tools/helpers'
require 'fuzzy_tools/tokenizers'

module Fuzzy
module FuzzyTools
class Index
attr_reader :source, :indexed_attribute

Expand Down
8 changes: 4 additions & 4 deletions lib/fuzzy/tf_idf_index.rb → lib/fuzzy_tools/tf_idf_index.rb
@@ -1,8 +1,8 @@
require 'set'
require 'fuzzy/index'
require 'fuzzy/weighted_document_tokens'
require 'fuzzy_tools/index'
require 'fuzzy_tools/weighted_document_tokens'

module Fuzzy
module FuzzyTools
class TfIdfIndex < Index
class Token
attr_accessor :documents, :idf
Expand All @@ -13,7 +13,7 @@ def initialize
end

def self.default_tokenizer
Fuzzy::Tokenizers::HYBRID
FuzzyTools::Tokenizers::HYBRID
end

attr_reader :tokenizer
Expand Down
30 changes: 30 additions & 0 deletions lib/fuzzy_tools/tokenizers.rb
@@ -0,0 +1,30 @@
module FuzzyTools
  # Ready-made tokenizers.
  #
  # Every constant is a lambda that takes a single String and returns the
  # collection of tokens extracted from it. The *_DOWNCASED variants downcase
  # the input before tokenizing; the plain variants leave case untouched.
  module Tokenizers

    # Builds a tokenizer that delegates to FuzzyTools::Helpers.ngrams with a
    # fixed gram size. Local to this module body; not part of the public API.
    ngram_tokenizer = lambda do |size|
      lambda { |str| FuzzyTools::Helpers.ngrams(str, size) }
    end

    # Wraps a tokenizer so that its input is downcased first.
    downcasing = lambda do |tokenizer|
      lambda { |str| tokenizer.call(str.downcase) }
    end

    # One token per character.
    CHARACTERS           = lambda { |str| str.chars }
    CHARACTERS_DOWNCASED = downcasing.call(CHARACTERS)

    # Fixed-size n-grams, sizes 2 through 6.
    BIGRAMS              = ngram_tokenizer.call(2)
    BIGRAMS_DOWNCASED    = downcasing.call(BIGRAMS)
    TRIGRAMS             = ngram_tokenizer.call(3)
    TRIGRAMS_DOWNCASED   = downcasing.call(TRIGRAMS)
    TETRAGRAMS           = ngram_tokenizer.call(4)
    TETRAGRAMS_DOWNCASED = downcasing.call(TETRAGRAMS)
    PENTAGRAMS           = ngram_tokenizer.call(5)
    PENTAGRAMS_DOWNCASED = downcasing.call(PENTAGRAMS)
    HEXAGRAMS            = ngram_tokenizer.call(6)
    HEXAGRAMS_DOWNCASED  = downcasing.call(HEXAGRAMS)

    # One token per whitespace-separated word.
    WORDS           = lambda { |str| str.split }
    WORDS_DOWNCASED = downcasing.call(WORDS)

    # Combines several token families computed from the downcased input,
    # concatenated in this order:
    #   1. the Soundex code of each word,
    #   2. the bigrams of the whole string,
    #   3. each word with the vowels a/e/i/o/u removed,
    #   4. the words themselves.
    HYBRID = lambda do |str|
      # Downcase once and reuse the result for both the word split and the
      # bigrams; the string does not need to be downcased a second time.
      lowered = str.downcase
      words   = lowered.split

      words.map { |word| FuzzyTools::Helpers.soundex(word) } +
        FuzzyTools::Helpers.ngrams(lowered, 2) +
        words.map { |word| word.gsub(/[aeiou]/, '') } +
        words
    end

  end
end
2 changes: 1 addition & 1 deletion lib/fuzzy/version.rb → lib/fuzzy_tools/version.rb
@@ -1,3 +1,3 @@
module Fuzzy
module FuzzyTools
VERSION = "0.0.1"
end
@@ -1,7 +1,7 @@
require 'fuzzy/helpers'
require 'fuzzy_tools/helpers'
require 'inline'

module Fuzzy
module FuzzyTools
class WeightedDocumentTokens
attr_reader :weights

Expand Down Expand Up @@ -57,7 +57,7 @@ def tokens

def set_token_weights(tokens, &block)
@weights = {}
counts = Fuzzy::Helpers.term_counts(tokens)
counts = FuzzyTools::Helpers.term_counts(tokens)
counts.each do |token, n|
@weights[token] = yield(token, n)
end
Expand Down
6 changes: 3 additions & 3 deletions performance/profile.rb
@@ -1,7 +1,7 @@
require 'csv'

$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
require 'fuzzy'
require 'fuzzy_tools'


TEST_FILE_PATH = File.expand_path("../query_tests/bible_verses_daniel_kjv.csv", __FILE__)
Expand All @@ -24,10 +24,10 @@
ENV['CPUPROFILE_REALTIME'] = "1"
ENV['CPUPROFILE_FREQUENCY=500'] = "200" # default is 100
require 'perftools'
PerfTools::CpuProfiler.start("/tmp/fuzzy_ruby_profile")
PerfTools::CpuProfiler.start("/tmp/fuzzy_tools_ruby_profile")
at_exit do
PerfTools::CpuProfiler.stop
puts `pprof.rb --text /tmp/fuzzy_ruby_profile`
puts `pprof.rb --text /tmp/fuzzy_tools_ruby_profile`
end

index = targets.fuzzy_index
Expand Down
12 changes: 6 additions & 6 deletions spec/enumerable_spec.rb
Expand Up @@ -27,15 +27,15 @@
before(:each) { @letter_count_tokenizer = lambda { |str| str.size.to_s } }

it "passes :tokenizer through to the index with simple query syntax" do
Fuzzy::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer)
FuzzyTools::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer)
begin
@books.fuzzy_find("the", :tokenizer => @letter_count_tokenizer)
rescue
end
end

it "passes :tokenizer through to the index with :attribute => query syntax" do
Fuzzy::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer, :attribute => :title)
FuzzyTools::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer, :attribute => :title)
begin
@books.fuzzy_find(:title => "the", :tokenizer => @letter_count_tokenizer)
rescue
Expand All @@ -57,15 +57,15 @@
before(:each) { @letter_count_tokenizer = lambda { |str| str.size.to_s } }

it "passes :tokenizer through to the index with simple query syntax" do
Fuzzy::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer)
FuzzyTools::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer)
begin
@books.fuzzy_find_all("the", :tokenizer => @letter_count_tokenizer)
rescue
end
end

it "passes :tokenizer through to the index with :attribute => query syntax" do
Fuzzy::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer, :attribute => :title)
FuzzyTools::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => @letter_count_tokenizer, :attribute => :title)
begin
@books.fuzzy_find_all(:title => "the", :tokenizer => @letter_count_tokenizer)
rescue
Expand All @@ -76,12 +76,12 @@

describe "#fuzzy_index" do
it "returns an TfIdfIndex" do
@books.fuzzy_index.class.should == Fuzzy::TfIdfIndex
@books.fuzzy_index.class.should == FuzzyTools::TfIdfIndex
end

it "passes options along to the index" do
letter_count_tokenizer = lambda { |str| str.size.to_s }
Fuzzy::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => letter_count_tokenizer, :attribute => :title)
FuzzyTools::TfIdfIndex.should_receive(:new).with(:source => @books, :tokenizer => letter_count_tokenizer, :attribute => :title)
@books.fuzzy_index(:attribute => :title, :tokenizer => letter_count_tokenizer)
end
end
Expand Down
20 changes: 10 additions & 10 deletions spec/helpers_spec.rb
@@ -1,10 +1,10 @@
require 'spec_helper'

describe Fuzzy::Helpers do
describe FuzzyTools::Helpers do
describe ".ngrams" do

it "should do trigrams" do
Fuzzy::Helpers.trigrams("hello").should == %w{
FuzzyTools::Helpers.trigrams("hello").should == %w{
__h
_he
hel
Expand All @@ -16,7 +16,7 @@
end

it "should do bigrams" do
Fuzzy::Helpers.bigrams("hello").should == %w{
FuzzyTools::Helpers.bigrams("hello").should == %w{
_h
he
el
Expand All @@ -27,7 +27,7 @@
end

it "should do 1-grams" do
Fuzzy::Helpers.ngrams("hello", 1).should == %w{
FuzzyTools::Helpers.ngrams("hello", 1).should == %w{
h
e
l
Expand All @@ -37,7 +37,7 @@
end

it "should do x-grams" do
Fuzzy::Helpers.ngrams("hello", 4).should == %w{
FuzzyTools::Helpers.ngrams("hello", 4).should == %w{
___h
__he
_hel
Expand All @@ -53,11 +53,11 @@

describe ".soundex" do
it "works" do
Fuzzy::Helpers.soundex("Robert").should == "R163"
Fuzzy::Helpers.soundex("Rubin").should == "R150"
Fuzzy::Helpers.soundex("Washington").should == "W252"
Fuzzy::Helpers.soundex("Lee").should == "L000"
Fuzzy::Helpers.soundex("Gutierrez").should == "G362"
FuzzyTools::Helpers.soundex("Robert").should == "R163"
FuzzyTools::Helpers.soundex("Rubin").should == "R150"
FuzzyTools::Helpers.soundex("Washington").should == "W252"
FuzzyTools::Helpers.soundex("Lee").should == "L000"
FuzzyTools::Helpers.soundex("Gutierrez").should == "G362"
end
end
end
2 changes: 1 addition & 1 deletion spec/spec_helper.rb
@@ -1,5 +1,5 @@
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))

require 'fuzzy'
require 'fuzzy_tools'

Book = Struct.new(:title, :author)

0 comments on commit be6f73a

Please sign in to comment.