Skip to content

Commit

Permalink
Merge remote-tracking branch 'wilkerlucio/simon-similarity'
Browse files Browse the repository at this point in the history
  • Loading branch information
threedaymonk committed Nov 2, 2011
2 parents 55af084 + be6d555 commit f6472a8
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
60 changes: 60 additions & 0 deletions lib/text/simon_similarity.rb
@@ -0,0 +1,60 @@
# encoding: utf-8
#
# Ruby implementation of the string similarity described by Simon White
# at: http://www.catalysoft.com/articles/StrikeAMatch.html
#
# Based on the Java implementation given in the article.
#
# Author: Wilker Lúcio <wilkerlucio@gmail.com>
#

module Text
  # String similarity based on shared adjacent-letter pairs, as described by
  # Simon White in "How to Strike a Match".
  #
  # The module extends itself, so it can be used directly
  # (Text::SimonSimilarity.compare_strings) or mixed into another class.
  module SimonSimilarity
    # Scores how similar two strings are.
    #
    # Both strings are upcased, split on whitespace into words, and each word
    # is broken into its adjacent two-character pairs. The score is
    #
    #   2 * (pairs in common) / (pairs in str1 + pairs in str2)
    #
    # where a repeated pair only matches as many times as it occurs in both
    # strings.
    #
    # str1, str2 - the Strings to compare; neither is modified.
    #
    # Returns a Float between 0.0 (nothing in common) and 1.0 (same pairs).
    # When neither string yields any pair (empty strings, single-character
    # words) the formula would divide by zero, so the result is 1.0 if the
    # upcased strings are equal and 0.0 otherwise.
    def compare_strings(str1, str2)
      upper1 = str1.upcase
      upper2 = str2.upcase

      pairs1 = word_letter_pairs(upper1)
      pairs2 = word_letter_pairs(upper2)
      union = pairs1.length + pairs2.length

      # Guard against 0.0 / 0, which would yield NaN.
      return(upper1 == upper2 ? 1.0 : 0.0) if union.zero?

      # Count the pairs of the second string so that each occurrence can be
      # matched at most once, without scanning and mutating an array.
      remaining = Hash.new(0)
      pairs2.each { |pair| remaining[pair] += 1 }

      intersection = 0
      pairs1.each do |pair|
        next unless remaining[pair] > 0

        remaining[pair] -= 1
        intersection += 1
      end

      (2.0 * intersection) / union
    end

    private

    # Returns the letter pairs of every whitespace-separated word in str,
    # concatenated into a single Array in word order.
    def word_letter_pairs(str)
      str.split(/\s+/).inject([]) do |pairs, word|
        pairs.concat(letter_pairs(word))
      end
    end

    # Returns the adjacent two-character substrings of str, e.g.
    # "HEAL" => ["HE", "EA", "AL"]. Strings shorter than two characters
    # produce an empty Array.
    def letter_pairs(str)
      (0...(str.length - 1)).map { |n| str[n, 2] }
    end

    extend self
  end
end
15 changes: 15 additions & 0 deletions test/test_simon_similarity.rb
@@ -0,0 +1,15 @@
require File.join(File.dirname(__FILE__), 'preamble')
require "text/simon_similarity"

# Checks Text::SimonSimilarity.compare_strings against a fixed table of
# expected scores for the word "Healed".
class SimonSimilarityTest < Test::Unit::TestCase
  def test_similarity
    word = "Healed"

    # [expected score, word compared against "Healed"], checked in order.
    expectations = [
      [0.8,  "Sealed"],
      [0.55, "Healthy"],
      [0.44, "Heard"],
      [0.40, "Herded"],
      [0.25, "Help"],
      [0.0,  "Sold"]
    ]

    expectations.each do |expected, other|
      assert_in_delta expected, Text::SimonSimilarity.compare_strings(word, other), 0.01
    end
  end
end

0 comments on commit f6472a8

Please sign in to comment.