Skip to content

Commit

Permalink
Condensed some of the algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
Team JED committed Apr 30, 2011
1 parent c1725f3 commit 57c2d50
Showing 1 changed file with 21 additions and 22 deletions.
43 changes: 21 additions & 22 deletions lib/copycat.rb
Expand Up @@ -53,8 +53,12 @@ def compare(sentence1, sentence2)
end

max_similarity = 0.0
tagged_words2.size.times do |size|
max_similarity = [compare_fragments(tagged_words1, tagged_words2[0..size]), max_similarity].max
tagged_words2.size.times do |end_point|
end_point.times do |begin_point|
similarity = compare_fragments(tagged_words1, tagged_words2[begin_point..end_point])
similarity *= (end_point - begin_point).to_f / tagged_words2.size
max_similarity = [similarity, max_similarity].max
end
end

STDERR.puts "final result = #{max_similarity.inspect}"
Expand All @@ -64,25 +68,19 @@ def compare(sentence1, sentence2)

# precondition: tagged_words1.size >= tagged_words2.size
def compare_fragments tagged_words1, tagged_words2
if tagged_words1.size == tagged_words2.size
result = compare_tagged_words tagged_words1, tagged_words2
STDERR.puts "identical sizes, tagged_words1.size=#{tagged_words1.size} result = #{result.inspect}\n\n#{'#'*120}"
result
else
max_similarity = 0.0
compared_size = tagged_words2.size

(tagged_words1.size - tagged_words2.size).times do |offset|
similarity = compare_tagged_words tagged_words1[offset..(offset + compared_size - 1)], tagged_words2
STDERR.puts ">> offset #{offset} similarity = #{similarity.inspect}"
max_similarity = [similarity, max_similarity].max
end
max_similarity = 0.0
compared_size = tagged_words2.size

result = max_similarity
STDERR.puts "non-identical sizes, result = #{result.inspect}\n\n#{'#'*120}"
result
(tagged_words1.size - tagged_words2.size + 1).times do |offset|
similarity = compare_tagged_words tagged_words1[offset..(offset + compared_size - 1)], tagged_words2
# STDERR.puts ">> offset #{offset} similarity = #{similarity.inspect}"
max_similarity = [similarity, max_similarity].max
end

result = max_similarity
STDERR.puts ">> compare_fragments, result = #{result.inspect}\n\n#{'#'*120}"
result

#compare_subtrees tree1, tree2

# subtrees1 = assemble_subtrees tree1
Expand Down Expand Up @@ -156,10 +154,11 @@ def wordnet_compare entries1, entries2
result =
if entries1.class == String and entries2.class == String
entries1 == entries2 ? 1.0 : 0.0
elsif entries1.class == String
entries2.detect {|entry| entry.words.detect {|word| word == entries1 } } ? 1.0 : 0.0
elsif entries2.class == String
entries1.detect {|entry| entry.words.detect {|word| word == entries2 } } ? 1.0 : 0.0
elsif [entries1.class,entries2.class].include?(String)
# Exactly one of the two words has WordNet entries and the other does not,
# which makes it very hard to judge how similar they are.
# The value below is only a rough estimate; a well-founded number is not yet known.
0.1
else
min_distance = 30.0

Expand Down

0 comments on commit 57c2d50

Please sign in to comment.