Skip to content

Commit

Permalink
merging
Browse files Browse the repository at this point in the history
  • Loading branch information
jasondew committed Apr 30, 2011
2 parents 208ca4a + 57c2d50 commit eadab69
Showing 1 changed file with 18 additions and 17 deletions.
35 changes: 18 additions & 17 deletions lib/copycat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,29 +50,29 @@ def compare(sentence1, sentence2)
end

max_similarity = 0.0
tagged_words2.size.times do |size|
max_similarity = [compare_fragments(tagged_words1, tagged_words2[0..size]), max_similarity].max
tagged_words2.size.times do |end_point|
end_point.times do |begin_point|
similarity = compare_fragments(tagged_words1, tagged_words2[begin_point..end_point])
similarity *= (end_point - begin_point).to_f / tagged_words2.size
max_similarity = [similarity, max_similarity].max
end
end

max_similarity
end

# precondition: tagged_words1.size >= tagged_words2.size
def compare_fragments tagged_words1, tagged_words2
if tagged_words1.size == tagged_words2.size
compare_tagged_words tagged_words1, tagged_words2
else
max_similarity = 0.0
compared_size = tagged_words2.size

(tagged_words1.size - tagged_words2.size).times do |offset|
similarity = compare_tagged_words tagged_words1[offset..(offset + compared_size - 1)], tagged_words2
max_similarity = [similarity, max_similarity].max
end
max_similarity = 0.0
compared_size = tagged_words2.size

max_similarity
(tagged_words1.size - tagged_words2.size + 1).times do |offset|
similarity = compare_tagged_words tagged_words1[offset..(offset + compared_size - 1)], tagged_words2
max_similarity = [similarity, max_similarity].max
end

max_similarity

#compare_subtrees tree1, tree2

# subtrees1 = assemble_subtrees tree1
Expand Down Expand Up @@ -140,10 +140,11 @@ def remove_non_words tagged_words
def wordnet_compare entries1, entries2
if entries1.class == String and entries2.class == String
entries1 == entries2 ? 1.0 : 0.0
elsif entries1.class == String
entries2.detect {|entry| entry.words.detect {|word| word == entries1 } } ? 1.0 : 0.0
elsif entries2.class == String
entries1.detect {|entry| entry.words.detect {|word| word == entries2 } } ? 1.0 : 0.0
elsif [entries1.class,entries2.class].include?(String)
# This means one of the words is in wordnet but the other isn't,
# which makes it terribly hard to make a judgment.
# Just an estimate. Not really sure what a good number here is.
0.1
else
min_distance = 30.0

Expand Down

0 comments on commit eadab69

Please sign in to comment.