This repository has been archived by the owner on Apr 14, 2021. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Suggest alternatives when typoing gem names
e.g. running `$ bundle open thinking_sphinx` prints: Could not find gem 'thinking_sphinx'. Did you mean thinking-sphinx?
- Loading branch information
Jonathan del Strother
authored and
Jonathan del Strother
committed
Aug 20, 2012
1 parent
4d163e8
commit a567307
Showing
4 changed files
with
86 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
module Bundler
  # Finds entries of a word list that are close to a given word, measured by
  # a weighted Levenshtein distance. Intended for "Did you mean ...?" hints
  # when a name is mistyped.
  class SimilarityDetector
    # Pairs a corpus word with its computed distance from the query word.
    SimilarityScore = Struct.new(:string, :distance)

    # corpus - an array of words to be matched against
    def initialize(corpus)
      @corpus = corpus
    end

    # Returns the corpus words whose distance from 'word' is at most 'limit',
    # ordered from closest to farthest. Returns an empty array when nothing
    # is close enough (including when 'word' is nil).
    def similar_words(word, limit = 3)
      scores = @corpus.map { |w| SimilarityScore.new(w, levenshtein_distance(word, w)) }
      # levenshtein_distance yields nil for nil input; a nil distance can
      # never be "within the limit", so drop it instead of comparing it.
      close = scores.select { |s| s.distance && s.distance <= limit }
      close.sort_by(&:distance).map(&:string)
    end

    # Returns the result of 'similar_words' joined into a readable list:
    # "a" for one match, "a or b" for two, "a, b or c" for three or more.
    # Returns nil when there are no matches.
    def similar_word_list(word, limit = 3)
      words = similar_words(word, limit)
      if words.length == 1
        words[0]
      elsif words.length > 1
        [words[0..-2].join(', '), words[-1]].join(' or ')
      end
    end

    protected

    # Weighted Levenshtein distance between 'this' and 'that'.
    # http://www.informit.com/articles/article.aspx?p=683059&seqNum=36
    #
    # ins, del, sub are the weighted costs of an insertion, a deletion and a
    # substitution. Returns nil if either string is nil. Empty strings are
    # valid input.
    def levenshtein_distance(this, that, ins = 2, del = 2, sub = 1)
      return nil if this.nil? || that.nil?

      # Only the previous row of the distance matrix is ever consulted, so
      # keep two rows instead of the full matrix.
      # Row 0: cost of reaching each prefix of 'this' from the empty string.
      previous = (0..this.length).map { |j| j * ins }

      (1..that.length).each do |i|
        # Column 0: cost of reaching the empty prefix of 'this' from the
        # first i characters of 'that'.
        current = [i * del]

        (1..this.length).each do |j|
          diagonal = previous[j - 1] + (this[j - 1] == that[i - 1] ? 0 : sub)
          current[j] = [diagonal, current[j - 1] + ins, previous[j] + del].min
        end

        previous = current
      end

      # The last cell of the last row is the distance between the strings.
      previous[this.length]
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters