Permalink
Browse files

The encoding of strings from hunspell now matches the dictionary encoding
  • Loading branch information...
1 parent 1ac9481 commit a0513cc55bbbfb6069ea3f91f2807ba750e6027e @drbrain drbrain committed with Apr 30, 2012
Showing 4 changed files with 28 additions and 0 deletions.
  1. +17 −0 lib/hunspell-ffi.rb
  2. +4 −0 test/en_US.aff
  3. +1 −0 test/en_US.dic
  4. +6 −0 test/test_hunspell.rb
View
@@ -17,6 +17,7 @@ module C
attach_function :Hunspell_add_with_affix, [:pointer, :string, :string], :int
attach_function :Hunspell_analyze, [:pointer, :pointer, :string], :int
attach_function :Hunspell_free_list, [:pointer, :pointer, :int], :void
+ attach_function :Hunspell_get_dic_encoding, [:pointer], :string
attach_function :Hunspell_remove, [:pointer, :string], :int
attach_function :Hunspell_stem, [:pointer, :pointer, :string], :int
end
@@ -60,6 +61,16 @@ def initialize(path, language = nil)
File.exist?(@dictionary)
@handler = C.Hunspell_create @affix, @dictionary
+ @dic_encoding = nil
+
+ if Object.const_defined? :Encoding then
+ begin
+ encoding_name = C.Hunspell_get_dic_encoding @handler
+ @dic_encoding = Encoding.find encoding_name
+ rescue ArgumentError
+ # unknown encoding name, results will be ASCII-8BIT
+ end
+ end
end
def find_language
@@ -121,6 +132,12 @@ def read_list(list_pointer, len)
C.Hunspell_free_list(@handler, list_pointer, len)
+ if @dic_encoding then
+ strings.map do |string|
+ string.force_encoding @dic_encoding
+ end
+ end
+
strings
end
View
@@ -10,3 +10,7 @@ PFX A 0 re .
SFX B Y 2
SFX B 0 ed [^y]
SFX B y ied y
+
+OCONV 1
+OCONV ' ’
+
View
@@ -1,4 +1,5 @@
3
hello
+hots's
try/B
work/AB
@@ -106,4 +106,10 @@ def test_find_langauge_LC_MESSAGES
def test_stem
assert_equal %w[hello], @dict.stem("hello")
end
+
+ def test_suggest
+ suggestions = @dict.suggest "HOWTOs"
+
+ assert_equal %w[Hots’s], suggestions
+ end
end

0 comments on commit a0513cc

Please sign in to comment.