Permalink
Browse files

Merge pull request #5 from drbrain/find_dictionaries

Added automatic dictionary determination from path and ENV.
  • Loading branch information...
2 parents a5a237c + 24e3b45 commit 350d70ad012c3725f4985e38eb45542e45786625 @ahx committed Apr 17, 2012
Showing with 164 additions and 17 deletions.
  1. +14 −5 README.rdoc
  2. +53 −6 lib/hunspell-ffi.rb
  3. +12 −0 test/en_US.aff
  4. +4 −0 test/en_US.dic
  5. +81 −6 test/test_hunspell.rb
View
@@ -20,12 +20,22 @@ On Debian:
gem install hunspell-ffi
== Usage
+
require 'hunspell-ffi'
+
+ # Detect language from ENV:
+ dict = Hunspell.new("/path/to/dictionaries")
+
+ # Directly specify language:
+ dict = Hunspell.new("/path/to/dictionaries", "en_US")
+
+ # Directly specify the affix and dictionary files (legacy):
dict = Hunspell.new("path/to/cakes.aff", "path/to/cakes.dic")
- dict.spell("Baumkuchen") # => true same as #check, #check?
- dict.spell("Bomcuken") # => false
- dict.check?("Bomcuken") # => false
- dict.suggest("Baumgurken") # => ["Baumkuchen"]
+
+ dict.spell("walked") # => true same as #check, #check?
+ dict.spell("woked") # => false
+ dict.check?("woked") # => false
+ dict.suggest("woked") # => ["woke", "worked", "waked", "woken", ...]
dict.suggest("qwss43easd") # => []
# Modify the run-time dictionary:
@@ -42,7 +52,6 @@ Hereby placed under public domain, do what you want, just do not hold me account
== Help wanted
I hear Hunspell has some superpowers, like stemming, and some that I have never even heard of.
Maybe you want to help bring some of that power into the Ruby world.
-Or maybe we can think of a nice way to find to locate .dict files on a system or something.
Anyways, feel free to fork and send pull requests. kthxbye. Andreas.
The source is on GitHub: https://github.com/ahx/hunspell-ffi
View
@@ -11,13 +11,60 @@ module C
attach_function :Hunspell_add_with_affix, [:pointer, :string, :string], :int
attach_function :Hunspell_remove, [:pointer, :string], :int
end
-
- def initialize(affpath, dicpath)
- warn("Hunspell could not find aff-file #{affpath}") unless File.exist?(affpath)
- warn("Hunspell could not find dic-file #{affpath}") unless File.exist?(dicpath)
- @handler = C.Hunspell_create(affpath, dicpath)
+
+ ##
+ # The affix file used to check words
+
+ attr_reader :affix
+
+ ##
+ # The dictionary file used to check words
+
+ attr_reader :dictionary
+
+ ##
+ # Creates a spell-checking instance. If only +path+ is given, Hunspell will
+ # look for a dictionary using the language of your current locale, checking
+ # LC_ALL, LC_MESSAGES and LANG. If you would like to spell check words of a
+ # specific language provide it as the second parameter, +language+.
+ #
+ # You may also directly provide the affix file as the +path+ argument and
+ # the dictionary file as the +language+ argument, provided they both exist.
+ # This is for legacy use of Hunspell.
+
+ def initialize(path, language = nil)
+ if File.exist?(path) and language and File.exist?(language) then
+ @affix = path
+ @dictionary = language
+ else
+ language ||= find_language
+
+ @affix = File.join path, "#{language}.aff"
+ @dictionary = File.join path, "#{language}.dic"
+ end
+
+ raise ArgumentError,
+ "Hunspell could not find affix file #{@affix}" unless
+ File.exist?(@affix)
+ raise ArgumentError,
+ "Hunspell could not find dictionary file #{@dictionary}" unless
+ File.exist?(@dictionary)
+
+ @handler = C.Hunspell_create @affix, @dictionary
end
-
+
+ def find_language
+ %w[LC_ALL LC_MESSAGES LANG].each do |var|
+ next unless value = ENV[var]
+
+ lang, charset = value.split('.', 2)
+
+ return lang if charset
+ end
+
+ nil
+ end
+
# Returns true if the word is known, false otherwise.
def spell(word)
C.Hunspell_spell(@handler, word)
View
@@ -0,0 +1,12 @@
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+REP 2
+REP f ph
+REP ph f
+
+PFX A Y 1
+PFX A 0 re .
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y
View
@@ -0,0 +1,4 @@
+3
+hello
+try/B
+work/AB
View
@@ -4,14 +4,36 @@
class TestHunspell < Test::Unit::TestCase
def setup
@dict_dir = File.dirname(__FILE__)
- @dict = Hunspell.new("#{@dict_dir}/cakes.aff", "#{@dict_dir}/cakes.dic")
+ @dict = Hunspell.new(@dict_dir, "en_US")
+ end
+
+ def test_initialize
+ assert_equal File.join(@dict_dir, "en_US.aff"), @dict.affix
+ assert_equal File.join(@dict_dir, "en_US.dic"), @dict.dictionary
+ end
+
+ def test_initialize_legacy
+ h = Hunspell.new("#{@dict_dir}/cakes.aff", "#{@dict_dir}/cakes.dic")
+
+ assert_equal File.join(@dict_dir, "cakes.aff"), h.affix
+ assert_equal File.join(@dict_dir, "cakes.dic"), h.dictionary
+ end
+
+ def test_initialize_missing
+ e = assert_raises ArgumentError do
+ Hunspell.new(@dict_dir, "en_CA")
+ end
+
+ dict = File.join(@dict_dir, "en_CA.aff")
+ assert_equal "Hunspell could not find affix file #{dict}", e.message
end
def test_basic_spelling
- assert @dict.spell("Baumkuchen") == true
- assert @dict.check("Baumkuchen") == true # check alias
- assert @dict.spell("Bomcuken") == false
- assert_equal ["Baumkuchen"], @dict.suggest("Baumgurken")
+ assert @dict.spell("worked")
+ assert @dict.check("worked") # check alias
+ assert !@dict.spell("working")
+
+ assert_equal ["worked", "work"], @dict.suggest("woked")
assert_equal [], @dict.suggest("qwss43easd")
end
@@ -23,4 +45,57 @@ def test_dict_modifications
assert @dict.spell("Neuer Kuchen") == false
# TODO test add_with_affix
end
-end
+
+ def test_find_langauge_none
+ orig_LC_ALL = ENV["LC_ALL"]
+ orig_LC_MESSAGES = ENV["LC_ALL"]
+ orig_LANG = ENV["LANG"]
+
+ ENV.delete "LC_ALL"
+ ENV.delete "LC_MESSAGES"
+ ENV.delete "LANG"
+
+ assert_nil @dict.find_language
+ ensure
+ ENV["LC_ALL"] = orig_LC_ALL
+ ENV["LC_MESSAGES"] = orig_LC_MESSAGES
+ ENV["LANG"] = orig_LANG
+ end
+
+ def test_find_langauge_LANG
+ orig_LC_ALL = ENV["LC_ALL"]
+ orig_LC_MESSAGES = ENV["LC_ALL"]
+ orig_LANG = ENV["LANG"]
+
+ ENV.delete "LC_ALL"
+ ENV.delete "LC_MESSAGES"
+ ENV["LANG"] = "en_CA.UTF-8"
+
+ assert_equal "en_CA", @dict.find_language
+ ensure
+ ENV["LC_ALL"] = orig_LC_ALL
+ ENV["LC_MESSAGES"] = orig_LC_MESSAGES
+ ENV["LANG"] = orig_LANG
+ end
+
+ def test_find_langauge_LC_ALL
+ orig_LC_ALL = ENV["LC_ALL"]
+ ENV["LC_ALL"] = "en_CA.UTF-8"
+
+ assert_equal "en_CA", @dict.find_language
+ ensure
+ ENV["LC_ALL"] = orig_LC_ALL
+ end
+
+ def test_find_langauge_LC_MESSAGES
+ orig_LC_ALL = ENV["LC_ALL"]
+ orig_LC_MESSAGES = ENV["LC_ALL"]
+ ENV.delete "LC_ALL"
+ ENV["LC_MESSAGES"] = "en_CA.UTF-8"
+
+ assert_equal "en_CA", @dict.find_language
+ ensure
+ ENV["LC_ALL"] = orig_LC_ALL
+ ENV["LC_MESSAGES"] = orig_LC_MESSAGES
+ end
+end

0 comments on commit 350d70a

Please sign in to comment.