Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Damián Silvani March 11, 2011
file 100 lines (83 sloc) 3.223 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
# encoding: utf-8
require 'helper'

# Unit tests for ::Lingua::Stemmer and the ::Lingua.stemmer convenience method.
#
# All assert_equal calls use the (expected, actual) argument order so that
# failure messages report the right value as "expected".
class TestStemmer < Test::Unit::TestCase

  # A stemmer can be built without passing any options.
  def test_stemmer_creation
    assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
  end

  # Unsupported option combinations raise ::Lingua::StemmerError.
  def test_exceptions
    assert_raise ::Lingua::StemmerError do
      # invalid encoding for language
      ::Lingua::Stemmer.new(:language => "ro", :encoding => "ISO_8859_1")
    end
    assert_raise ::Lingua::StemmerError do
      # invalid language
      ::Lingua::Stemmer.new(:language => "cat")
    end
  end

  # Requesting the "latin" language with ISO_8859_1 must not raise.
  def test_latin
    assert_nothing_raised do
      ::Lingua::Stemmer.new(:language => "latin", :encoding => "ISO_8859_1")
    end
  end

  # Stemmer#stem returns the expected English stems.
  def test_stem
    stemmer = ::Lingua::Stemmer.new(:language => "en", :encoding => "UTF_8")
    assert_equal "obnoxi", stemmer.stem("obnoxious")
    assert_equal "person", stemmer.stem("personalities")
  end

  # ::Lingua.stemmer with a single string: without a block the stem is
  # returned; with a block the call returns a Stemmer instance.
  def test_string_stemmer
    assert_equal "instal", ::Lingua.stemmer("installation", :language => "en")

    # NOTE(review): the assertion inside the block only runs if
    # ::Lingua.stemmer actually invokes the block.
    stemmer = ::Lingua.stemmer("installation", :language => "fr") do |word|
      assert_equal "install", word
    end
    assert_kind_of ::Lingua::Stemmer, stemmer
    assert_equal expected_encoding("UTF_8"), stemmer.encoding
  end

  # ::Lingua.stemmer with an array returns an Array with one entry per word.
  def test_array_stemmer
    results = ::Lingua.stemmer(["one", "two"], :language => "de", :encoding => "ISO_8859_1")
    assert_equal 2, results.size
    assert_kind_of Array, results
  end

  # A subclass that replaces native_init with a no-op must raise RuntimeError
  # when asked to stem (presumably because the native stemmer was never set
  # up -- confirm against the extension source).
  def test_stemmer_subclass
    broken_class = Class.new(::Lingua::Stemmer) do
      def native_init(a, b); end
    end

    assert_raise(RuntimeError) do
      broken_class.new.stem('cow')
    end
  end

  # With no :encoding option the stemmer reports UTF-8.
  def test_default_encoding_option
    assert_equal expected_encoding("UTF_8"), ::Lingua::Stemmer.new.encoding
  end

  # The :encoding option accepts several spellings (underscore or dash,
  # upper or lower case, String or Symbol) and, on Ruby >= 1.9, an Encoding
  # object.
  def test_different_encoding_options
    latin1 = expected_encoding("ISO_8859_1")
    utf8   = expected_encoding("UTF_8")

    assert_equal latin1, ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding
    assert_equal utf8,   ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding
    assert_equal utf8,   ::Lingua::Stemmer.new(:encoding => "utf-8").encoding
    assert_equal latin1, ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding

    # Encoding objects only exist on Ruby >= 1.9.
    if RUBY_VERSION >= '1.9'
      assert_equal utf8, ::Lingua::Stemmer.new(:encoding => Encoding::UTF_8).encoding
    end
  end

  # String#encoding is only available on Ruby >= 1.9, so the test is only
  # defined there.
  if RUBY_VERSION >= '1.9'
    # Stems carry the encoding the stemmer was configured with.
    def test_string_encoding
      word = "așezare"

      stem = ::Lingua.stemmer(word, :language => "ro", :encoding => "UTF_8")
      assert_equal word.encoding, stem.encoding

      stemmer = ::Lingua::Stemmer.new(:language => "ro", :encoding => "UTF_8")
      assert_equal word.encoding, stemmer.stem(word).encoding

      stem = ::Lingua.stemmer("installation", :language => "fr", :encoding => "ISO-8859-1")
      assert_equal Encoding::ISO_8859_1, stem.encoding
    end
  end

  private

  # Returns the value Stemmer#encoding is expected to report for the
  # encoding called +name+ (e.g. "UTF_8"): the matching Encoding constant on
  # Ruby >= 1.9, or the plain name String on older rubies.
  def expected_encoding(name)
    return name unless RUBY_VERSION >= '1.9'

    Encoding.const_get(name)
  end

end
Something went wrong with that request. Please try again.