diff --git a/lib/highscore/content.rb b/lib/highscore/content.rb index f0ca22f..cc5b2ed 100644 --- a/lib/highscore/content.rb +++ b/lib/highscore/content.rb @@ -31,7 +31,7 @@ def initialize(content, wordlist = nil) :consonants => 0, :ignore_short_words => true, :ignore_case => false, - :word_pattern => /\w+/, + :word_pattern => /\p{Word}+/u, :stemming => false } end diff --git a/test/highscore/test_content.rb b/test/highscore/test_content.rb index 6469621..e04d947 100644 --- a/test/highscore/test_content.rb +++ b/test/highscore/test_content.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 $:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore})) require "content" require "test/unit" @@ -32,6 +33,13 @@ def test_keywords_fixnum assert_equal 1, content.keywords.length end + def test_keywords_utf8 + content = 'Schöne Grüße, caractères, русский' + + content = Highscore::Content.new content + assert_equal 4, content.keywords.length + end + def test_vowels_and_consonants keywords = 'foobar RubyGems'.keywords do set :vowels, 2 @@ -93,4 +101,4 @@ def test_stemming # do nothing, just skip this test end end -end \ No newline at end of file +end