Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
tree: c5cca8059a
Fetching contributors…

Cannot retrieve contributors at this time

64 lines (54 sloc) 1.56 kB
# coding: utf-8
require 'moji'
module GyomuRuby
  # Stateless helpers that normalize free-form text input: character-width
  # and kana conversion (delegated to the Moji library), dash/tilde
  # unification, and lenient coercion to Time, Date, Array and digit strings.
  #
  # The module extends itself, so every method can be called as
  # `GyomuRuby::WordNormalizer.some_method(...)` or mixed in with `include`.
  #
  # Relies on `blank?` and `acts_like?` being defined on the arguments
  # (NOTE(review): presumably supplied by ActiveSupport core extensions --
  # confirm that they are loaded wherever this module is used).
  module WordNormalizer
    extend self

    # Converts +word+ by chaining Moji.han_to_zen (restricted to Moji::KANA),
    # Moji.kata_to_hira and #normalize_symbol_unicode, in that order.
    #
    # @param word [String, nil] text to convert; nil is treated as ''
    # @return [String] the converted text
    def to_doublewidth_hiragana(word)
      fullwidth_kana = Moji.han_to_zen(word || '', Moji::KANA)
      normalize_symbol_unicode(Moji.kata_to_hira(fullwidth_kana))
    end
    alias to_kana to_doublewidth_hiragana

    # Unifies look-alike dash and tilde code points:
    #
    # * U+2212 (minus sign), U+2012 (figure dash), U+2013 (en dash) and
    #   U+2014 (em dash) become U+30FC (katakana-hiragana prolonged sound mark)
    # * U+301C (wave dash) becomes U+FF5E (fullwidth tilde)
    #
    # Returns a new string; +word+ itself is never modified, so frozen
    # strings are accepted and callers keep their original value.
    #
    # @param word [String] text to normalize
    # @return [String] a normalized copy of +word+
    def normalize_unconversion_symbol_unicode(word)
      word
        .gsub(/\u2212|\u2012|\u2013|\u2014/, "\u30fc")
        .gsub(/\u301c/, "\uff5e")
    end

    # Applies #normalize_unconversion_symbol_unicode and additionally turns
    # U+002D (hyphen-minus), U+FF0D (fullwidth hyphen-minus), U+2015
    # (horizontal bar) and U+2500 (box drawings light horizontal) into U+30FC.
    #
    # Returns a new string; +word+ itself is never modified.
    #
    # @param word [String] text to normalize
    # @return [String] a normalized copy of +word+
    def normalize_symbol_unicode(word)
      normalize_unconversion_symbol_unicode(word)
        .gsub(/\u002d|\uff0d|\u2015|\u2500/, "\u30fc")
    end

    # Coerces +word+ to a time value.
    #
    # @param word [Object] a time-like object, or anything whose #to_s
    #   Time.parse can read
    # @return [Object, nil] +word+ itself when it already acts like a time,
    #   the parsed Time otherwise, or nil when parsing raises
    def to_time(word)
      return word if word.acts_like?(:time) && word.acts_like_time?

      begin
        Time.parse(word.to_s)
      rescue StandardError
        # Best effort by design: unparseable input yields nil.
        nil
      end
    end

    # Coerces +word+ to a date value.
    #
    # @param word [Object] a date-like object, an object responding to
    #   #to_date, or anything whose #to_s Date.parse can read
    # @return [Object, nil] +word+ itself when it already acts like a date,
    #   the result of +word.to_date+ when available, the parsed Date
    #   otherwise, or nil when Date.parse raises
    def to_date(word)
      return word if word.acts_like?(:date) && word.acts_like_date?
      # NOTE(review): an exception raised by word.to_date is NOT rescued,
      # unlike the Date.parse fallback below -- confirm this is intended.
      return word.to_date if word.respond_to?(:to_date)

      begin
        Date.parse(word.to_s)
      rescue StandardError
        # Best effort by design: unparseable input yields nil.
        nil
      end
    end

    # Coerces +words+ to an array.
    #
    # @param words [Hash, Object] for a Hash, the keys whose values are not
    #   blank are collected (in iteration order) as strings; any other value
    #   goes through Kernel#Array
    # @return [Array]
    def to_array(words)
      return Array(words) unless words.is_a?(Hash)

      words.each_with_object([]) do |(key, value), selected|
        selected << key.to_s unless value.blank?
      end
    end

    # Converts +word+ through #normalize_symbol_unicode, Moji.han_to_zen
    # (restricted to Moji::SYMBOL) and Moji.zen_to_han, then replaces U+FF70
    # (halfwidth katakana-hiragana prolonged sound mark) with "-" (U+002D).
    #
    # XXX: track down the details of how 0x20 gets broken.
    #
    # @param word [String, nil] text to convert; it is left unmodified
    # @return [String] the converted text, or '' when +word+ is blank
    def to_hankaku(word)
      return '' if word.blank?

      fullwidth_symbols = Moji.han_to_zen(normalize_symbol_unicode(word), Moji::SYMBOL)
      Moji.zen_to_han(fullwidth_symbols).gsub(/\uff70/, "\u002d")
    end

    # Reduces +word+ to a string of digits.
    #
    # @param word [Numeric, String, nil] a Numeric is returned via #to_s
    #   unchanged (sign and decimal point included); anything else is passed
    #   through #to_hankaku and only characters matching /\d/ are kept
    # @return [String]
    def to_numeric(word)
      return word.to_s if word.is_a?(Numeric)

      to_hankaku(word || '').scan(/\d/).join
    end
  end
end
Jump to Line
Something went wrong with that request. Please try again.