Skip to content

Commit

Permalink
make it so words that contain a number (e.g. pennyweight) won't get ma…
Browse files Browse the repository at this point in the history
…ngled
  • Loading branch information
jduff committed Jan 1, 2010
1 parent 0428b21 commit 2412ad6
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 17 deletions.
40 changes: 25 additions & 15 deletions lib/numerizer.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -26,18 +26,22 @@ class Numerizer
['nineteen', '19'], ['nineteen', '19'],
['ninteen', '19'], # Common mis-spelling ['ninteen', '19'], # Common mis-spelling
['zero', '0'], ['zero', '0'],
['one', '1'],
['two', '2'],
['three', '3'],
['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
['five', '5'],
['six(\W|$)', '6\1'],
['seven(\W|$)', '7\1'],
['eight(\W|$)', '8\1'],
['nine(\W|$)', '9\1'],
['ten', '10'], ['ten', '10'],
['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1 ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
] ]

SINGLE_NUMS = [
['one', 1],
['two', 2],
['three', 3],
#['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
['four', 4],
['five', 5],
['six', 6],
['seven', 7],
['eight', 8],
['nine', 9]
]


TEN_PREFIXES = [ ['twenty', 20], TEN_PREFIXES = [ ['twenty', 20],
['thirty', 30], ['thirty', 30],
Expand Down Expand Up @@ -76,16 +80,22 @@ def self.numerize(string)


# easy/direct replacements # easy/direct replacements


DIRECT_NUMS.each do |dn| (DIRECT_NUMS + SINGLE_NUMS).each do |dn|
string.gsub!(/#{dn[0]}/i, '<num>' + dn[1]) # string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
end end


# ten, twenty, etc. # ten, twenty, etc.
# TEN_PREFIXES.each do |tp|
# string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
# end
TEN_PREFIXES.each do |tp| TEN_PREFIXES.each do |tp|
string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s} SINGLE_NUMS.each do |dn|
end string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
TEN_PREFIXES.each do |tp| "#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s } }
end
string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
end end


# handle fractions # handle fractions
Expand Down
13 changes: 11 additions & 2 deletions test/test_numerizer.rb
Original file line number Original file line Diff line number Diff line change
@@ -1,4 +1,4 @@
require 'test_helper' require File.join(File.dirname(__FILE__), 'test_helper')


class NumerizerTest < Test::Unit::TestCase class NumerizerTest < Test::Unit::TestCase
def test_straight_parsing def test_straight_parsing
Expand Down Expand Up @@ -31,7 +31,7 @@ def test_straight_parsing
1_200 => 'twelve hundred', 1_200 => 'twelve hundred',
1_200 => 'one thousand two hundred', 1_200 => 'one thousand two hundred',
17_000 => 'seventeen thousand', 17_000 => 'seventeen thousand',
21_473 => 'twentyone-thousand-four-hundred-and-seventy-three', 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
74_002 => 'seventy four thousand and two', 74_002 => 'seventy four thousand and two',
99_999 => 'ninety nine thousand nine hundred ninety nine', 99_999 => 'ninety nine thousand nine hundred ninety nine',
100_000 => '100 thousand', 100_000 => '100 thousand',
Expand All @@ -48,6 +48,11 @@ def test_straight_parsing
assert_equal "2.5", Numerizer.numerize("two and a half") assert_equal "2.5", Numerizer.numerize("two and a half")
assert_equal "1/2", Numerizer.numerize("one half") assert_equal "1/2", Numerizer.numerize("one half")
end end

def test_combined_double_digets
assert_equal "21", Numerizer.numerize("twentyone")
assert_equal "37", Numerizer.numerize("thirtyseven")
end


def test_fractions_in_words def test_fractions_in_words
assert_equal "1/4", Numerizer.numerize("1 quarter") assert_equal "1/4", Numerizer.numerize("1 quarter")
Expand All @@ -65,6 +70,10 @@ def test_fractional_addition
assert_equal "2.375", Numerizer.numerize("two and three eighths") assert_equal "2.375", Numerizer.numerize("two and three eighths")
assert_equal "3.5 hours", Numerizer.numerize("three and a half hours") assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
end end

def test_word_with_a_number
assert_equal "pennyweight", Numerizer.numerize("pennyweight")
end


def test_edges def test_edges
assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am") assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
Expand Down

0 comments on commit 2412ad6

Please sign in to comment.