Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Make it so words that contain a number (e.g. pennyweight) won't get mangled
  • Loading branch information...
commit 2412ad6624d42dae9234179609ccc76ff9cb097a 1 parent 0428b21
@jduff authored
Showing with 36 additions and 17 deletions.
  1. +25 −15 lib/numerizer.rb
  2. +11 −2 test/test_numerizer.rb
View
40 lib/numerizer.rb
@@ -26,18 +26,22 @@ class Numerizer
['nineteen', '19'],
['ninteen', '19'], # Common mis-spelling
['zero', '0'],
- ['one', '1'],
- ['two', '2'],
- ['three', '3'],
- ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
- ['five', '5'],
- ['six(\W|$)', '6\1'],
- ['seven(\W|$)', '7\1'],
- ['eight(\W|$)', '8\1'],
- ['nine(\W|$)', '9\1'],
['ten', '10'],
['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
]
+
+ SINGLE_NUMS = [
+ ['one', 1],
+ ['two', 2],
+ ['three', 3],
+ #['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
+ ['four', 4],
+ ['five', 5],
+ ['six', 6],
+ ['seven', 7],
+ ['eight', 8],
+ ['nine', 9]
+ ]
TEN_PREFIXES = [ ['twenty', 20],
['thirty', 30],
@@ -76,16 +80,22 @@ def self.numerize(string)
# easy/direct replacements
- DIRECT_NUMS.each do |dn|
- string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
+ (DIRECT_NUMS + SINGLE_NUMS).each do |dn|
+ # string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
+ string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
end
# ten, twenty, etc.
+ # TEN_PREFIXES.each do |tp|
+ # string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
+ # end
TEN_PREFIXES.each do |tp|
- string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
- end
- TEN_PREFIXES.each do |tp|
- string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
+ SINGLE_NUMS.each do |dn|
+ string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
+ "#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
+ }
+ end
+ string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
end
# handle fractions
View
13 test/test_numerizer.rb
@@ -1,4 +1,4 @@
-require 'test_helper'
+require File.join(File.dirname(__FILE__), 'test_helper')
class NumerizerTest < Test::Unit::TestCase
def test_straight_parsing
@@ -31,7 +31,7 @@ def test_straight_parsing
1_200 => 'twelve hundred',
1_200 => 'one thousand two hundred',
17_000 => 'seventeen thousand',
- 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
+ 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
74_002 => 'seventy four thousand and two',
99_999 => 'ninety nine thousand nine hundred ninety nine',
100_000 => '100 thousand',
@@ -48,6 +48,11 @@ def test_straight_parsing
assert_equal "2.5", Numerizer.numerize("two and a half")
assert_equal "1/2", Numerizer.numerize("one half")
end
+
+ def test_combined_double_digets
+ assert_equal "21", Numerizer.numerize("twentyone")
+ assert_equal "37", Numerizer.numerize("thirtyseven")
+ end
def test_fractions_in_words
assert_equal "1/4", Numerizer.numerize("1 quarter")
@@ -65,6 +70,10 @@ def test_fractional_addition
assert_equal "2.375", Numerizer.numerize("two and three eighths")
assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
end
+
+ def test_word_with_a_number
+ assert_equal "pennyweight", Numerizer.numerize("pennyweight")
+ end
def test_edges
assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
Please sign in to comment.
Something went wrong with that request. Please try again.