Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
  • 4 commits
  • 2 files changed
  • 0 comments
  • 1 contributor

Showing 2 changed files with 301 additions and 150 deletions.

  1. +247 -149 lib/street_address.rb
  2. +54 -1 test/test_street_address.rb
396 lib/street_address.rb
@@ -65,7 +65,7 @@
65 65 module StreetAddress
66 66 VERSION = '1.0.1'
67 67 class US
68   - @@directional = {
  68 + Directional = {
69 69 "north" => "N",
70 70 "northeast" => "NE",
71 71 "east" => "E",
@@ -75,9 +75,9 @@ class US
75 75 "west" => "W",
76 76 "northwest" => "NW"
77 77 }
78   - @@direction_code = @@directional.invert
  78 + DirectionCode = Directional.invert
79 79
80   - @@street_type = {
  80 + StreetType = {
81 81 "allee" => "aly",
82 82 "alley" => "aly",
83 83 "ally" => "aly",
@@ -442,10 +442,10 @@ class US
442 442 "wy" => "way"
443 443 }
444 444
445   - @@street_type_list = {}
446   - @@street_type.to_a.each{|item| @@street_type_list[item[0]] = true; @@street_type_list[item[1]] = true}
  445 + StreetTypeList = {}
  446 + StreetType.to_a.each{|item| StreetTypeList[item[0]] = true; StreetTypeList[item[1]] = true}
447 447
448   - @@state_code = {
  448 + StateCode = {
449 449 "alabama" => "AL",
450 450 "alaska" => "AK",
451 451 "american samoa" => "AS",
@@ -507,9 +507,9 @@ class US
507 507 "wyoming" => "WY"
508 508 }
509 509
510   - @@state_name = @@state_code.invert
  510 + StateName = StateCode.invert
511 511
512   - @@state_fips = {
  512 + StateFips = {
513 513 "01" => "AL",
514 514 "02" => "AK",
515 515 "04" => "AZ",
@@ -565,49 +565,93 @@ class US
565 565 "78" => "VI"
566 566 }
567 567
568   - @@fips_state = @@state_fips.invert
  568 + FipsState = StateFips.invert
569 569
570   - @@street_type_regexp = @@street_type_list.keys.join("|")
571   - @@number_regexp = '\d+-?\d*'
572   - @@fraction_regexp = '\d+\/\d+'
573   - @@state_regexp = @@state_code.to_a.join("|").gsub(/ /, "\\s")
574   - @@direct_regexp = @@directional.keys.join("|") + "|" + @@directional.values.sort{|a,b| b.length <=> a.length }.map{|x| f = x.gsub(/(\w)/, '\1.'); [Regexp::quote(f), Regexp::quote(x)] }.join("|")
575   - @@zip_regexp = '(\d{5})(?:-(\d{4}))?'
576   - @@corner_regexp = '(?:\band\b|\bat\b|&|\@)'
577   - @@unit_regexp = '(?:(su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|apartment|ro*m|fl|unit|box)\W+|\#\W*)([\w-]+)'
578   - @@street_regexp =
  570 + Ordinals = {
  571 + 'first' => 1,
  572 + 'one' => 1,
  573 + 'ten' => 10,
  574 + 'tenth' => 10,
  575 + 'eleventh' => 11,
  576 + 'eleven' => 11,
  577 + 'twelfth' => 12,
  578 + 'twelve' => 12,
  579 + 'thirteenth' => 13,
  580 + 'thirteen' => 13,
  581 + 'fourteenth' => 14,
  582 + 'fourteen' => 14,
  583 + 'fifteenth' => 15,
  584 + 'fifteen' => 15,
  585 + 'sixteenth' => 16,
  586 + 'sixteen' => 16,
  587 + 'seventeenth' => 17,
  588 + 'seventeen' => 17,
  589 + 'eighteenth' => 18,
  590 + 'eighteen' => 18,
  591 + 'nineteenth' => 19,
  592 + 'nineteen' => 19,
  593 + 'second' => 2,
  594 + 'two' => 2,
  595 + 'twentieth' => 20,
  596 + 'twenty' => 20,
  597 + 'third' => 3,
  598 + 'three' => 3,
  599 + 'fourth' => 4,
  600 + 'four' => 4,
  601 + 'fifth' => 5,
  602 + 'five' => 5,
  603 + 'sixth' => 6,
  604 + 'six' => 6,
  605 + 'seventh' => 7,
  606 + 'seven' => 7,
  607 + 'eighth' => 8,
  608 + 'eight' => 8,
  609 + 'ninth' => 9,
  610 + 'nine' => 9
  611 + }
  612 +
  613 + OrdinalsRegexp = Ordinals.keys.join('\b|')
  614 + StreetTypeRegexp = StreetTypeList.keys.join("|")
  615 + NumberRegexp = '\d+-?\d*'
  616 + FractionRegexp = '\d+\/\d+'
  617 + StateRegexp = StateCode.to_a.join("|").gsub(/ /, "\\s")
  618 + DirectRegexp = Directional.keys.join("|") + "|" + Directional.values.sort{|a,b| b.length <=> a.length }.map{|x| f = x.gsub(/(\w)/, '\1.'); [Regexp::quote(f), Regexp::quote(x)] }.join("|")
  619 + ZipRegexp = '(\d{5})(?:-(\d{4}))?'
  620 + CornerRegexp = '(?:\band\b|\bat\b|&|\@)'
  621 + UnitRegexp = '(?:(su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|apartment|ro*m|fl|unit|box)\W+|\#\W*)([\w-]+)'
  622 + StreetRegexp =
579 623 '(?:
580   - (?:(' + @@direct_regexp + ')\W+
581   - (' + @@street_type_regexp + ')\b)
  624 + (?:(' + DirectRegexp + ')\W+
  625 + (' + StreetTypeRegexp + ')\b)
582 626 |
583   - (?:(' + @@direct_regexp + ')\W+)?
  627 + (?:(' + DirectRegexp + ')\W+)?
584 628 (?:
585 629 ([^,]+)
586   - (?:[^\w,]+(' + @@street_type_regexp + ')\b)
587   - (?:[^\w,]+(' + @@direct_regexp + ')\b)?
  630 + (?:[^\w,]+(' + StreetTypeRegexp + ')\b)
  631 + (?:[^\w,]+(' + DirectRegexp + ')\b)?
588 632 |
589 633 ([^,]*\d)
590   - (' + @@direct_regexp + ')\b
  634 + (' + DirectRegexp + ')\b
591 635 |
592 636 ([^,]+?)
593   - (?:[^\w,]+(' + @@street_type_regexp + ')\b)?
594   - (?:[^\w,]+(' + @@direct_regexp + ')\b)?
  637 + (?:[^\w,]+(' + StreetTypeRegexp + ')\b)?
  638 + (?:[^\w,]+(' + DirectRegexp + ')\b)?
595 639 )
596 640 )'
597   - @@place_regexp =
  641 + PlaceRegexp =
598 642 '(?:
599 643 ([^\d,]+?)\W+
600   - ($' + @@state_regexp + ')\W*
  644 + ($' + StateRegexp + ')\W*
601 645 )?
602   - (?:' + @@zip_regexp + ')?'
  646 + (?:' + ZipRegexp + ')?'
603 647
604   - @@address_regexp =
  648 + AddressRegexp =
605 649 '\A\W*
606   - (' + @@number_regexp + ')\W*
607   - (?:' + @@fraction_regexp + '\W*)?' +
608   - @@street_regexp + '\W+
609   - (?:' + @@unit_regexp + '\W+)?' +
610   - @@place_regexp +
  650 + (' + NumberRegexp + ')\W*
  651 + (?:' + FractionRegexp + '\W*)?' +
  652 + StreetRegexp + '\W+
  653 + (?:' + UnitRegexp + '\W+)?' +
  654 + PlaceRegexp +
611 655 '\W*\Z'
612 656
613 657 class << self
@@ -623,7 +667,7 @@ class << self
623 667
624 668 =end
625 669 def parse(location)
626   - regex = Regexp.new(@@corner_regexp, Regexp::IGNORECASE)
  670 + regex = Regexp.new(CornerRegexp, Regexp::IGNORECASE)
627 671 if regex.match(location)
628 672 z = regex.match(location)
629 673 parse_intersection(location);
@@ -644,28 +688,27 @@ def parse(location)
644 688 =end
645 689 def parse_intersection(inter)
646 690 regex = Regexp.new(
647   - '\A\W*' + @@street_regexp + '\W*?
648   - \s+' + @@corner_regexp + '\s+' +
649   - @@street_regexp + '\W+' +
650   - @@place_regexp + '\W*\Z', Regexp::IGNORECASE + Regexp::EXTENDED)
  691 + '\A\W*' + StreetRegexp + '\W*?
  692 + \s+' + CornerRegexp + '\s+' +
  693 + StreetRegexp + '\W+' +
  694 + PlaceRegexp + '\W*\Z', Regexp::IGNORECASE + Regexp::EXTENDED)
651 695 match = regex.match(inter)
652 696 return if match.nil?
653 697
654   - normalize_address(
655   - StreetAddress::US::Address.new(
656   - :street => match[4] || match[9],
657   - :street_type => match[5],
658   - :suffix => match[6],
659   - :prefix => match[3],
660   - :street2 => match[15] || match[20],
661   - :street_type2 => match[16],
662   - :suffix2 => match[17],
663   - :prefix2 => match[14],
664   - :city => match[23],
665   - :state => match[24],
666   - :postal_code => match[25]
667   - )
  698 + StreetAddress::US::Address.new(
  699 + :street => match[4] || match[9],
  700 + :street_type => match[5],
  701 + :suffix => match[6],
  702 + :prefix => match[3],
  703 + :street2 => match[15] || match[20],
  704 + :street_type2 => match[16],
  705 + :suffix2 => match[17],
  706 + :prefix2 => match[14],
  707 + :city => match[23],
  708 + :state => match[24],
  709 + :postal_code => match[25]
668 710 )
  711 +
669 712 end
670 713
671 714 =begin rdoc
@@ -679,72 +722,25 @@ def parse_intersection(inter)
679 722
680 723 =end
681 724 def parse_address(addr)
682   - regex = Regexp.new(@@address_regexp, Regexp::IGNORECASE + Regexp::EXTENDED)
683   - match = regex.match(addr)
684   - return if match.nil?
  725 + regex = Regexp.new(AddressRegexp, Regexp::IGNORECASE + Regexp::EXTENDED)
  726 + match = regex.match(addr)
  727 + return if match.nil?
685 728
686   - normalize_address(
687   - StreetAddress::US::Address.new(
688   - :number => match[1],
689   - :street => match[5] || match[10] || match[2],
690   - :street_type => match[6] || match[3],
691   - :unit => match[14],
692   - :unit_prefix => match[13],
693   - :suffix => match[7] || match[12],
694   - :prefix => match[4],
695   - :city => match[15],
696   - :state => match[16],
697   - :postal_code => match[17],
698   - :postal_code_ext => match[18]
699   - )
  729 + StreetAddress::US::Address.new(
  730 + :number => match[1],
  731 + :street => match[5] || match[10] || match[2],
  732 + :street_type => match[6] || match[3],
  733 + :unit => match[14],
  734 + :unit_prefix => match[13],
  735 + :suffix => match[7] || match[12],
  736 + :prefix => match[4],
  737 + :city => match[15],
  738 + :state => match[16],
  739 + :postal_code => match[17],
  740 + :postal_code_ext => match[18]
700 741 )
701 742 end
702 743
703   - def state_name #:nodoc:
704   - @@state_name
705   - end
706   -
707   - def fips_state #:nodoc:
708   - @@fips_state
709   - end
710   -
711   - private
712   - def normalize_address(addr)
713   - addr.state = normalize_state(addr.state) unless addr.state.nil?
714   - addr.street_type = normalize_street_type(addr.street_type) unless addr.street_type.nil?
715   - addr.prefix = normalize_directional(addr.prefix) unless addr.prefix.nil?
716   - addr.suffix = normalize_directional(addr.suffix) unless addr.suffix.nil?
717   - addr.street.gsub!(/\b([a-z])/) {|wd| wd.capitalize} unless addr.street.nil?
718   - addr.street_type2 = normalize_street_type(addr.street_type2) unless addr.street_type2.nil?
719   - addr.prefix2 = normalize_directional(addr.prefix2) unless addr.prefix2.nil?
720   - addr.suffix2 = normalize_directional(addr.suffix2) unless addr.suffix2.nil?
721   - addr.street2.gsub!(/\b([a-z])/) {|wd| wd.capitalize} unless addr.street2.nil?
722   - addr.city.gsub!(/\b([a-z])/) {|wd| wd.capitalize} unless addr.city.nil?
723   - addr.unit_prefix.capitalize! unless addr.unit_prefix.nil?
724   - return addr
725   - end
726   -
727   - def normalize_state(state)
728   - if state.length < 3
729   - state.upcase
730   - else
731   - @@state_code[state.downcase]
732   - end
733   - end
734   -
735   - def normalize_street_type(s_type)
736   - s_type.downcase!
737   - s_type = @@street_type[s_type] || s_type if @@street_type_list[s_type]
738   - s_type.capitalize
739   - end
740   -
741   - def normalize_directional(dir)
742   - if dir.length < 3
743   - dir.upcase
744   - else
745   - @@directional[dir.downcase]
746   - end
747   - end
748 744 end
749 745
750 746 =begin rdoc
@@ -755,59 +751,161 @@ def normalize_directional(dir)
755 751
756 752 =end
757 753 class Address
758   - attr_accessor :number, :street, :street_type, :unit, :unit_prefix, :suffix, :prefix, :city, :state, :postal_code, :postal_code_ext, :street2, :street_type2, :suffix2, :prefix2
  754 + attr_accessor :number, :street, :street_type, :unit, :unit_prefix,
  755 + :suffix, :prefix, :city, :state, :postal_code,
  756 + :postal_code_ext, :street2, :street_type2, :suffix2,
  757 + :prefix2
759 758
760 759 def initialize(args)
761 760 args.keys.each { |attrib| self.send("#{attrib}=", args[attrib]) }
  761 + normalize!
762 762 end
763 763
764 764 def state_fips
765   - StreetAddress::US::fips_state[@state]
  765 + FipsState[state]
766 766 end
767 767
768 768 def state_name
769   - s_name = StreetAddress::US.state_name[state]
770   - s_name.capitalize unless s_name.nil?
  769 + s_name = StateName[state]
  770 + s_name.capitalize if s_name
771 771 end
772 772
773 773 def intersection?
774 774 !street2.nil?
775 775 end
  776 +
  777 + def street_address
  778 + [prefix,street,street_type,suffix].compact.join(' ')
  779 + end
  780 +
  781 + def street_address2
  782 + [prefix2,street2,street_type2,suffix2].compact.join(' ')
  783 + end
  784 +
  785 + def house_address
  786 + [
  787 + number,
  788 + street_address,
  789 + (unit_prefix || '#' if unit),
  790 + unit
  791 + ].compact.join(' ')
  792 + end
  793 +
  794 + def city_state_postal_code
  795 + [
  796 + ("#{city}," if city),
  797 + state, postal_code, postal_code_ext
  798 + ].compact.join(' ')
  799 + end
776 800
777 801 def to_s
778   - s = ""
779 802 if intersection?
780   - s += prefix + " " unless prefix.nil?
781   - s += street
782   - s += " " + street_type unless street_type.nil?
783   - s += " " + suffix unless suffix.nil?
784   - s += " and"
785   - s += " " + prefix2 unless prefix2.nil?
786   - s += " " + street2
787   - s += " " + street_type2 unless street_type2.nil?
788   - s += " " + suffix2 unless suffix2.nil?
789   - s += ", " + city unless city.nil?
790   - s += ", " + state unless state.nil?
791   - s += " " + postal_code unless postal_code.nil?
  803 + [
  804 + street_address,
  805 + 'and',
  806 + ("#{street_address2}," if street_address2),
  807 + city_state_postal_code
  808 + ].compact.join(' ')
792 809 else
793   - s += number
794   - s += " " + prefix unless prefix.nil?
795   - s += " " + street unless street.nil?
796   - s += " " + street_type unless street_type.nil?
797   - if( !unit_prefix.nil? && !unit.nil? )
798   - s += " " + unit_prefix
799   - s += " " + unit
800   - elsif( unit_prefix.nil? && !unit.nil? )
801   - s += " #" + unit
802   - end
803   - s += " " + suffix unless suffix.nil?
804   - s += ", " + city unless city.nil?
805   - s += ", " + state unless state.nil?
806   - s += " " + postal_code unless postal_code.nil?
807   - s += "-" + postal_code_ext unless postal_code_ext.nil?
  810 + [
  811 + house_address,
  812 + city_state_postal_code
  813 + ].compact.join(', ')
808 814 end
809   - return s
810 815 end
  816 +
  817 + def normalize!
  818 + normalize_state
  819 + normalize_street_types
  820 + normalize_directionals
  821 + normalize_ordinals
  822 + normalize_capitalization
  823 + end
  824 +
  825 + private
  826 +
  827 + def normalize_state
  828 + return unless state
  829 + if state.size < 3
  830 + state.upcase!
  831 + else
  832 + self.state = StateCode[state.downcase]
  833 + end
  834 + end
  835 +
  836 + def normalize_street_types
  837 + self.street_type = normalize_street_type(street_type)
  838 + self.street_type2 = normalize_street_type(street_type2)
  839 + end
  840 +
  841 + def normalize_street_type(str_type)
  842 + return unless str_type
  843 + str_type.downcase!
  844 + str_type = StreetType[str_type].dup if StreetType[str_type]
  845 + str_type.capitalize!
  846 + end
  847 +
  848 + def normalize_directionals
  849 + self.prefix = normalize_directional(prefix)
  850 + self.suffix = normalize_directional(suffix)
  851 + self.prefix2 = normalize_directional(prefix2)
  852 + self.suffix2 = normalize_directional(suffix2)
  853 + end
  854 +
  855 + def normalize_directional(dir)
  856 + return unless dir
  857 + if dir.length < 3
  858 + dir.upcase
  859 + else
  860 + Directional[dir.downcase]
  861 + end
  862 + end
  863 +
  864 + def normalize_ordinals
  865 + self.street = normalize_ordinal(street)
  866 + self.street2 = normalize_ordinal(street2)
  867 + end
  868 +
  869 + def normalize_ordinal(str)
  870 + return unless str
  871 + words = str.scan(Regexp.new(OrdinalsRegexp, true))
  872 + return str if words.empty?
  873 + num = words.inject(0) {|sum, wd| sum += Ordinals[wd.downcase]; sum }
  874 + str.sub(words.join(' '), ordinalize(num))
  875 + end
  876 +
  877 + def normalize_capitalization
  878 + street.gsub!(/\b([a-z])/) {|wd| wd.capitalize} if street
  879 + street2.gsub!(/\b([a-z])/) {|wd| wd.capitalize} if street2
  880 + city.gsub!(/\b([a-z])/) {|wd| wd.capitalize } if city
  881 + unit_prefix.capitalize! if unit_prefix
  882 + end
  883 +
  884 + # Turns a number into an ordinal string used to denote the position in an
  885 + # ordered sequence such as 1st, 2nd, 3rd, 4th.
  886 + #
  887 + # Examples:
  888 + # ordinalize(1) # => "1st"
  889 + # ordinalize(2) # => "2nd"
  890 + # ordinalize(1002) # => "1002nd"
  891 + # ordinalize(1003) # => "1003rd"
  892 + # ordinalize(-11) # => "-11th"
  893 + # ordinalize(-1021) # => "-1021st"
  894 + #
  895 + # Taken from ActiveSupport
  896 + def ordinalize(number)
  897 + if (11..13).include?(number.to_i.abs % 100)
  898 + "#{number}th"
  899 + else
  900 + case number.to_i.abs % 10
  901 + when 1; "#{number}st"
  902 + when 2; "#{number}nd"
  903 + when 3; "#{number}rd"
  904 + else "#{number}th"
  905 + end
  906 + end
  907 + end
  908 +
811 909 end
812 910 end
813 911 end
55 test/test_street_address.rb
@@ -144,5 +144,58 @@ def test_parse
144 144 end
145 145
146 146 end
147   -
  147 +
  148 + def test_should_parse_ordinals
  149 + address = "701 First Avenue, Minneapolis, MN 55403"
  150 + addr = StreetAddress::US.parse(address)
  151 + assert_equal '1st', addr.street
  152 + assert_equal 'Ave', addr.street_type
  153 + end
  154 +
  155 + def test_should_parse_ordinals_on_intersection
  156 + address = "First Avenue and Seventh Street, Minneapolis, MN 55403"
  157 + addr = StreetAddress::US.parse(address)
  158 + assert_equal '7th', addr.street2
  159 + assert_equal 'St', addr.street_type2
  160 + assert_equal '1st', addr.street
  161 + assert_equal 'Ave', addr.street_type
  162 + end
  163 +
  164 + def test_should_have_street_address
  165 + address = "701 First Avenue, Minneapolis, MN 55403"
  166 + addr = StreetAddress::US.parse(address)
  167 + assert_equal '1st Ave', addr.street_address
  168 + end
  169 +
  170 + def test_should_have_house_address
  171 + address = "701 First Avenue, Minneapolis, MN 55403"
  172 + addr = StreetAddress::US.parse(address)
  173 + assert_equal '701 1st Ave', addr.house_address
  174 + end
  175 +
  176 + def test_should_have_house_number_for_suite
  177 + address = "44 Canal Center Plaza Suite 500, Alexandria, VA 22314"
  178 + addr = StreetAddress::US.parse(address)
  179 + assert_equal '44 Canal Center Plz Suite 500', addr.house_address
  180 + end
  181 +
  182 + def test_should_have_street_addresses_on_intersection
  183 + address = "First Avenue and Seventh Street, Minneapolis, MN 55403"
  184 + addr = StreetAddress::US.parse(address)
  185 + assert_equal "1st Ave", addr.street_address
  186 + assert_equal "7th St", addr.street_address2
  187 + end
  188 +
  189 + def test_should_have_to_s
  190 + address = "701 First Avenue, Minneapolis, MN 55403"
  191 + addr = StreetAddress::US.parse(address)
  192 + assert_equal '701 1st Ave, Minneapolis, MN 55403', addr.to_s
  193 + end
  194 +
  195 + def test_should_have_to_s_on_intersection
  196 + address = "First Avenue and Seventh Street, Minneapolis, MN 55403"
  197 + addr = StreetAddress::US.parse(address)
  198 + assert_equal "1st Ave and 7th St, Minneapolis, MN 55403", addr.to_s
  199 + end
  200 +
148 201 end

No commit comments for this range

Something went wrong with that request. Please try again.