Permalink
Browse files

initial import

  • Loading branch information...
0 parents commit 1102c840ca652097602e2a10dcf2463c611c3733 @coderifous committed Nov 14, 2008
Showing with 318 additions and 0 deletions.
  1. +3 −0 .gitignore
  2. +23 −0 LICENSE.textile
  3. +45 −0 README.textile
  4. +14 −0 Rakefile
  5. +162 −0 lib/address_extractor.rb
  6. +71 −0 test/test_address_extractor.rb
@@ -0,0 +1,3 @@
+pkg
+doc
+Manifest
@@ -0,0 +1,23 @@
+h4. Copyright and License
+
+The MIT License
+
+Copyright (c) 2008 Jim Garvin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
@@ -0,0 +1,45 @@
+h1. AddressExtractor
+
+Find and/or replace mailing addresses in strings.
+
+h2. Examples
+
+<pre><code>
+string = <<EOF
+Please send the package to 123 Foo St., Someplace FL
+
+My phone number is 123-1234 and St. Marc of Israel can be reached
+via mail at:
+ 123 Goob Avenue
+ Apt 123
+ Nice Town CA 123456
+EOF
+
+# Find first address
+AddressExtractor.first_address(string) # => { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" }
+
+# Find all addresses
+AddressExtractor.find_addresses(string) # =>
+ # [
+ # { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" },
+ # { :street1 => "123 Goob Avenue", :street2 => "Apt 123", :city => "Nice Town", :state => "CA", :zip => "123456" }
+ # ]
+
+# Do a sub on the first address only (the block's return value replaces it)
+new_string = AddressExtractor.replace_first_address(string) do |address_hash, address_string|
+ map_link_to(address_string)
+end
+
+# Do a gsub on all addresses
+new_string = AddressExtractor.replace_addresses(string) do |address_hash, address_string|
+ map_link_to(address_string)
+end
+</code></pre>
+
+h3. About
+
+Written by Jim Garvin at RubyConf '08 at the request of Chris Murphy and Ryan McGeary so they could use it in Yarp.com.
+
+You can use it, too.
+
+The address-finding regex may be a bit naive; I'll gladly accept pull requests that add to the test data and tests.
@@ -0,0 +1,14 @@
+require 'rubygems'
+require 'rake'
+require 'echoe'
+
+Echoe.new('address_extractor', '0.1.0') do |p|
+ p.description = "Give it text. It finds addresses in it."
+ p.url = "http://github.com/coderifous/address_extractor"
+ p.author = "Jim Garvin"
+ p.email = "jim at thegarvin dot com"
+ p.ignore_pattern = ["tmp/*", "script/*"]
+ p.development_dependencies = []
+end
+
+Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
@@ -0,0 +1,162 @@
+class AddressExtractor
+ class << self
+
+ def first_address(string)
+ hashify_results string.scan(ADDRESS_PATTERN).first
+ end
+
+ def find_addresses(string)
+ string.scan(ADDRESS_PATTERN).collect { |a| hashify_results(a) }.compact
+ end
+
+ def replace_first_address(string)
+ hash = first_address(string)
+ string.sub(ADDRESS_PATTERN) do |match|
+ yield(hash, $&)
+ end
+ end
+
+ def replace_addresses(string)
+ string.gsub(ADDRESS_PATTERN) do |match|
+ hash = hashify_results match.scan(ADDRESS_PATTERN).first
+ useful_address?(hash) ? yield(hash, $&) : match
+ end
+ end
+
+ def hashify_results(matches)
+ result = { }
+ capture_index = 0
+ CAPTURE_MAP.each do |field|
+ result[field] = matches[capture_index].to_s.chomp if matches[capture_index]
+ capture_index += 1
+ end
+ useful_address?(result) ? result : nil
+ end
+
+ def useful_address?(hash)
+ hash &&
+ hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
+ end
+
+ end
+
+ CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]
+
+ STATES = <<-EOF
+ ALABAMA AL
+ ALASKA AK
+ AMERICAN SAMOA AS
+ ARIZONA AZ
+ ARKANSAS AR
+ CALIFORNIA CA
+ COLORADO CO
+ CONNECTICUT CT
+ DELAWARE DE
+ DISTRICT OF COLUMBIA DC
+ FEDERATED STATES OF MICRONESIA FM
+ FLORIDA FL
+ GEORGIA GA
+ GUAM GU
+ HAWAII HI
+ IDAHO ID
+ ILLINOIS IL
+ INDIANA IN
+ IOWA IA
+ KANSAS KS
+ KENTUCKY KY
+ LOUISIANA LA
+ MAINE ME
+ MARSHALL ISLANDS MH
+ MARYLAND MD
+ MASSACHUSETTS MA
+ MICHIGAN MI
+ MINNESOTA MN
+ MISSISSIPPI MS
+ MISSOURI MO
+ MONTANA MT
+ NEBRASKA NE
+ NEVADA NV
+ NEW HAMPSHIRE NH
+ NEW JERSEY NJ
+ NEW MEXICO NM
+ NEW YORK NY
+ NORTH CAROLINA NC
+ NORTH DAKOTA ND
+ NORTHERN MARIANA ISLANDS MP
+ OHIO OH
+ OKLAHOMA OK
+ OREGON OR
+ PALAU PW
+ PENNSYLVANIA PA
+ PUERTO RICO PR
+ RHODE ISLAND RI
+ SOUTH CAROLINA SC
+ SOUTH DAKOTA SD
+ TENNESSEE TN
+ TEXAS TX
+ UTAH UT
+ VERMONT VT
+ VIRGIN ISLANDS VI
+ VIRGINIA VA
+ WASHINGTON WA
+ WEST VIRGINIA WV
+ WISCONSIN WI
+ WYOMING WY
+ EOF
+
+ STATE_REGEX = STATES.split(/\n/).collect{ |n| n.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.join("|")
+
+ SECONDARY_UNIT_DESIGNATORS = <<-EOF
+ APARTMENT APT
+ BASEMENT BSMT
+ BUILDING BLDG
+ DEPARTMENT DEPT
+ FLOOR FL
+ FRONT FRNT
+ HANGAR HNGR
+ LOBBY LBBY
+ LOT LOT
+ LOWER LOWR
+ OFFICE OFC
+ PENTHOUSE PH
+ PIER PIER
+ REAR REAR
+ ROOM RM
+ SIDE SIDE
+ SLIP SLIP
+ SPACE SPC
+ STOP STOP
+ SUITE STE
+ TRAILER TRLR
+ UNIT UNIT
+ UPPER UPPR
+ EOF
+
+ SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect{ |n| n.scan(/(\w+)\s*(\w+)\s*$/) }.join("|")
+
+ ADDRESS_PATTERN = /
+ (
+ \d+ # A few numbers
+ \s+
+ (?:[A-Za-z'.-]+\s?){1,3} # Followed by a street name
+ )
+ \s* ,? \s*
+ (
+ (?:\d+\s+)? # a secondary unit, optionally
+ (?:#{SECONDARY_UNIT_DESIGNATORS_REGEX})
+ (?:\s+\d+)?
+ )?
+ \s* ,? \s* # a comma, optionally
+ (?:
+ (?:
+ ((?:[A-Za-z]+\s?){1,3}) # city
+ \s+
+ \b(#{STATE_REGEX})\b # state
+ \s* ,? \s* # a comma, optionally
+ (\d{6})? # a zip code, optionally
+ )
+ | # or, instead of city and state
+ (\d{6})? # a lone zip code will do
+ )
+ /xi
+end
@@ -0,0 +1,71 @@
+$: << File.dirname(__FILE__)+"/../lib"
+
+require 'test/unit'
+require 'address_extractor.rb'
+
+class AddressExtractorTest < Test::Unit::TestCase
+
+ def test_first_address_extraction
+ address = AddressExtractor.first_address(DATA1)
+ assert_first_address(address)
+ end
+
+ def test_find_addresses
+ addresses = AddressExtractor.find_addresses(DATA1)
+ assert_first_address addresses[0]
+ assert_second_address addresses[1]
+ end
+
+ def test_replace_first_address
+ string = AddressExtractor.replace_first_address(DATA1) do |address_hash, address|
+ assert_first_address address_hash
+ assert_first_address_string address
+ "skidoosh"
+ end
+ assert string =~ /Please send the package to skidoosh/
+ end
+
+ def test_replace_addresses
+ string = AddressExtractor.replace_addresses(DATA1) do |address_hash, address|
+ "skidoosh"
+ end
+ assert string =~ /Please send the package to skidoosh/
+ assert string =~ /via mail at:\n skidoosh/
+ end
+
+ module Helpers
+ def assert_first_address(a)
+ assert_not_nil a
+ assert_equal "123 Foo St.", a[:street1]
+ assert_equal nil, a[:street2]
+ assert_equal "Someplace", a[:city]
+ assert_equal "FL", a[:state]
+ assert_equal nil, a[:zip]
+ end
+
+ def assert_first_address_string(string)
+ assert_match /^123 Foo St\., Someplace FL\s*$/, string
+ end
+
+
+ def assert_second_address(a)
+ assert_not_nil a
+ assert_equal "123 Goob Avenue", a[:street1]
+ assert_equal "Apt 123", a[:street2]
+ assert_equal "Nice Town", a[:city]
+ assert_equal "CA", a[:state]
+ assert_equal "123456", a[:zip]
+ end
+ end
+ include Helpers
+end
+
+DATA1 = <<EOF
+Please send the package to 123 Foo St., Someplace FL
+
+My phone number is 123-1234 and St. Marc of Israel can be reached
+via mail at:
+ 123 Goob Avenue
+ Apt 123
+ Nice Town CA 123456
+EOF

0 comments on commit 1102c84

Please sign in to comment.