Skip to content

Commit

Permalink
initial import
Browse files Browse the repository at this point in the history
  • Loading branch information
coderifous committed Nov 14, 2008
0 parents commit 1102c84
Show file tree
Hide file tree
Showing 6 changed files with 318 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
pkg
doc
Manifest
23 changes: 23 additions & 0 deletions LICENSE.textile
@@ -0,0 +1,23 @@
h4. Copyright and License

The MIT License

Copyright (c) 2008 Jim Garvin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
45 changes: 45 additions & 0 deletions README.textile
@@ -0,0 +1,45 @@
h1. AddressExtractor

Find and/or replace mailing addresses in strings.

h2. Examples

<pre><code>
string = <<EOF
Please send the package to 123 Foo St., Someplace FL

My phone number is 123-1234 and St. Marc of Israel can be reached
via mail at:
123 Goob Avenue
Apt 123
Nice Town CA 123456
EOF

# Find first address
AddressExtractor.first_address(string) # => { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" }

# Find all addresses
AddressExtractor.find_addresses(string) # =>
# [
# { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" },
# { :street1 => "123 Goob Avenue", :street2 => "Apt 123", :city => "Nice Town", :state => "CA", :zip => "123456" }
# ]

# Do a gsub on first address
new_string = AddressExtractor.replace_first_address(string) do |address_hash, address_string|
map_link_to(address_string)
end

# Do a gsub on all addresses
new_string = AddressExtractor.replace_addresses(string) do |address_hash, address_string|
map_link_to(address_string)
end
</code></pre>

h3. About

Written by Jim Garvin at RubyConf '08 at the request of Chris Murphy and Ryan McGeary so they could use it in Yarp.com.

You can use it, too.

The address-finding regex may be a bit naive; I'll gladly accept pull requests that add to the test data and tests.
14 changes: 14 additions & 0 deletions Rakefile
@@ -0,0 +1,14 @@
require 'rubygems'
require 'rake'
require 'echoe'

# Declare the gem's name, version and metadata through Echoe.
Echoe.new('address_extractor', '0.1.0') do |gem|
  gem.description              = "Give it text. It finds addresses in it."
  gem.url                      = "http://github.com/coderifous/address_extractor"
  gem.author                   = "Jim Garvin"
  gem.email                    = "jim at thegarvin dot com"
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.development_dependencies = []
end

# Load every extra task file found under tasks/, in sorted path order.
Dir.glob("#{File.dirname(__FILE__)}/tasks/*.rake").sort.each do |task_file|
  load task_file
end
162 changes: 162 additions & 0 deletions lib/address_extractor.rb
@@ -0,0 +1,162 @@
# Finds and replaces mailing addresses embedded in free text.
#
# A match consists of a street line ("123 Foo St."), an optional secondary
# unit ("Apt 123"), and then either a city and state (with an optional zip) or
# a lone zip. Matches are reported as hashes keyed by :street1, :street2,
# :city, :state and :zip; a key is omitted when its part was not captured.
#
# All class methods only treat a match as an address when useful_address?
# accepts it, so stray "number followed by words" fragments are ignored.
class AddressExtractor

  # Field name for each capture group of ADDRESS_PATTERN, in group order.
  # :zip appears twice because the pattern has one zip group after city and
  # state and another for a zip that stands alone.
  CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]

  # State and territory names, each followed by its two-letter abbreviation.
  STATES = <<-EOF
    ALABAMA AL
    ALASKA AK
    AMERICAN SAMOA AS
    ARIZONA AZ
    ARKANSAS AR
    CALIFORNIA CA
    COLORADO CO
    CONNECTICUT CT
    DELAWARE DE
    DISTRICT OF COLUMBIA DC
    FEDERATED STATES OF MICRONESIA FM
    FLORIDA FL
    GEORGIA GA
    GUAM GU
    HAWAII HI
    IDAHO ID
    ILLINOIS IL
    INDIANA IN
    IOWA IA
    KANSAS KS
    KENTUCKY KY
    LOUISIANA LA
    MAINE ME
    MARSHALL ISLANDS MH
    MARYLAND MD
    MASSACHUSETTS MA
    MICHIGAN MI
    MINNESOTA MN
    MISSISSIPPI MS
    MISSOURI MO
    MONTANA MT
    NEBRASKA NE
    NEVADA NV
    NEW HAMPSHIRE NH
    NEW JERSEY NJ
    NEW MEXICO NM
    NEW YORK NY
    NORTH CAROLINA NC
    NORTH DAKOTA ND
    NORTHERN MARIANA ISLANDS MP
    OHIO OH
    OKLAHOMA OK
    OREGON OR
    PALAU PW
    PENNSYLVANIA PA
    PUERTO RICO PR
    RHODE ISLAND RI
    SOUTH CAROLINA SC
    SOUTH DAKOTA SD
    TENNESSEE TN
    TEXAS TX
    UTAH UT
    VERMONT VT
    VIRGIN ISLANDS VI
    VIRGINIA VA
    WASHINGTON WA
    WEST VIRGINIA WV
    WISCONSIN WI
    WYOMING WY
  EOF

  # Regex source: an alternation of every state name and abbreviation.
  # ADDRESS_PATTERN is compiled in extended (x) mode, where literal whitespace
  # in the pattern is ignored, so the spaces inside multi-word names are
  # rewritten as an explicit \s+ (block form keeps the backslash literal).
  STATE_REGEX = STATES.split(/\n/).collect { |line|
    line.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/)
  }.flatten.collect { |name| name.gsub(/\s+/) { '\s+' } }.join("|")

  # Secondary unit designators, each followed by its abbreviation.
  SECONDARY_UNIT_DESIGNATORS = <<-EOF
    APARTMENT APT
    BASEMENT BSMT
    BUILDING BLDG
    DEPARTMENT DEPT
    FLOOR FL
    FRONT FRNT
    HANGAR HNGR
    LOBBY LBBY
    LOT LOT
    LOWER LOWR
    OFFICE OFC
    PENTHOUSE PH
    PIER PIER
    REAR REAR
    ROOM RM
    SIDE SIDE
    SLIP SLIP
    SPACE SPC
    STOP STOP
    SUITE STE
    TRAILER TRLR
    UNIT UNIT
    UPPER UPPR
  EOF

  # Regex source: an alternation of every designator and abbreviation.
  SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect { |line|
    line.scan(/(\w+)\s*(\w+)\s*$/)
  }.flatten.join("|")

  # The address matcher. Capture groups line up with CAPTURE_MAP.
  #
  # The unit designators are wrapped in word boundaries so that a city which
  # merely starts with a designator (the "Ph" of "Philadelphia") is not split
  # into a bogus unit plus a truncated city.
  #
  # NOTE(review): zip codes are matched as exactly six digits, although US ZIP
  # codes have five; kept as-is to preserve existing behaviour. Confirm intent.
  ADDRESS_PATTERN = /
    (
      \d+                           # A few numbers
      \s+
      (?:[A-Za-z'.-]+\s?){1,3}      # Followed by a street name
    )
    \s* ,? \s*
    (
      (?:\d+\s+)?                   # a secondary unit, optionally
      \b(?:#{SECONDARY_UNIT_DESIGNATORS_REGEX})\b
      (?:\s+\d+)?
    )?
    \s* ,? \s*                      # a comma, optionally
    (?:
      (?:
        ((?:[A-Za-z]+\s?){1,3})     # city
        \s+
        \b(#{STATE_REGEX})\b        # state
        \s* ,? \s*                  # a comma, optionally
        (\d{6})?                    # a zip code, optionally
      )
      |                             # or, instead of city and state
      (\d{6})?                      # a lone zip code will do
    )
  /xi

  class << self

    # Returns the hash for the first useful address in +string+, or nil when
    # the string contains none. Pattern matches that are not useful addresses
    # are skipped rather than reported as nil.
    def first_address(string)
      find_addresses(string).first
    end

    # Returns an array with one hash per useful address found in +string+,
    # in order of appearance. Empty when nothing is found.
    def find_addresses(string)
      string.scan(ADDRESS_PATTERN).collect { |captures| hashify_results(captures) }.compact
    end

    # Returns a copy of +string+ in which the first useful address has been
    # replaced by the block's return value. The block receives the address
    # hash and the matched text. The string comes back unchanged (and the
    # block is never called) when there is no useful address.
    def replace_first_address(string)
      replaced = false
      string.gsub(ADDRESS_PATTERN) do |match|
        hash = replaced ? nil : hashify_results(Regexp.last_match.captures)
        next match unless hash
        replaced = true
        yield(hash, match)
      end
    end

    # Returns a copy of +string+ in which every useful address has been
    # replaced by the block's return value. The block receives the address
    # hash and the matched text; other pattern matches are left untouched.
    def replace_addresses(string)
      string.gsub(ADDRESS_PATTERN) do |match|
        # Use the captures of this very match instead of re-scanning its text.
        hash = hashify_results(Regexp.last_match.captures)
        hash ? yield(hash, match) : match
      end
    end

    # Converts the capture array of one ADDRESS_PATTERN match into a hash,
    # using CAPTURE_MAP for the keys and dropping a trailing newline from each
    # value. Returns nil when +matches+ is nil or the result is not a useful
    # address.
    def hashify_results(matches)
      return nil unless matches
      result = { }
      CAPTURE_MAP.each_with_index do |field, index|
        value = matches[index]
        result[field] = value.to_s.chomp if value
      end
      useful_address?(result) ? result : nil
    end

    # Truthy when +hash+ has a street line plus either a zip or both a city
    # and a state.
    def useful_address?(hash)
      hash &&
        hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
    end

  end
end
71 changes: 71 additions & 0 deletions test/test_address_extractor.rb
@@ -0,0 +1,71 @@
$: << File.dirname(__FILE__)+"/../lib"

require 'test/unit'
require 'address_extractor.rb'

# Runs AddressExtractor's finders and replacers against the DATA1 fixture.
class AddressExtractorTest < Test::Unit::TestCase

  # Shared assertions describing the two addresses expected in DATA1.
  module Helpers
    # Checks the hash produced for the first address in DATA1.
    def assert_first_address(a)
      assert_not_nil a
      assert_address_fields a, [
        [:street1, "123 Foo St."],
        [:street2, nil],
        [:city,    "Someplace"],
        [:state,   "FL"],
        [:zip,     nil]
      ]
    end

    # Checks the raw text matched for the first address in DATA1.
    def assert_first_address_string(string)
      assert_match(/^123 Foo St\., Someplace FL\s*$/, string)
    end

    # Checks the hash produced for the second address in DATA1.
    def assert_second_address(a)
      assert_not_nil a
      assert_address_fields a, [
        [:street1, "123 Goob Avenue"],
        [:street2, "Apt 123"],
        [:city,    "Nice Town"],
        [:state,   "CA"],
        [:zip,     "123456"]
      ]
    end

    # Asserts, in the given order, that each key of +a+ holds its expected value.
    def assert_address_fields(a, expected_pairs)
      expected_pairs.each do |key, value|
        assert_equal value, a[key]
      end
    end
  end
  include Helpers

  def test_first_address_extraction
    assert_first_address AddressExtractor.first_address(DATA1)
  end

  def test_find_addresses
    found = AddressExtractor.find_addresses(DATA1)
    assert_first_address found[0]
    assert_second_address found[1]
  end

  def test_replace_first_address
    replaced = AddressExtractor.replace_first_address(DATA1) do |address_hash, address|
      assert_first_address address_hash
      assert_first_address_string address
      "skidoosh"
    end
    assert_match(/Please send the package to skidoosh/, replaced)
  end

  def test_replace_addresses
    replaced = AddressExtractor.replace_addresses(DATA1) { |_address_hash, _address| "skidoosh" }
    assert_match(/Please send the package to skidoosh/, replaced)
    assert_match(/via mail at:\n skidoosh/, replaced)
  end
end

# Fixture text used by AddressExtractorTest. It holds two addresses: a
# single-line one ("123 Foo St., Someplace FL") and a three-line one with a
# secondary unit and a zip, separated by a sentence containing a phone number.
# NOTE(review): test_replace_addresses expects a space between "at:\n" and the
# replaced second address, so the address lines presumably carry leading
# whitespace in the real file -- confirm the heredoc's indentation.
DATA1 = <<EOF
Please send the package to 123 Foo St., Someplace FL
My phone number is 123-1234 and St. Marc of Israel can be reached
via mail at:
123 Goob Avenue
Apt 123
Nice Town CA 123456
EOF

0 comments on commit 1102c84

Please sign in to comment.