In [4]:
import lxml.etree as ET
from collections import defaultdict
import re

# OpenStreetMap XML extract that the audit cells below read.
OSMFILE = "phoenix_arizona.osm"

# Matches the trailing run of non-whitespace characters of a street name,
# starting at a word boundary (e.g. "Rd." in "N Gilbert Rd.", "101" in
# "N John Wayne Parkway #101"); the audit treats that token as the street type.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

# Street types that are already in canonical form; the audit reports every
# trailing token that is not in this list.
expected = [
    "Street",
    "Avenue",
    "Boulevard",
    "Drive",
    "Court",
    "Place",
    "Square",
    "Lane",
    "Road",
    "Way",
    "Trail",
    "Parkway",
    "Commons",
    "Circle",
    "Terrace",
    "Highway",
]

In [5]:
def audit_street_type(street_types, street_name):
    """File `street_name` under its street type if that type is unexpected.

    The street type is whatever `street_type_re` matches at the end of
    `street_name`. Names with no match, or whose type is listed in
    `expected`, are ignored.

    Args:
        street_types: mapping of street type -> set of street names; it is
            indexed with a possibly-new key, so it must create missing
            entries on access (the caller passes a `defaultdict(set)`).
        street_name: street name string to inspect.
    """
    match = street_type_re.search(street_name)
    if match is None:
        return

    suffix = match.group()
    if suffix in expected:
        return

    street_types[suffix].add(street_name)

def audit_street(osmfile):
    """Collect the unexpected street types used in an OSM XML file.

    Streams through the file and, for every `addr:street` tag found under a
    `node` or `way` element, hands the street name to `audit_street_type`.

    Args:
        osmfile: path of the OSM XML file to audit.

    Returns:
        A `defaultdict(set)` mapping each unexpected street type to the set
        of street names that end with it.
    """
    street_types = defaultdict(set)

    # Binary mode leaves decoding to the XML parser, and the `with` block
    # closes the file even if parsing raises part-way through.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: the children of an element are only
        # guaranteed to be parsed once its end tag has been seen, so reading
        # the child <tag> elements on "start" can miss addresses.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    # Only street-address tags are of interest.
                    if tag.get('k') == "addr:street":
                        audit_street_type(street_types, tag.attrib['v'])
                # The element has been fully processed; drop its content so
                # memory use does not grow with every element in the file.
                elem.clear()

    return street_types

In [6]:
# Run the street audit on the extract; the returned mapping of unexpected
# street type -> street names is shown as this cell's output.
audit_street(OSMFILE)

defaultdict(set,
            {'1': {'S Yavapai Rd #1'},
             '100': {'East Elwood Street Suite 100'},
             '101': {'East Baseline Road Suite 101',
              'N 27th Ave Ste 101',
              'N John Wayne Parkway #101'},
             '102': {'North 93rd Avenue #102',
              'North Power Road #102',
              'W Lake Pleasant Pkwy # 102'},
             '103': {'E Greenway Pkwy #103'},
             '104': {'E Valley Auto Dr #104', 'S Signal Butte Rd #104'},
             '105': {'E Ocotillo Rd Bldg 105',
              'N Scottsdale Rd #105',
              'North 90th Street #105'},
             '110': {'E 5th Ave #110', 'East Doubletree Ranch Road #110'},
             '111': {'N Gilbert Rd #111'},
             '114-225': {'South Higley Road, Suite 114-225'},
             '117': {'N 51st Ave #117'},
             '119': {'N. Gilbert Road, #119'},
             '121': {'3514 N. Power Road, Ste. 121'},
             '122': {'E Baseline Rd Ste 122'},
            

In [7]:
# Street-type spellings to correct -> canonical street type.
# Keys are matched exactly (case-sensitive) by fix_street.
mapping = {
    # Street
    "St": "Street",
    "St.": "Street",
    "street": "Street",
    # Avenue
    "Ave": "Avenue",
    "Ave.": "Avenue",
    # Boulevard
    "Blvd": "Boulevard",
    "Blvd.": "Boulevard",
    "Boulavard": "Boulevard",
    # Road
    "Rd": "Road",
    "Rd.": "Road",
    "RD": "Road",
    # Place
    "Pl": "Place",
    "Pl.": "Place",
    # Parkway
    "PKWY": "Parkway",
    "Pkwy": "Parkway",
    # Lane
    "Ln": "Lane",
    "Ln.": "Lane",
    # Drive
    "Dr": "Drive",
    "Dr.": "Drive",
}

def fix_street(osmfile):
    """Print the proposed correction for every street with a known bad type.

    Audits `osmfile` with `audit_street`, then for each unexpected street
    type that has an entry in `mapping` prints one
    ``<original name> => <corrected name>`` line per street name.

    Args:
        osmfile: path of the OSM XML file to audit.

    Returns:
        None; the corrections are only printed.
    """
    st_types = audit_street(osmfile)
    for st_type, ways in st_types.items():
        # Look the replacement up once per street type; types without a
        # mapping entry are left alone.
        replacement = mapping.get(st_type)
        if replacement is None:
            continue
        for name in ways:
            # Swap only the LAST occurrence, i.e. the trailing street type.
            # str.replace would also rewrite the same letters earlier in the
            # name ("Stapley St" -> "Streetapley Street").
            head, _sep, tail = name.rpartition(st_type)
            better_name = head + replacement + tail
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print("%s => %s" % (name, better_name))

In [8]:
# Print an "original => corrected" line for each audited street name whose
# street type has an entry in `mapping`.
fix_street(OSMFILE)

E. Brown RD => E. Brown Road
East Rio Salado Pkwy => East Rio Salado Parkway
West Happy Valley Rd => West Happy Valley Road
W Broadway Rd => W Broadway Road
W Elliot Rd => W Elliot Road
East Williamsfield Rd => East Williamsfield Road
W. Indian School Rd => W. Indian School Road
E. Elliot Rd => E. Elliot Road
South MIller Rd => South MIller Road
N Fort McDowell Rd => N Fort McDowell Road
N Hayden Rd => N Hayden Road
S Coronado Rd => S Coronado Road
S Watson Rd => S Watson Road
E Warner Rd => E Warner Road
N Scottsdale Rd => N Scottsdale Road
N Maricopa Rd => N Maricopa Road
E Baseline Rd => E Baseline Road
E Camelback Rd => E Camelback Road
South Wall street => South Wall Street
East Chandler Heights Rd. => East Chandler Heights Road
5810 Alameda Rd. => 5810 Alameda Road
N. Cave Creek Rd. => N. Cave Creek Road
West Happy Vally Rd. => West Happy Vally Road
E Havasu Pl => E Havasu Place
West Morelos Pl => West Morelos Place
E. Devonshire Ave. => E. Devonshire Avenue
N. 120th St. => N. 12

In [9]:
def audit_city(filename):
    """Collect every `addr:city` value in an OSM XML file except "Phoenix".

    Streams through the file and inspects the `addr:city` tags found under
    `node` and `way` elements. The comparison with "Phoenix" is exact, so
    differently-cased or misspelled variants are included in the result.

    Args:
        filename: path of the OSM XML file to audit.

    Returns:
        A set of the distinct city strings that are not exactly "Phoenix".
    """
    city_list = set()

    # Binary mode leaves decoding to the XML parser, and the `with` block
    # makes sure the file is closed, including when parsing raises.
    with open(filename, "rb") as osm_file:
        # Listen for "end" events: the children of an element are only
        # guaranteed to be parsed once its end tag has been seen.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    if tag.get('k') == "addr:city" and tag.attrib['v'] != "Phoenix":
                        city_list.add(tag.attrib['v'])
                # The element has been fully processed; drop its content so
                # memory use does not grow with every element in the file.
                elem.clear()

    return city_list

In [12]:
# Gather every city value that is not exactly "Phoenix", then show how many
# distinct values there are and what they are.
other_city = audit_city(OSMFILE)
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid Python 3, unlike the bare print statement.
print(len(other_city))
print(other_city)

79
set(['El Mirage', 'Gila Bend', 'Coolidge', 'Richland', 'Maricopa', 'Phenix', 'Anthem', 'Fort McDowell', 'Guadalupe', 'Golbert', 'Carefree', 'Sacaton', 'casa Grande', 'Morristown', 'Glemdale', 'tEMPE', '2036 N. Gilbert Rd.', 'Paradise Valley, AZ', 'Surprise', 'Gilbert', 'Mayer', 'Goodyear', 'Sun City West', 'Laveen', 'mesa', 'Rio Verde', 'Phoenx', 'PHoenix', 'Tempe', 'Avondale', 'Luke AFB, Waddell', 'Laveen Village', 'Peoria', 'San Diego', 'Apache Junction', 'Scottsdale', 'Chandler', 'Queen Creek', 'Tonopah', 'Sun Lakes', 'SunLakes', 'Arlington', 'Vulture City', 'Mesa', 'sun City West', 'Desert Hills', 'Higley', 'Fountain Hills', 'Luke AFB', 'Maricopaaricopa', 'tempe', '25', 'Mobile', 'peoria', 'Wickenburg', 'Litchfield Park', 'scottsdale', 'Paradise Valley', 'Gold Canyon', 'SanTan Valley', 'Glendale', 'MEsa', 'Tollenson', 'Black Canyon City', 'Sun City', 'Tolleson', 'Cave Creek', 'San Tan Valley', 'Mesa, AZ', 'Rock Springs', 'Eloy', 'New River', 'Tohono Oodham', 'Buckeye', 'Casa Gra