In [4]:
import xml.etree.cElementTree as ET
import pprint

#audit node and way tags for unexpected city names in the Austin map

# Path of the OSM extract used by this cell.
osmfile = "sample.osm"

# City names that audit_city() accepts without printing them.
expected = (
    "Austin",
    "Pflugerville",
    "Round Rock",
)

def is_city_name(elem):
    """Tell whether a tag element carries a city name.

    Returns True when the element's ``k`` attribute equals ``addr:city``,
    False otherwise. Raises KeyError when the element has no ``k`` attribute.
    """
    key = elem.attrib['k']
    return key == "addr:city"

def audit_city(osmfile):
    """Print every unexpected ``addr:city`` value found in an OSM file.

    Walks the ``node`` and ``way`` elements of ``osmfile``, strips surrounding
    whitespace from each ``addr:city`` tag value and prints the value when it
    is not in the module-level ``expected`` collection.

    Args:
        osmfile: path of the OSM XML file to audit.

    Returns:
        None. Results are written to standard output, one city per line.
    """
    # NOTE(review): a later cell rebinds the global `expected` to a list of
    # street types; this function reads whichever value is current.
    # `with` closes the file even when parsing raises. Binary mode lets the
    # XML parser apply the encoding declared by the document itself.
    with open(osmfile, "rb") as city_file:
        # Child elements are only guaranteed to be attached once the parent's
        # "end" event fires; on "start" they may not have been parsed yet.
        for event, elem in ET.iterparse(city_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if tag.attrib['k'] == 'addr:city':
                        city = tag.attrib['v'].strip()
                        if city not in expected:
                            # Single-argument form prints identically on
                            # Python 2 and Python 3.
                            print(city)

# Run the city audit on the sample extract; each unexpected city name is printed.
audit_city("sample.osm")

#audit of the sample file shows names which appear to be outside of what would be considered Austin (e.g. Buda > 60mi away)
#fixing these would require editing specific entries; they can be ignored for analysis purposes in most cases with no material effect

Manchaca
Cedar Park, TX
Buda
Sunset Valley
Buda
Lakeway
Sunset Valley
Cedar Park


In [11]:
"""
Your task in this exercise has two steps:

- audit the OSMFILE and change the variable 'mapping' to reflect the changes needed to fix 
    the unexpected street types to the appropriate ones in the expected list.
    You have to add mappings only for the actual problems you find in this OSMFILE,
    not a generalized solution, since that may and will depend on the particular area you are auditing.
- write the update_name function, to actually fix the street name.
    The function takes a string with street name as an argument and should return the fixed name
    We have provided a simple test so that you see what exactly is expected
"""
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# OSM file audited by this cell.
OSMFILE = "austin.osm"

# Matches the trailing run of non-whitespace characters (starting at a word
# boundary, optionally ending in a period); the match is used as the street type.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

# Street types that audit_street_type() accepts without recording them.
expected = [
    "Street", "Avenue", "Boulevard", "Drive", "Court", "Place",
    "Square", "Lane", "Road", "Trail", "Parkway", "Commons",
]

# Abbreviated street types and the full form update_name() substitutes.
mapping = {
    "St": "Street",
    "St.": "Street",
    "Ave.": "Avenue",
    "Rd.": "Road",
}


def audit_street_type(street_types, street_name):
    """Record ``street_name`` under its street type when that type is unexpected.

    The street type is whatever ``street_type_re`` matches in the name. Names
    with no match, or whose type is listed in ``expected``, are ignored.

    Args:
        street_types: mapping of street type -> set of names; updated in place.
        street_name: the street name to inspect.

    Returns:
        None.
    """
    found = street_type_re.search(street_name)
    if found is None:
        return
    suffix = found.group()
    if suffix in expected:
        return
    street_types[suffix].add(street_name)


def is_street_name(elem):
    """Return True when the tag element's ``k`` attribute is ``addr:street``.

    Raises KeyError when the element has no ``k`` attribute.
    """
    tag_key = elem.attrib['k']
    return tag_key == "addr:street"


def audit(osmfile):
    """Collect the unexpected street types used in an OSM file.

    Every ``tag`` child of a ``node`` or ``way`` element that
    ``is_street_name`` accepts is passed to ``audit_street_type``, which
    records the name under its street type.

    Args:
        osmfile: path of the OSM XML file to audit.

    Returns:
        A ``defaultdict(set)`` filled in by ``audit_street_type``.
    """
    street_types = defaultdict(set)
    # `with` closes the file even when parsing raises. Binary mode lets the
    # XML parser apply the encoding declared by the document itself.
    with open(osmfile, "rb") as osm_file:
        # Child elements are only guaranteed to be attached once the parent's
        # "end" event fires; on "start" they may not have been parsed yet.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib['v'])
    return street_types


def update_name(name, mapping):
    """Return ``name`` with its trailing street type expanded via ``mapping``.

    The street type is the text ``street_type_re`` matches in ``name``. When
    that exact text is a key of ``mapping``, only the matched span is replaced
    and the before/after values are printed. Otherwise ``name`` is returned
    unchanged.

    Args:
        name: street name to fix.
        mapping: dict of abbreviated street type -> full street type.

    Returns:
        The corrected street name, or ``name`` itself when nothing applies.
    """
    m = street_type_re.search(name)
    if m is None:
        return name

    street_type = m.group()
    if street_type in mapping:
        # %-formatting keeps the output identical on Python 2 and Python 3.
        print('Before:  %s' % name)
        # Splice by position instead of re.sub(): the street type is not a
        # regex (the "." in "St." would match any character) and the same
        # text may also occur earlier in the name (e.g. "St Johns St").
        name = name[:m.start()] + mapping[street_type]
        print('After:  %s' % name)

    return name


def test():
    """Audit ``OSMFILE`` and run ``update_name`` on every flagged street name.

    The corrected names are not collected; ``update_name`` prints each change.
    """
    flagged = audit(OSMFILE)
    for names in flagged.values():
        for street in names:
            update_name(street, mapping)

# Run the street audit as soon as the cell executes (no __main__ guard is used).
test()
    



Before:  Woodrow Ave.
After:  Woodrow Avenue
Before:  Pecan St.
After:  Pecan Street
Before:  E 38th 1/2 St.
After:  E 38th 1/2 Street
Before:  E. 43rd St.
After:  E. 43rd Street
Before:  Pecan St
After:  Pecan Street
Before:  Rio Grande St
After:  Rio Grande Street
Before:  S 1st St
After:  S 1st Street
Before:  E Oltorf St
After:  E Oltorf Street
Before:  E 43rd St
After:  E 43rd Street
Before:  Red River St
After:  Red River Street
Before:  W 10th St
After:  W 10th Street
Before:  W Annie St
After:  W Annie Street
Before:  E 51st St
After:  E 51st Street
Before:  W 6th St
After:  W 6th Street
Before:  West Lynn St
After:  West Lynn Street
Before:  Duval St
After:  Duval Street


In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import pprint
import re

# Path of the OSM extract audited for postcodes.
osmfile = "sample.osm"

# Module-level accumulator; audit() builds and returns its own local dict.
postcodes = defaultdict(set)

# Corrections applied by update_postcode(): flagged value -> clean postcode.
mapping = {
    '78724-1199': '78724',
    'TX 78745': '78745',
}

def is_postcode(elem):
    """Return True when the tag element's ``k`` attribute is ``addr:postcode``.

    Raises KeyError when the element has no ``k`` attribute.
    """
    attribute_key = elem.attrib['k']
    return attribute_key == "addr:postcode"

# Postcodes accepted as valid: the strings "78610" through "78798". Built once
# instead of on every call, and as a frozenset so membership is a hash lookup.
# NOTE(review): range() excludes its upper bound, so "78799" is flagged as
# unexpected -- confirm that this is intended.
_EXPECTED_POSTCODES = frozenset(str(code) for code in range(78610, 78799))


def audit_postcode(postcodes, postcode):
    """Record ``postcode`` when it is not one of the expected postcodes.

    Args:
        postcodes: mapping of postcode -> set of postcodes; an unexpected
            value is added under its own key. Updated in place.
        postcode: the postcode string to check.

    Returns:
        The same ``postcodes`` mapping that was passed in.
    """
    if postcode not in _EXPECTED_POSTCODES:
        postcodes[postcode].add(postcode)
    return postcodes

def update_postcode(postcode):
    """Return the corrected postcode from the module-level ``mapping``.

    When ``postcode`` is a key of ``mapping`` the before/after values are
    printed and the mapped value is returned; otherwise ``postcode`` is
    returned unchanged.

    Args:
        postcode: the postcode string to correct.

    Returns:
        The mapped postcode, or ``postcode`` itself when no mapping exists.
    """
    # Membership on the dict itself avoids building a key list on Python 2.
    if postcode in mapping:
        # %-formatting keeps the output identical on Python 2 and Python 3.
        print('Before:  %s' % postcode)
        postcode = mapping[postcode]
        print('After:  %s' % postcode)
    return postcode



def audit(osmfile):
    """Collect and pretty-print the unexpected postcodes of an OSM file.

    Every ``tag`` child of a ``node`` or ``way`` element that ``is_postcode``
    accepts is passed to ``audit_postcode``, which records unexpected values.

    Args:
        osmfile: path of the OSM XML file to audit.

    Returns:
        A plain ``dict`` copy of the mapping filled in by ``audit_postcode``.
    """
    postcodes = defaultdict(set)
    # `with` closes the file even when parsing raises. Binary mode lets the
    # XML parser apply the encoding declared by the document itself.
    with open(osmfile, "rb") as osm_file:
        # Child elements are only guaranteed to be attached once the parent's
        # "end" event fires; on "start" they may not have been parsed yet.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == 'node' or elem.tag == 'way':
                for tag in elem.iter("tag"):
                    if is_postcode(tag):
                        audit_postcode(postcodes, tag.attrib['v'])

    flagged = dict(postcodes)
    pprint.pprint(flagged)
    return flagged

def test():
    """Audit ``osmfile`` and run ``update_postcode`` on every flagged postcode.

    The corrected values are not collected; ``update_postcode`` prints them.
    """
    for flagged in audit(osmfile):
        update_postcode(flagged)
            
           
    
# Audit the postcodes and print the before/after value of each corrected entry.
test()


{'78724-1199': set(['78724-1199']), 'TX 78745': set(['TX 78745'])}
Before:  TX 78745
After:  78745
Before:  78724-1199
After:  78724
