Analyzing Walla Walla Open Street Data
Part 1: Counting Tags

In [32]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Your task is to use the iterative parsing to process the map file and
find out not only what tags are there, but also how many, to get the
feeling on how much of which data you can expect to have in the map.
Fill out the count_tags function. It should return a dictionary with the 
tag name as the key and number of times this tag can be encountered in 
the map as value.

Note that your code will be tested with a different data file than the 'example.osm'
"""
import xml.etree.cElementTree as ET
import pprint

TagTypes = {}

def count_tags(filename):
    for event, elem in ET.iterparse(filename):
        if elem.tag not in TagTypes.keys():
            TagTypes[elem.tag] = 1
        else:
            TagTypes[elem.tag] += 1

def test():
    tags = count_tags('wallawalla.osm')
    pprint.pprint(TagTypes)
    
test()

{'bounds': 1,
 'member': 3244,
 'meta': 1,
 'nd': 120034,
 'node': 100407,
 'note': 1,
 'osm': 1,
 'relation': 60,
 'tag': 67070,
 'way': 15814}


Part Two: Auditing Street Types

In [40]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

osm_file = open("wallawalla.osm","r", encoding="utf8")

street_type_re = re.compile(r'\b\S+\.?$',re.IGNORECASE)
street_types = {}

def audit_street_type(street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in street_types.keys():
            street_types[street_type] = 1
        else:
            street_types[street_type] += 1
        
def print_sorted_dict(d):
    keys = d.keys()
    keys = sorted(keys, key=lambda s: s.lower())
    for k in keys:
        v = d[k]
        print("%s: %d" %(k,v))
        
def is_street_name(elem):
    return (elem.tag == "tag") and (elem.attrib['k'] == "addr:street")

def store_street_names(street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in street_types.keys():
            street_types[street_type] = street_name
        else:
            street_types[street_type] += street_name

def audit(filename):
    for event, elem in ET.iterparse(filename):
        if is_street_name(elem):
            audit_street_type(elem.attrib['v'])
            
def audit2(filename):
    for event, elem in ET.iterparse(filename):
        if is_street_name(elem):
            store_street_names(elem.attrib['v'])

def audit3(filename):
    for event, elem in ET.iterparse(filename):
        if is_street_name(elem):
            print (elem)

audit("wallawalla.osm")

print(street_types)

{'Avenue': 1719, 'Street': 5936, 'Way': 102, 'ave': 1, 'St': 1, 'Isaacs': 1, 'Ave': 4, 'Drive': 1715, 'Boulevard': 1, 'Road': 186, 'Place': 353, 'Circle': 32, 'Lane': 391, 'Court': 141, 'Terrace': 69, 'Run': 21, 'Loop': 101, 'Pl': 7, 'Steet': 22}


In [None]:
Part Three: Fixing Street Names

In [44]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

#Define the OSM file and street_type_re re
OSMFILE = "wallawalla.osm"
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

#Use the expected array in order to determine street types to exclude from the analysis
expected = []

#Use the mapping dictionary in order to 
mapping = { "Ave": "Avenue",
            "Pl": "Place",
            "St": "Street",
            "Steet": "Street",
            "ave": "Avenue"
            }

def audit(osmfile):
    osm_file = open(osmfile, "rb")
    street_types = defaultdict(set)
    for event, elem in ET.iterparse(osmfile, events=("start",)):
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])
    osmfile.close()
    return street_types

def audit_street_type(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            #I need some code here...
            
            
def is_street_name(elem):
    return (elem.attrib['k'] == "addr:street")

def update_name(name, mapping):
    m = street_type_re.search(name)
    if m:
        street_type = m.group()
        if street_type in mapping.keys():
            name = mapping[street_type]
    return name

def part1():
    st_types = audit(OSMFILE)
    pprint.pprint(dict(st_types))

def part2():
    for street_type, ways in street_types.items():
        for name in ways:
            m = street_type_re.search(name)
            if m:
                street_type = m.group()
            if street_type in mapping.keys():
                better_name = update_name(name, mapping)
                print (name, "=>", better_name)
                
audit("wallawalla.osm")

IndentationError: expected an indented block (<ipython-input-44-46c1358af2e0>, line 38)

In [43]:
part2()

NameError: name 'st_types' is not defined

In [37]:
part2()

NameError: name 'st_types' is not defined