Analyzing Walla Walla Open Street Data
Part 1: Counting Tags

In [32]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Your task is to use the iterative parsing to process the map file and
find out not only what tags are there, but also how many, to get the
feeling on how much of which data you can expect to have in the map.
Fill out the count_tags function. It should return a dictionary with the 
tag name as the key and number of times this tag can be encountered in 
the map as value.

Note that your code will be tested with a different data file than the 'example.osm'
"""

#Import statements
import xml.etree.cElementTree as ET
import pprint

#Module-level tally shared by count_tags() and test(): tag name -> number of occurrences
TagTypes = {}

#Count the number of each tag and store it in the dictionary
def count_tags(filename):
    """Tally every element tag found in an XML file.

    The file is parsed iteratively with ET.iterparse and each element is
    counted once, when its "end" event (the iterparse default) is reported.
    Counts are accumulated in the module-level TagTypes dictionary, so
    repeated calls keep adding to the same totals.

    Args:
        filename: Path of the XML file (anything ET.iterparse accepts as
            its source, including an open binary file object).

    Returns:
        The TagTypes dictionary, mapping tag name to occurrence count.
    """
    for _, elem in ET.iterparse(filename):
        TagTypes[elem.tag] = TagTypes.get(elem.tag, 0) + 1
    return TagTypes

#This is the main function. We are going to count the tags in wallawalla.osm and print out the results.
def test():
    """Count the tags in 'wallawalla.osm' and pretty-print the totals.

    count_tags() accumulates its counts in the module-level TagTypes
    dictionary, so that dictionary is what gets printed.
    """
    count_tags('wallawalla.osm')
    pprint.pprint(TagTypes)

test()

{'bounds': 1,
 'member': 3244,
 'meta': 1,
 'nd': 120034,
 'node': 100407,
 'note': 1,
 'osm': 1,
 'relation': 60,
 'tag': 67070,
 'way': 15814}


Part Two: Auditing Street Types

In [46]:
"""
In this bit of code, we are going to be getting a list of the different street types present in the file. We will get
a list of the different street types as well as how many of each there are.
"""

#Import statements
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

#Here, we set the variable osm_file to an open, UTF-8 text-mode handle on wallawalla.osm.
#NOTE(review): nothing in the code visible in this cell reads from or closes this handle (audit and audit2
#pass the file name straight to ET.iterparse) -- confirm whether it is still needed.
osm_file = open("wallawalla.osm","r", encoding="utf8")

#Here, we set up our regular expression as well as our street_types dictionary, and our street_type_names dictionary.
#street_type_re matches the last run of non-whitespace characters in a string (starting at a word boundary),
#which is treated as the street type.
#street_types counts the number of occurrences of each street type, and street_type_names saves the street names
#in sets keyed by street type.
street_type_re = re.compile(r'\b\S+\.?$',re.IGNORECASE)
street_types = {}
street_type_names = defaultdict(set)

#This function will take a street name and cut off the last chunk in order to get the street type, and then
#use it as the key in a dictionary that maps each street type to the number of times it has occurred.
def audit_street_type(street_name):
    """Add one to the count of the street type that ends `street_name`.

    The street type is whatever street_type_re matches at the end of the
    name; the count is kept in the module-level street_types dictionary.
    Names in which the pattern finds nothing are ignored.
    """
    match = street_type_re.search(street_name)
    if match is None:
        return
    kind = match.group()
    street_types[kind] = street_types.get(kind, 0) + 1

#This will sort the dictionary, and then print it
def print_sorted_dict(d):
    """Print the entries of `d` as "key: count" lines.

    Lines are ordered by key, ignoring case. Each value is formatted
    with %d, so it is expected to be a number.
    """
    entries = sorted(d.items(), key=lambda pair: pair[0].lower())
    for name, count in entries:
        print("%s: %d" % (name, count))

#This function checks to see if an element is a street name
def is_street_name(elem):
    """Return True when `elem` is a <tag> element whose 'k' attribute is "addr:street"."""
    if elem.tag != "tag":
        return False
    return elem.attrib['k'] == "addr:street"

#This function will store the different street names in the correct location in the dictionary based on street types.
def store_street_names(street_name):
    """Record `street_name` under its street type.

    The street type is whatever street_type_re matches at the end of the
    name. The name is added to the set stored for that type in the
    module-level street_type_names dictionary. Names in which the pattern
    finds nothing are ignored.
    """
    m = street_type_re.search(street_name)
    if m:
        # street_type_names is a defaultdict(set): indexing a street type
        # that has not been seen yet creates its empty set, so no
        # membership test is needed before adding.
        street_type_names[m.group()].add(street_name)

#This is one of our main functions we are going to test. It parses through the osm file and then,
#if it runs into a street name, it will audit the street type.
def audit(filename):
    """Count the street type of every street-name tag in an OSM file.

    The file is parsed iteratively; each element accepted by
    is_street_name has its 'v' attribute passed to audit_street_type.
    """
    for _event, element in ET.iterparse(filename):
        if not is_street_name(element):
            continue
        audit_street_type(element.attrib['v'])

#This is our second main function, if we choose to use it. It will store the street names instead of counting them.
def audit2(filename):
    """Collect the street names of an OSM file, grouped by street type.

    The file is parsed iteratively; each element accepted by
    is_street_name has its 'v' attribute passed to store_street_names.
    """
    for _event, element in ET.iterparse(filename):
        if not is_street_name(element):
            continue
        store_street_names(element.attrib['v'])

#Run the counting audit on the Walla Walla extract; this fills the module-level street_types dictionary.
audit("wallawalla.osm")

#Show the raw counts in dictionary order (print_sorted_dict(street_types) would list them sorted by key instead).
print(street_types)

{'Avenue': 1719, 'Street': 5936, 'Way': 102, 'ave': 1, 'St': 1, 'Isaacs': 1, 'Ave': 4, 'Drive': 1715, 'Boulevard': 1, 'Road': 186, 'Place': 353, 'Circle': 32, 'Lane': 391, 'Court': 141, 'Terrace': 69, 'Run': 21, 'Loop': 101, 'Pl': 7, 'Steet': 22}


In [None]:
Part Three: Fixing Street Names

In [90]:
"""
This program is going to suggest better street names for us to use where there are problems.
"""

#Import statements
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

#Define the OSM file and the two regular expressions used to split a street name.
#Neither pattern contains literal letters, so re.IGNORECASE does not change what they match.
OSMFILE = "wallawalla.osm"
#street_type_re: the last run of non-whitespace characters in the string (starting at a word boundary),
#which is treated as the street type, e.g. "Ave" in "S College Ave".
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)
#street_name_re: the longest leading part of the string that is not directly followed by a run of
#non-whitespace characters reaching the end of the string; in practice the part of the name before the
#whitespace that precedes the last word, e.g. "S College" in "S College Ave".
street_name_re = re.compile(r'^(.*(?!(\S*$)))', re.IGNORECASE)

#Use the expected list in order to determine street types to exclude from the analysis.
#It is empty here, so no street type is excluded.
expected = []

#Use the mapping dictionary in order to suggest better names for streets that have incorrect names.
#Keys are street types as they appear in the data; values are the preferred spelling.
mapping = { "Ave": "Avenue",
            "Pl": "Place",
            "St": "Street",
            "Steet": "Street",
            "ave": "Avenue"
            }

#This function will audit the osm file and call the audit_street_type function for every tag that holds a street name.
def audit(osmfile):
    """Collect the street names found in an OSM file, grouped by street type.

    Every <tag> child of a <node> or <way> that is_street_name accepts has
    its 'v' attribute handed to audit_street_type, which files the name
    under its street type.

    Args:
        osmfile: Path of the OSM XML file to read.

    Returns:
        A defaultdict(set) mapping street type to the set of street names
        recorded for it.
    """
    street_types = defaultdict(set)
    # The with-block closes the handle even if parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: an element is only guaranteed to hold its
        # <tag> children once its closing tag has been read. On a "start"
        # event the children may not have been parsed yet, so street names
        # could be missed.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib['v'])
    return street_types

#This function will add street names to a dictionary in the correct set according to street type.
def audit_street_type(street_types, street_name):
    """File `street_name` under its street type in `street_types`.

    The street type is whatever street_type_re matches at the end of the
    name. Nothing is recorded when the pattern finds no match or when the
    street type is listed in `expected`.

    Args:
        street_types: Mapping from street type to a set of street names;
            it is indexed without a prior membership test, so it must
            create missing entries itself (audit passes a defaultdict(set)).
        street_name: The street name to classify.
    """
    match = street_type_re.search(street_name)
    if match is None:
        return
    suffix = match.group()
    if suffix in expected:
        return
    street_types[suffix].add(street_name)
            
#This function checks to see if an element is a street.     
def is_street_name(elem):
    """Return True when the 'k' attribute of `elem` is "addr:street".

    Raises:
        KeyError: if `elem` has no 'k' attribute.
    """
    key = elem.attrib['k']
    return key == "addr:street"

#This function will update the street name if necessary to the correct one.
def update_name(name, mapping):
    """Return `name` with its street type replaced by the preferred spelling.

    The street type is the trailing token matched by street_type_re. When
    that token is a key of `mapping`, the result is the leading part of the
    name (as matched by street_name_re), a single space, and the mapped
    street type. A name with no recognisable street type, or whose street
    type is not in `mapping`, is returned unchanged.

    Args:
        name: The street name to check, e.g. "S College Ave".
        mapping: Dictionary from a street type as found in the data to its
            preferred form.

    Returns:
        The corrected street name, or `name` itself when there is nothing
        to correct.
    """
    m = street_type_re.search(name)
    if not m:
        return name

    street_type = m.group()
    if street_type not in mapping:
        # No replacement is defined for this street type, so there is
        # nothing to rebuild: hand the name back untouched.
        return name
    better_type = mapping[street_type]

    o = street_name_re.search(name)
    if not o:
        # street_name_re found no leading part (for example a one-word
        # name such as "St"), so only the corrected street type remains.
        return better_type
    return o.group() + " " + better_type

#This is our first main function, which is going to create and print out a dictionary of street names
#according to street type. It will return the dictionary.
def part1():
    """Audit OSMFILE, pretty-print the street names grouped by street type, and return them.

    Returns:
        The mapping produced by audit(OSMFILE); a plain-dict copy of it is
        what gets printed.
    """
    names_by_type = audit(OSMFILE)
    printable = dict(names_by_type)
    pprint.pprint(printable)
    return names_by_type

#This is our second main function, which is going to discover the street names that are incorrect and then
#suggest new names for them.

def part2(street_types):
    """Print a suggested correction for every street name whose street type is in `mapping`.

    For each such name the street type is printed on its own line,
    followed by a line of the form: original name, "=>", suggested name.

    Args:
        street_types: Mapping from street type to an iterable of street
            names, as returned by part1().
    """
    for raw_type, names in street_types.items():
        match = street_type_re.search(raw_type)
        if not match:
            continue
        street_type = match.group()
        if street_type not in mapping:
            continue
        for original in names:
            print(street_type)
            suggestion = update_name(original, mapping)
            print(original, "=>", suggestion)

#Build and print the street-type dictionary, then print suggested corrections for the street types listed in mapping.
#NOTE(review): "streat_types" looks like a typo for "street_types"; the name is left unchanged here because
#code outside this view may refer to it -- confirm before renaming.
streat_types = part1()
part2(streat_types)

{'Ave': {'S College Ave'},
 'Avenue': {'Bel Air Avenue',
            'Boyer Avenue',
            'California Avenue',
            'Central Avenue',
            'Chapelwood Avenue',
            'Criscola Avenue',
            'Dawning Avenue',
            'Delaware Avenue',
            'East Isaacs Avenue',
            'East Rees Avenue',
            'Electric Avenue',
            'Fleetwood Avenue',
            'Francis Avenue',
            'Maple Avenue',
            'North 13th Avenue',
            'North 4th Avenue',
            'North College Avenue',
            'North Wilbur Avenue',
            'Northeast Ash Avenue',
            'Northeast Birch Avenue',
            'Northeast Cargill Avenue',
            'Northeast Cedar Avenue',
            'Northeast Damson Avenue',
            'Northeast Della Avenue',
            'Northeast Maple Avenue',
            'Northeast Spitzenburg Avenue',
            'Northwest Evans Avenue',
            'Pacific Avenue',
            'Penrose Aven