# Import libraries

In [1]:
import csv
import codecs
import pprint
import re
from collections import defaultdict
from collections import Counter
import xml.etree.cElementTree as ET
import cerberus
import schema
import sqlite3

#FILENAME = "example.osm" #real file is OREM.OSM

# Make a sample file (example.osm)

In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import xml.etree.ElementTree as ET  # Use cElementTree or lxml if too slow

OSM_FILE = "OREM.osm"  # Replace this with your osm file
SAMPLE_FILE = "example.osm"

k = 10 # Parameter: take every k-th top level element

def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield element if it is the right type of tag

    Reference:
    http://stackoverflow.com/questions/3095434/inserting-newlines-in-xml-file-generated-via-xml-etree-elementtree-in-python
    """
    context = iter(ET.iterparse(osm_file, events=('start', 'end')))
    _, root = next(context)
    for event, elem in context:
        if event == 'end' and elem.tag in tags:
            yield elem
            root.clear()


with open(SAMPLE_FILE, 'wb') as output:
    output.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    output.write('<osm>\n  ')

    # Write every kth top level element
    for i, element in enumerate(get_element(OSM_FILE)):
        if i % k == 0:
            output.write(ET.tostring(element, encoding='utf-8'))

    output.write('</osm>')


# mapparser.py (count_tags function) from lesson

In [11]:
import xml.etree.cElementTree as ET
import pprint

def count_tags(filename):  #(filename)
    my_tags = {}
    for event, child in ET.iterparse(filename):
        tag = child.tag
        if tag not in my_tags.keys():
            my_tags[tag] =1
        else:
            my_tags[tag] +=1
    return my_tags

count_tags('example.osm')

In [12]:
count_tags('example.osm')

{'bounds': 1,
 'member': 10984,
 'meta': 1,
 'nd': 449612,
 'node': 367494,
 'note': 1,
 'osm': 1,
 'relation': 513,
 'tag': 195187,
 'way': 45162}

# Audit State & Clean State

In [13]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

#Reference: https://www.tutorialspoint.com/counters-in-python

def audit_states(osmfile):
    state_list =[]
    osm_file = open(osmfile, "r")
    for event,element in ET.iterparse(osmfile):      
        for elem in element.iter("tag"):
            if elem.attrib['k'] == "addr:state":
                state_list.append(elem.attrib['v'])
    pprint.pprint(Counter(state_list))
    osm_file.close()


In [14]:
audit_states('example.osm')

Counter({'UT': 15519, 'Utah': 2229, 'Ut': 105, 'ut': 6})


Update/Clean up states

In [15]:
state_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

expected_states = ["UT"]

mapping_states = { "Utah": "UT", "Ut": "UT", "ut": "UT"}

def update_state(name, mapping_states):
    m = state_re.search(name)
    if m.group() in mapping_states.keys():
        name = name[:len(name)-len(m.group())] + mapping_states[m.group()]
    return name
    
def update_state_name(state_name, mapping_states):
    m = state_re.search(state_name)
    if m:
        state = m.group()
        if state not in expected_states:
            print state
            state_name = update_name(state_name, mapping_states)
            print state_name
    return state_name

# audit & update - street names

In [16]:
#import xml.etree.cElementTree as ET
#from collections import defaultdict
#import re
#import pprint

#osmfile = "example.osm"
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)


expected = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road", 
            "Trail", "Parkway", "Commons"]

# UPDATE THIS VARIABLE
mapping = { "St": "Street", "St.": "Street", "street": "Street",
            "Ave": "Avenue", 
            "Rd": "Road", "Rd.": "Road",
            "Dr": "Drive", "DrIve": "Drive",
            "Pkwy": "Parkway",
            "lane": "Lane",
            "Grove)": "Grove",
            "N.": "North", "N": "North",
            "E": "East",
            "S": "South",
            "W": "West"
          }

def audit_street_type(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            street_types[street_type].add(street_name)

def audit_street_type2(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        street_types[street_type] += 1
            
def is_street_name(elem):
    return (elem.attrib['k'] == "addr:street")

def is_street_name2(elem):
    return (elem.tag == "tag") and (elem.attrib['k'] == "addr:street")

def print_sorted_dict(d):
    keys = d.keys()
    keys = sorted(keys, key=lambda s: s.lower())
    for k in keys:
        v = d[k]
        print("%s: %d" % (k, v))
    
def audit(osmfile):    #All street shown in a dictionary (to see all values)
    osm_file = open(osmfile, "r")
    street_types = defaultdict(set)
    for event, elem in ET.iterparse(osm_file):
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])             
    osm_file.close()
    return street_types

def audit2(osmfile):    #The different street types and how often they appear in the data (to see summary)
    osm_file = open(osmfile, "r")
    street_types = defaultdict(int)
    for event, elem in ET.iterparse(osm_file):
        if is_street_name2(elem):
            audit_street_type2(street_types, elem.attrib['v'])    
    print_sorted_dict(street_types)    
    osm_file.close()

#if __name__ == '__main__':
#    audit('example.osm')

Check how many street types we are working with, and run the audits (full detail & Summary level).

In [17]:
len(audit("example.osm"))

35

In [18]:
audit('example.osm')   #Full detail

defaultdict(set,
            {'200': {'South University Avenue Suite 200'},
             '301': {'North University Avenue #301'},
             '319': {'S Sleepy Ridge Drive Suite 319'},
             'Ave': {'N University Ave'},
             'Borda': {'Rue Borda'},
             'Center': {'Center'},
             'Circle': {'California Circle',
              'Chardonnay Circle',
              'Glendon Circle',
              'Lindon Park Circle',
              'North 3830 West Circle',
              'North 3900 West Circle',
              'North Forest Lake Circle',
              'North Landon Circle',
              'North Timothy Circle',
              'North Verona Circle',
              'North Vintage Circle',
              'Parkview Circle',
              'Tuscany Circle',
              'Verona Circle',
              'Washington Circle',
              'West Tibble Fork Circle'},
             'Cournot': {'Rue Cournot'},
             'Cove': {'North Cottage Cove',
              'North E

In [19]:
audit2('example.osm')   #Summary detail

200: 1
301: 1
319: 1
Ave: 1
Avenue: 90
Borda: 8
Boulevard: 39
Center: 1
Circle: 95
Cournot: 23
Court: 42
Cove: 23
Dr: 4
Drive: 595
DrIve: 18
E: 6
East: 951
Grove): 5
Halls: 1
Hugo: 19
Lane: 371
lane: 4
Loop: 19
Matth: 44
N: 18
N.: 1
North: 1267
Paris: 25
Parkway: 11
Pkwy: 2
Place: 24
Rd: 1
Rd.: 1
Road: 335
S: 1
South: 560
St: 5
St.: 1
Store: 1
street: 4
Street: 134
W: 3
Way: 192
West: 1150


Update/Clean for street type

In [20]:
def update_name(name, mapping):
    m = street_type_re.search(name)
    if m.group() in mapping.keys():
        name = name[:len(name)-len(m.group())] + mapping[m.group()]
    return name
    
def update_street_type(street_name, mapping):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            print street_type
            street_name = update_name(street_name, mapping)
            print street_name
    return street_name


# Explore Users

In [26]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#import xml.etree.cElementTree as ET
#import pprint
#import re

def get_user(element):
    return 
#    return element.get('user')

def process_map_users(filename):
    users = set()
    for _, element in ET.iterparse(filename):
        for x in element:
            if "user" in x.attrib:
                users.add(x.attrib["user"])
    return users

def find_users(filename):

    users = process_map_users(filename)
    pprint.pprint(len(users))
    pprint.pprint(users)
#    assert len(users) == 6


In [22]:
find_users('example.osm')

930
set(['1000hikes',
     '25or6to4',
     '32vJohnny',
     '3xodus',
     '7Evan007',
     'AFDave',
     'AFGIS',
     'AMY-jin',
     'Adam Boyle',
     'Adam Schneider',
     'Adamant1',
     'Airman99',
     'Alan Trick',
     'Aleks-Berlin',
     'Alex Stringham',
     'Alex-a',
     'Alisa Cherkashina',
     'Ammonj22',
     'Andre68',
     'Andrew Mac',
     'Arkinats',
     'ArminGh',
     'Arnold Wright',
     'Ashley Robinson',
     'Austin Wilcox',
     'Azrael Campbell',
     'BCNorwich',
     'Baloo Uriza',
     'Ben Harper 21',
     u'Beno\xeet Prieur',
     'Beyond Police',
     'BradleyGT',
     'Brandify Tran',
     'BreAnna H',
     'BreckT',
     'BrendanD015',
     'Brian Reavis',
     'Brian@Brea',
     'Bryce C Nesbitt',
     'BugBuster',
     'Camron Buhler',
     'Canyonsrcool',
     'CarniLvr79',
     'Cato_d_Ae',
     'CedricDV',
     'Cerritus',
     'Charles Murray',
     'Cheeto',
     'Chetan_Gowda',
     'Chetsup',
     'Chris Bell in California',
    

find_user results = 930

# tags.py (process_map)

In [23]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#import xml.etree.cElementTree as ET
#import pprint
#import re
"""
Your task is to explore the data a bit more.
Before you process the data and add it into your database, you should check the
"k" value for each "<tag>" and see if there are any potential problems.

We have provided you with 3 regular expressions to check for certain patterns
in the tags. As we saw in the quiz earlier, we would like to change the data
model and expand the "addr:street" type of keys to a dictionary like this:
{"address": {"street": "Some value"}}
So, we have to see if we have such tags, and if we have any tags with
problematic characters.

Please complete the function 'key_type', such that we have a count of each of
four tag categories in a dictionary:
  "lower", for tags that contain only lowercase letters and are valid,
  "lower_colon", for otherwise valid tags with a colon in their names,
  "problemchars", for tags with problematic characters, and
  "other", for other tags that do not fall into the other three categories.
See the 'process_map_tags' and 'test' functions for examples of the expected format.
"""

lower = re.compile(r'^([a-z]|_)*$')
lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$')
problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')


def key_type(element, keys):
    if element.tag == "tag":
        for tag in element.iter('tag'):
            k = tag.get('k')
            if lower.search(k):
                keys['lower'] +=1
            elif lower_colon.search(k):
                keys['lower_colon'] +=1
            elif problemchars.search(k):
                keys['problemchars'] +=1
            else:
                keys['other'] +=1
    return keys



def process_map_tags(filename):
    keys = {"lower": 0, "lower_colon": 0, "problemchars": 0, "other": 0}
    for _, element in ET.iterparse(filename):
        keys = key_type(element, keys)

    return keys


def test(filename):
    # You can use another testfile 'map.osm' to look at your solution
    # Note that the assertion below will be incorrect then.
    # Note as well that the test function here is only used in the Test Run;
    # when you submit, your code will be checked against a different dataset.
    keys = process_map_tags('example.osm')
    pprint.pprint(keys)
    assert keys == {'lower': 5, 'lower_colon': 0, 'other': 1, 'problemchars': 1}


#if __name__ == "__main__":
#    test()

In [24]:
process_map_tags('example.osm')

{'lower': 106992, 'lower_colon': 68484, 'other': 19711, 'problemchars': 0}

# data.py  (shape_element function, etc...)

In [275]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
After auditing is complete the next step is to prepare the data to be inserted into a SQL database.
To do so you will parse the elements in the OSM XML file, transforming them from document format to
tabular format, thus making it possible to write to .csv files.  These csv files can then easily be
imported to a SQL database as tables.

The process for this transformation is as follows:
- Use iterparse to iteratively step through each top level element in the XML
- Shape each element into several data structures using a custom function
- Utilize a schema and validation library to ensure the transformed data is in the correct format
- Write each data structure to the appropriate .csv files

We've already provided the code needed to load the data, perform iterative parsing and write the
output to csv files. Your task is to complete the shape_element function that will transform each
element into the correct format. To make this process easier we've already defined a schema (see
the schema.py file in the last code tab) for the .csv files and the eventual tables. Using the 
cerberus library we can validate the output against this schema to ensure it is correct.
"""
#ALREADY IMPORTED THESE AT THE TOP, LEAVING THESE IN JUST IN CASE WE RUN THIS SEPARATE.
import csv
import codecs
import pprint
import re
import xml.etree.cElementTree as ET
import cerberus
import schema

#OSM_PATH = "OREM.osm"   #real file
OSM_PATH = "example.osm" #sample file 

NODES_PATH = "nodes.csv"
NODE_TAGS_PATH = "nodes_tags.csv"
WAYS_PATH = "ways.csv"
WAY_NODES_PATH = "ways_nodes.csv"
WAY_TAGS_PATH = "ways_tags.csv"

LOWER_COLON = re.compile(r'^([a-z]|_)+:([a-z]|_)+')
PROBLEMCHARS = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

SCHEMA = schema.schema

# Make sure the fields order in the csvs matches the column order in the sql table schema
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']


def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict"""

    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # Handle secondary tags the same way for both node and way elements

    # YOUR CODE HERE

    #node field, loop node field header looking for key properties which will be placed in the node attribs dictionary.
    if element.tag == 'node':
        for x in NODE_FIELDS:
            node_attribs[x] = element.attrib[x]
    #looping through the child to the tag element values. 
        for child in element:
            ca_k = child.attrib["k"]
            ca_v = child.attrib["v"]
            ea_id = element.attrib["id"]
            n_tags = {} 
#another way - per google sheet
#            if PROBLEMCHARS.match[ca_k]):
            if re.match(PROBLEMCHARS,ca_k):   
                continue 
            elif re.match(LOWER_COLON,ca_k):  
                n_tags["id"] = ea_id
                n_tags["key"] = ca_k.split(":",1)[1]

# next 6 lines are the cleaning function
                if ca_k == "addr:street":       #use cleaning function
                    n_tags["value"] = update_street_type(ca_v, mapping)
                elif ca_k == "addr:state":
                    n_tags["value"] = update_state_name(ca_v, mapping_states)  
                else:  #otherwise process as normal
                    n_tags["value"] = ca_v 
                n_tags["type"] = ca_k.split(":",1)[0]
                tags.append(n_tags)
            else:
                n_tags["id"] = ea_id
                n_tags["key"] = ca_k
                n_tags["value"] = ca_v
                n_tags["type"] = 'regular'
                tags.append(n_tags)

    elif element.tag == 'way':
        for x in WAY_FIELDS:
            way_attribs[x] = element.attrib[x]
        w_position = 0 
        for child in element:
            nd_w_nodes = {} 
            w_tags = {}  
            ea_id = element.attrib["id"]
            if child.tag == 'nd':
                nd_w_nodes["id"] = ea_id
                nd_w_nodes["node_id"] = child.attrib["ref"]
                nd_w_nodes["position"] = w_position
                w_position = w_position+1
                way_nodes.append(nd_w_nodes)
            elif child.tag == 'tag':
                ca_k = child.attrib["k"]
                ca_v = child.attrib["v"]
                if re.match(PROBLEMCHARS,ca_k):
                    continue  #do nothing!
                elif re.match(LOWER_COLON,ca_k):
                    w_tags["id"] = ea_id
                    w_tags["key"] = ca_k.split(":",1)[1]
# next lines are my cleaning function
                    if ca_k == "addr:street":       #use cleaning function
                        w_tags["value"] = update_street_type(ca_v, mapping)
                    elif ca_k == "addr:state":
                        w_tags["value"] = update_state_name(ca_v, mapping_states)  
                    else:  #otherwise process as normal
                        w_tags["value"] = ca_v
                    w_tags["type"] = ca_k.split(":",1)[0]
                    tags.append(w_tags)
                else:
                    w_tags["id"] = ea_id
                    w_tags["key"] = ca_k
                    w_tags["value"] = ca_v
                    w_tags["type"] = 'regular'
                    tags.append(w_tags)

    if element.tag == 'node':
        return {'node': node_attribs, 'node_tags': tags}
    elif element.tag == 'way':
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}


# ================================================== #
#               Helper Functions                     #
# ================================================== #
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield element if it is the right type of tag"""

    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if event == 'end' and elem.tag in tags:
            yield elem
            root.clear()


def validate_element(element, validator, schema=SCHEMA):
    """Raise ValidationError if element does not match schema"""
    if validator.validate(element, schema) is not True:
        field, errors = next(validator.errors.iteritems())
        message_string = "\nElement of type '{0}' has the following errors:\n{1}"
        error_string = pprint.pformat(errors)
        
        raise Exception(message_string.format(field, error_string))


class UnicodeDictWriter(csv.DictWriter, object):
    """Extend csv.DictWriter to handle Unicode input"""

    def writerow(self, row):
        super(UnicodeDictWriter, self).writerow({
            k: (v.encode('utf-8') if isinstance(v, unicode) else v) for k, v in row.iteritems()
        })

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)


# ================================================== #
#               Main Function                        #
# ================================================== #
def process_map(file_in, validate):
    """Iteratively process each XML element and write to csv(s)"""

    with codecs.open(NODES_PATH, 'w') as nodes_file, \
         codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \
         codecs.open(WAYS_PATH, 'w') as ways_file, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file:

        nodes_writer = UnicodeDictWriter(nodes_file, NODE_FIELDS)
        node_tags_writer = UnicodeDictWriter(nodes_tags_file, NODE_TAGS_FIELDS)
        ways_writer = UnicodeDictWriter(ways_file, WAY_FIELDS)
        way_nodes_writer = UnicodeDictWriter(way_nodes_file, WAY_NODES_FIELDS)
        way_tags_writer = UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS)

        nodes_writer.writeheader()
        node_tags_writer.writeheader()
        ways_writer.writeheader()
        way_nodes_writer.writeheader()
        way_tags_writer.writeheader()

        validator = cerberus.Validator()

        for element in get_element(file_in, tags=('node', 'way')):
            el = shape_element(element)
            if el:
                if validate is True:
                    validate_element(el, validator)

                if element.tag == 'node':
                    nodes_writer.writerow(el['node'])
                    node_tags_writer.writerows(el['node_tags'])
                elif element.tag == 'way':
                    ways_writer.writerow(el['way'])
                    way_nodes_writer.writerows(el['way_nodes'])
                    way_tags_writer.writerows(el['way_tags'])


if __name__ == '__main__':
    # Note: Validation is ~ 10X slower. For the project consider using a small
    # sample of the map when validating.
    process_map(OSM_PATH, validate=True)

West
South 100 West
West
North 500 West
Utah
UT
Utah
UT
West
North 800 West
Utah
UT
West
North 800 West
Utah
UT
Utah
UT
Utah
UT
Utah
UT
West
South 800 West
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
South
West 200 South
Utah
UT
West
South 400 West
Utah
UT
West
South 400 West
Utah
UT
North
West 440 North
North
East 800 North
Utah
UT
West
North 500 West
South
East 500 South
Utah
UT
West
North 500 West
Store
BYU Store
West
No

West
South 2020 West
West
South 2020 West
West
South 2020 West
West
South 2020 West
West
South 2020 West
South
West 475 South
South
West 475 South
South
West 475 South
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
South
West 490 South
South
West 490 South
South
West 490 South
South
West 490 South
South
West 490 South
South
West 490 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
South
West 525 South
West
South 1875 West
South
West 525 South
West
South 1875 West
West
South 1875 West
West
South 1875 West
West
South 1875 West
West
South 1875 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 1920 West
West
South 19

Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
Way
West Golden Pond Way
West
South 1840 West
West
South 1840 West
West
South 1840 West
West
South 1840 West
West
South 1780 West
West
South 1780 West
West
South 1780 West
West
South 1780 West
West
South 1780 West
West
South 1780 West
South
West 860 South
West
South 1730 West
West
South 1730 West
South
West 800 South
South
West 800 South
West
South 1730 West
West
South 1730 West
West
South 1680 West
West
South 1680 West
West
South 1680 West
West
South 1680 West
West
South 1640 West
West
South 1640 West
West
South 1640 West
West
South 1590 West
South
West 860 South
West
South 1675 West
South
West 860 South
South
West 860 South
West
South 1675 West
West
South 1675 West
West
South 1675 West
West
South 1675 West
West
South 1675 West
West
South 1675 West
West
South 1675 West
West
South 1675 Wes

North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
North
East 380 North
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 725 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 750 East
East
North 725 East
East
North 725 East
East
North 725 East
East
N

South
East 170 South
South
East 170 South
South
East 170 South
South
East 170 South
South
East 170 South
South
East 170 South
East
South 350 East
East
South 350 East
East
South 350 East
East
South 400 East
East
South 400 East
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
South
West 1970 South
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1080 West
West
South 1080 West
West
South 1080 West
West
South 1080 West
West
South 1080 West
West
South 1080 West


North
West 550 North
West
North 400 West
N
1535 North
Utah
UT
Rd
Squaw Peak Road
Utah
UT
South
East 680 South
Utah
UT
Utah
UT
Utah
UT
N
180 North
N
140 North
N
150 North
North
West 1100 North
Utah
UT
East
North 1200 East
Utah
UT
East
North 1200 East
West
North 1340 West
ut
UT
North
East 820 North
North
East 700 North
Utah
UT
South
East 950 South
East
North 500 East
North
West 700 North
North
West 700 North
East
600 North 200 East
East
600 North 200 East
Utah
UT
West
North 500 West
West
North 500 West
South
East 1180 South
North
East 750 North
ut
UT
West
North 6800 West
East
South 400 East
Utah
UT
North
West 920 North
Ut
UT
E
South 400 East
West
North 400 West
West
North 100 West
North
East 700 North
West
North 100 West
North
East 500 North
East
North 300 East
Utah
UT
South
West 250 South
West
North 300 West
South
West 200 South
Ut
UT
North
East 600 North
Ut
UT
East
South 100 East
Utah
UT
North
600 North
Utah
UT
East
South 50 East
East
South 50 East
East
South 50 East
East
South 50 East

West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
West
South 1200 West
Utah
UT
North
West 1720 North
Utah
UT
North
West 1720 North
Utah
UT
North
West 1720 North
Utah
UT
West
North 450 West
Utah
UT
West
North 450 West
Utah
UT
West
North 450 West
Utah
UT
North
West 1720 North
Utah
UT
West
North 450 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
West
North 100 West
Utah
UT
West
South 150 West
Utah
UT
South
West 580 South
Utah
UT
West
South 150 West
Utah
UT
West
South 150 West
Utah
UT
West
South 150 West
Utah
UT
West
South 150 West
Utah
UT
West
South 150 West
Utah
UT
South
West 580 South
Utah
UT
South
West 580 South
U

North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
East
North 20 East
East
North 20 East
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
North
East 460 North
East
North 20 East
East
North 20 East
East
North 20 East
East
North 20 East
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 425 North
North
East 250 North
East
North 670 East
North
East 230 North
North
East 180 North
North

East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
East
North 410 East
North
East 180 North
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Paris
Rue de Paris
Cournot
Rue Cournot
Cournot
Rue Cournot
Cournot
Rue Cournot
Cournot
Rue Cournot
Cournot
Rue Cournot
Cournot
Rue Cournot
Cournot
Rue Cournot
Cour

Utah
UT
Utah
UT
South
East 640 South
Utah
UT
South
East 640 South
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
East
South 900 East
Utah
UT
South
East 640 South
Utah
UT
Utah
UT
Utah
UT
Utah
UT
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
East
South 860 East
Utah
UT
South
West 250 South
Utah
UT
South
West 250 South
Utah
UT
South
West 250 South
Utah
UT
South
West 250 South
Utah
UT
South
West 250 South
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circle
Chardonnay Circle
Circl

North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 700 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 670 North
North
East 67

West
South 1080 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1120 West
West
South 1140 West
South
West 1940 South
West
South 1140 West
West
South 1080 West
West
South 1080 West
West
South 1080 West
West
South 1150 West
West
South 1150 West
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1860 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
South
West 1810 South
West
South 1230 West
West
South 1230 West
West
South 1230 West
West
South 1150 West
West
South 1230 West
South
West 1810 South
West
North 4600 West
South
West 400 South
Utah
UT
West
South 400 West
West
South 300 West
West
South 500 West
North
East 450 North
Utah
UT
Way
Glendon Way
Utah
UT
Way
Glendon Way
Utah
UT
East
North 100 Eas

# additional idea - zip code updating

In [25]:
## Auditing postal codes

def audit_postal_codes(osmfile):
    postcode_list =[]
    osm_file = open(osmfile, "r")
    for event,element in ET.iterparse(osmfile):
        for elem in element.iter("tag"):
            if elem.attrib['k'] == "addr:postcode":
                postcode_list.append(elem.attrib['v'])
    pprint.pprint(Counter(postcode_list))
    osm_file.close()
# Check for consistency
# Then clean it
def update_name_postcode(postalcode):
    if len(postalcode) == 5:
        return postalcode
    else:
        postalcode = postalcode[:5]
        return postalcode

In [277]:
audit_postal_codes('example.osm')

Counter({'84059': 11718, '84062': 1932, '84058': 1746, '84601': 813, '84057': 375, '84604': 363, '84003': 282, '84606': 165, '84042': 153, '84097': 123, '84043': 102, '84602': 18, '84042-1526': 3, '84005': 3, '84096': 3})


In [15]:
update_name_postcode('84306-9729')

'84306'

# Resources

https://www.tutorialspoint.com/counters-in-python<br>
https://docs.google.com/a/wgu.edu/viewer?a=v&pid=sites&srcid=d2d1LmVkdXxjNzUwLS0tZGF0YS13cmFuZ2xpbmd8Z3g6MWU3ZDEwYjQyMWI2MTBjYw<br>
https://drive.google.com/file/d/18lI6B_RLa-z2Z1-fP7vtJKCMu8QharX6/view<br>
https://sites.google.com/a/wgu.edu/c750---data-wrangling/example-pseudocode-1<br>
https://gist.github.com/carlward/54ec1c91b62a5f911c42#file-sample_project-md<br>
https://github.com/M-Medhat/dand-mongodb-data-wrangling/blob/master/osm.py<br>
https://www.guru99.com/python-regular-expressions-complete-tutorial.html<br>
https://appdividend.com/2020/05/15/three-ways-to-get-file-size-in-python/<br>
http://wgu.libguides.com.wgu.idm.oclc.org/libhome 
