# Explore Data

In [1]:
import xml.etree.cElementTree as ET
import audit

# Path of the OSM XML extract explored throughout this notebook.
sample = "vancouver.osm"

# Every tag key whose name contains "type" (the keys themselves, not values).
type_keys = set()
# Distinct tag values, grouped by the fragment found in the tag key.
amenity_values = set()
cuisine_values = set()
shop_values = set()
religion_values = set()

# (fragment looked for inside the tag key, set receiving the tag's value).
# Matching is by substring, not equality, so a key that merely contains the
# fragment also contributes its value to the corresponding set.
value_collectors = [
    ('amenity', amenity_values),
    ('cuisine', cuisine_values),
    ('shop', shop_values),
    ('religion', religion_values),
]

with open(sample, "rb") as f:
    # iterparse yields each element once its closing tag has been read, so
    # the direct <tag> children of `elem` are complete at this point.
    for event, elem in ET.iterparse(f):
        for child in elem:
            if child.tag != 'tag':
                continue
            key = child.attrib['k']
            if "type" in key:
                type_keys.add(key)
            for fragment, values in value_collectors:
                if fragment in key:
                    values.add(child.attrib['v'])
        # iterparse still builds the full tree behind the scenes; drop the
        # contents of finished elements so memory stays flat on large files.
        # <tag> elements are left alone here because their parent has not
        # been yielded yet and still needs to read their attributes.
        if elem.tag != 'tag':
            elem.clear()

In [2]:
# Show every tag key containing "type" that the scan in the first cell found.
type_keys

{'aerodrome:type',
 'artwork_type',
 'fence_type',
 'fire_hydrant:type',
 'leaf_type',
 'map_type',
 'seamark:type',
 'tower:type',
 'tracktype',
 'tree:type',
 'type',
 'waterway:type'}

In [3]:
# Show the distinct values of tags whose key contains 'amenity'.
amenity_values

{'Crematorium',
 'atm',
 'bank',
 'bar',
 'bench',
 'bicycle_parking',
 'bicycle_rental',
 'bureau_de_change',
 'bus_station',
 'cafe',
 'car_rental',
 'car_sharing',
 'car_wash',
 'charging_station',
 'childcare',
 'clinic',
 'college',
 'community_centre',
 'dentist',
 'drinking_water',
 'embassy',
 'fast_food',
 'fire_station',
 'fountain',
 'fuel',
 'ice_cream',
 'library',
 'nightclub',
 'parking',
 'parking_entrance',
 'pharmacy',
 'place_of_worship',
 'post_box',
 'post_office',
 'pub',
 'research_institute',
 'restaurant',
 'school',
 'shelter',
 'shower',
 'storage',
 'swimming_pool',
 'taxi',
 'telephone',
 'theatre',
 'toilets',
 'townhall',
 'vending_machine',
 'waste_basket'}

In [4]:
# Show the distinct values of tags whose key contains 'cuisine'.
cuisine_values

{'Indian;vegetarian',
 'Malaysian',
 'asian',
 'bakery',
 'burger',
 'cake',
 'chinese',
 'coffee_shop',
 'fish',
 'fish_and_chips',
 'indian',
 'international',
 'italian',
 'japanese',
 'japanese;korean;asian',
 'japanese;sushi',
 'kebab',
 'mexican',
 'noodles;asian',
 'pizza',
 'ramen',
 'regional',
 'sandwich',
 'sushi',
 'sushi;seafood;fine_dining',
 'thai',
 'vegetarian',
 'vietnamese'}

In [5]:
# Show the distinct values of tags whose key contains 'shop'.
shop_values

{'alcohol',
 'art',
 'bakery',
 'beauty',
 'bed',
 'beverages',
 'bicycle',
 'boat',
 'books',
 'boutique',
 'butcher',
 'car',
 'car_parts',
 'car_repair',
 'chemist',
 'clothes',
 'computer',
 'confectionery',
 'convenience',
 'deli',
 'department_store',
 'doityourself',
 'dry_cleaning',
 'electronics',
 'florist',
 'furniture',
 'gift',
 'greengrocer',
 'hairdresser',
 'hardware',
 'houseware',
 'interior_decoration',
 'jewelry',
 'kitchen',
 'laundry',
 'lingerie',
 'locksmith',
 'mall',
 'mobile_phone',
 'music',
 'optician',
 'pet',
 'photo',
 'photography',
 'print',
 'radiotechnics',
 'religion',
 'seafood',
 'self_storage',
 'shoe_repair',
 'shoes',
 'sports',
 'stationery',
 'supermarket',
 'tattoo',
 'tea',
 'thrift',
 'toys',
 'travel_agency',
 'tyres',
 'variety_store',
 'video',
 'wine',
 'yes'}

In [6]:
# Show the distinct values of tags whose key contains 'religion'.
religion_values

{'christian', 'sikh', 'taoist'}

In [7]:
# Run the street-name audit from the local `audit` module on the same extract.
# NOTE(review): the call appears to print its findings as a side effect (see
# the output below) in addition to returning them -- confirm in audit.py.
street_types = audit.audit(sample)

{'108': set(['8th Ave W #108']),
 'Alley': set(['Railspur Alley']),
 'Broadway': set(['East Broadway', 'West Broadway']),
 'Crescent': set(['Carrie Cates Crescent']),
 'Diversion': set(['Victoria Diversion']),
 'E': set(['37th Ave E']),
 'East': set(['Grand Boulevard East']),
 'Esplanade': set(['West Esplanade']),
 'Highway': set(['Lougheed Highway']),
 'Jarvis': set(['Jarvis']),
 'Kingsway': set(['Kingsway']),
 'Mall': set(['East Mall', 'Main Mall', 'Wesbrook Mall']),
 'North': set(['East Kent Avenue North']),
 'Rd.': set(['Boundary Rd.']),
 'St': set(['Robson St', 'Shaughnessy St', 'Whitchurch St']),
 'St.': set(['Mainland St.']),
 'Streer': set(['Water Streer']),
 'Terminal': set(['Station Terminal']),
 'Way': set(['Canada Way',
             'Clancy Loranger Way',
             'Eburne Way',
             'Van Horne Way',
             'Viscount Way',
             'Vulcan Way']),
 'West': set(['Grand Boulevard West'])}


In [8]:
# Display the value returned by audit.audit(sample).
# NOTE(review): the repr starts with "(" so this looks like a tuple whose
# first item is the defaultdict of street names -- confirm against audit.py.
street_types

(defaultdict(set,
             {'108': {'8th Ave W #108'},
              'Alley': {'Railspur Alley'},
              'Broadway': {'East Broadway', 'West Broadway'},
              'Crescent': {'Carrie Cates Crescent'},
              'Diversion': {'Victoria Diversion'},
              'E': {'37th Ave E'},
              'East': {'Grand Boulevard East'},
              'Esplanade': {'West Esplanade'},
              'Highway': {'Lougheed Highway'},
              'Jarvis': {'Jarvis'},
              'Kingsway': {'Kingsway'},
              'Mall': {'East Mall', 'Main Mall', 'Wesbrook Mall'},
              'North': {'East Kent Avenue North'},
              'Rd.': {'Boundary Rd.'},
              'St': {'Robson St', 'Shaughnessy St', 'Whitchurch St'},
              'St.': {'Mainland St.'},
              'Streer': {'Water Streer'},
              'Terminal': {'Station Terminal'},
              'Way': {'Canada Way',
               'Clancy Loranger Way',
               'Eburne Way',
               'Van 