In [2]:
#Project Goal: Clean up openstreetmap data in XML
#Convert the XML to a csv file
#Import file into SQL to use as a database

In [68]:
#Goal 1 sub goals
#Change all street types to be uniform
#Make sure all zip codes make sense
#Change all state routes to be uniform

In [404]:
import xml.etree.cElementTree as ET
import re
from collections import defaultdict
import pprint
import csv
import codecs
import sqlite3
import pandas as pd

In [405]:
#OSM File
wall_osm = r'C:\Users\chels\Downloads\Project 2\Wall_Twp.osm'

In [406]:
#Change all street types to be uniform

#First get a List of expected street types
expected = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road", 
            "Trail", "Parkway", "Commons", "Way", "34", "13", "35", "33", "9", "88", "66", "Plaza", "70", "Circle", "Boardwalk", "Highway", "Front", "West", "71", "South", "Broadway"]

#Dictionary of street types to change/normalize
street_mapping = {  "St": "Street",
                    "St.": "Street",
                    "st": "Street",
                    "avenue": "Avenue",
                    "Ave.": "Avenue",
                    "Ave": "Avenue",
                    'ave': "Avenue",
                    "Unit": "",
                    "Rd": "Road",
                    "Blvd": "Boulevard",
                    "Northbound": "",
                    'Goldcrest': 'Goldcrest Drive',
                    'South': '',
                    "RT-35": "Route 35",
                    "NJ-35": "Route 35",
                    "NJ-88": "Route 88"
                  }          

#Dictionary of route names to change/normalize
route_mapping = {   "NJ 13": "Route 13",
                    "State Route 33": "Route 33",
                    "NJ-34 C3": "Route 34",
                    "NJ 35": "Route 35",
                    "Highway 35": "Route 35",
                    "State Highway 35": "Route 35",
                    "Hwy 35": "Route 35",
                    "State Route 66": "Route 66",
                    "Rt. 88": "Route 88",
                    "US. Rt. 9": "Route 9",
                    "Us Highway 9": "Route 9",
                    "Rt 9": "Route 9",
                    "U.S. 9": "Route 9",
                    'US 9': "Route 9",
                    'US Highway 9': "Route 9",
                    'US RT 9': "Route 9",
                    'US Rt. 9': "Route 9",
                    'US. Rt. 9': "Route 9",
                    'Us Highway 9': "Route 9",
                    "Route 9 Northbound": "Route 9"
}

# Dictionary to update the solitairy incorrect zipcode
zip_mapping = { "8730": "08730"
              }

#re to search for the end of a street name and zip codes & initializing variables to add these values to
street_type_re = re.compile(r'\b\S+\.?$')
street_types = defaultdict(set)

zip_code_re = re.compile(r'^\d{5}')
zips = defaultdict(set)

In [407]:
#functions to determine if element is street name or zip code
def is_street_name(elem):
    return (elem.attrib['k'] == "addr:street")
def is_zip_code(elem):
    return (elem.attrib['k'] == "addr:postcode")

In [408]:
#if re finds an unexpected street name adds it to street_types
def audit_street_type(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            street_types[street_type].add(street_name)

In [409]:
#if re finds an unexpected street name adds it to zips
def audit_zip_code(zips, zip_code):
    m = zip_code_re.search(zip_code)
    if not m:
        zips[zip_code].add(zip_code)

In [410]:
#full audit of zips & street names
def audit(osm_file):
    for event, elem in ET.iterparse(osm_file, events=("start",)):
        if elem.tag == "way" or elem.tag == "node":
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])
                if is_zip_code(tag):
                    audit_zip_code(zips, tag.attrib['v'])           
    pprint.pprint(dict(street_types))
    pprint.pprint(dict(zips))
    
audit(wall_osm)

{'Ave': {'Sunset Ave', '210 5th Ave', 'Lewin Ave'},
 'Ave.': {'River Ave.'},
 'Blvd': {'Hance Blvd'},
 'C3': {'NJ-34 C3'},
 'Goldcrest': {'Goldcrest'},
 'NJ-35': {'NJ-35'},
 'NJ-88': {'NJ-88'},
 'Northbound': {'Route 9 Northbound'},
 'RT-35': {'RT-35'},
 'Rd': {'Beaver Dam Rd',
        'Chambersbridge Rd',
        'County Line Rd',
        'Herman Rd',
        'Redwood Rd',
        'Wemrock Rd'},
 'St': {'Court St', 'Bond St', 'Main St'},
 'Unit': {'North County Line Road Unit'},
 'avenue': {'elberon avenue'},
 'st': {'8th st'}}
{'8730': {'8730'}}


In [414]:
def update_street_name(name, mapping=street_mapping):
    m = street_type_re.search(name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            name = re.sub(street_type_re, street_mapping[street_type], name).title()
    return name

In [415]:
def update_zip_code(number, mapping=zip_mapping):
    if number in mapping:
            number = mapping.get(number)
    return number

In [416]:
def update_route_name(name, mapping=route_mapping):
        if name in mapping:
            name = mapping.get(name)
            return name

In [417]:
#updates streets/routes names
def update_streets(item):
    if item in route_mapping:
        return update_route_name(item)
    elif item not in route_mapping:
        return update_street_name(item)
    else:
        return item

---

In [418]:
schema = {
    'node': {
        'type': 'dict',
        'schema': {
            'id': {'required': True, 'type': 'integer', 'coerce': int},
            'lat': {'required': True, 'type': 'float', 'coerce': float},
            'lon': {'required': True, 'type': 'float', 'coerce': float},
            'user': {'required': True, 'type': 'string'},
            'uid': {'required': True, 'type': 'integer', 'coerce': int},
            'version': {'required': True, 'type': 'string'},
            'changeset': {'required': True, 'type': 'integer', 'coerce': int},
            'timestamp': {'required': True, 'type': 'string'}
        }
    },
    'node_tags': {
        'type': 'list',
        'schema': {
            'type': 'dict',
            'schema': {
                'id': {'required': True, 'type': 'integer', 'coerce': int},
                'key': {'required': True, 'type': 'string'},
                'value': {'required': True, 'type': 'string'},
                'type': {'required': True, 'type': 'string'}
            }
        }
    },
    'way': {
        'type': 'dict',
        'schema': {
            'id': {'required': True, 'type': 'integer', 'coerce': int},
            'user': {'required': True, 'type': 'string'},
            'uid': {'required': True, 'type': 'integer', 'coerce': int},
            'version': {'required': True, 'type': 'string'},
            'changeset': {'required': True, 'type': 'integer', 'coerce': int},
            'timestamp': {'required': True, 'type': 'string'}
        }
    },
    'way_nodes': {
        'type': 'list',
        'schema': {
            'type': 'dict',
            'schema': {
                'id': {'required': True, 'type': 'integer', 'coerce': int},
                'node_id': {'required': True, 'type': 'integer', 'coerce': int},
                'position': {'required': True, 'type': 'integer', 'coerce': int}
            }
        }
    },
    'way_tags': {
        'type': 'list',
        'schema': {
            'type': 'dict',
            'schema': {
                'id': {'required': True, 'type': 'integer', 'coerce': int},
                'key': {'required': True, 'type': 'string'},
                'value': {'required': True, 'type': 'string'},
                'type': {'required': True, 'type': 'string'}
            }
        }
    }
}


In [419]:
OSM_PATH = r"C:\Users\chels\Downloads\Project 2\Wall_Twp.osm"

NODE_PATH = "node.csv"
NODE_TAG_PATH = "node_tags.csv"
WAY_PATH = "ways.csv"
WAY_NODES_PATH = "way_nodes.csv"
WAY_TAG_PATH = "ways_tags.csv"

LOWER_COLON = re.compile(r'^([a-z]|_)+:([a-z]|_)+')
PROBLEMCHARS = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

# Make sure the fields order in the csvs matches the column order in the sql table schema
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']


def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict"""

    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # Handle secondary tags the same way for both node and way elements

    # YOUR CODE HERE
    
    if element.tag == 'node':
        for i in element.attrib:
            if i in NODE_FIELDS:
                node_attribs[i] = element.attrib[i] #checks elements attributes & if they are also in the NODE_FIELDS list adds them to node_attribs list
        for tag in element:
            elem_tags = {}
            elem_tags['id'] = element.attrib['id']
            if PROBLEMCHARS.match(tag.attrib['k']): #continues if problem characters encountered
                continue
            if LOWER_COLON.match(tag.attrib['k']): 
                elem_tags['type'] = tag.attrib['k'].split(':',1)[0]
                elem_tags['key'] = tag.attrib['k'].split(':',1)[1] #If we find a colon in the k attrib splits first part into the type and second part into key
                if is_street_name(tag):
                    elem_tags['value'] = update_streets(tag.attrib['v']) #updates the name if it's a street name
                elif is_zip_code(tag):
                    elem_tags['value'] = update_zip_code(tag.attrib['v']) #updates zip code
                else:
                    elem_tags['value'] = tag.attrib['v']
                tags.append(elem_tags)

            else:
                elem_tags['type'] = default_tag_type
                elem_tags['key'] = tag.attrib['k']
                if is_street_name(tag):
                    elem_tags['value'] = update_streets(tag.attrib['v'])   
                elif is_zip_code(tag):
                    elem_tags['value'] = update_zip_code(tag.attrib['v'])
                else:
                    elem_tags['value'] = tag.attrib['v']
                tags.append(elem_tags)
            
                     
    elif element.tag == "way":
        pos = 0 
        for i in element.attrib:
            if i in WAY_FIELDS:
                way_attribs[i] = element.attrib[i]
        for node in element:
            way_tag = {}
            way_tag['id'] = element.attrib['id']
            if node.tag == "tag":
                if PROBLEMCHARS.match(node.attrib['k']):
                        continue
                if LOWER_COLON.match(node.attrib['k']):
                    way_tag['type'] = node.attrib['k'].split(':',1)[0]
                    way_tag['key'] = node.attrib['k'].split(':',1)[1]
                    if is_street_name(node):
                         way_tag['value'] = update_streets(node.attrib['v'])    
                    if is_zip_code(node):
                         way_tag['value'] = update_zip_code(node.attrib['v'])
                    else:
                        way_tag['value'] = node.attrib['v']         
                else:
                    way_tag['type'] = default_tag_type
                    way_tag['key'] = node.attrib['k']
                    if is_street_name(node):
                        way_tag['value'] = update_streets(node.attrib['v'])    
                    elif is_zip_code(node):
                        way_tag['value'] = update_zip_code(node.attrib['v'])
                    else:
                        way_tag['value'] = node.attrib['v']
                tags.append(way_tag)
                
                
                
            elif node.tag == 'nd':
                way_node = {}
                way_node['id'] = element.attrib['id']
                way_node['node_id'] = node.attrib['ref']
                way_node['position'] = pos 
                pos += 1
                way_nodes.append(way_node)
                
    if element.tag == 'node':
        return {'node': node_attribs, 'node_tags': tags}
    elif element.tag == 'way':
        return {'way': way_attribs, 'way_nodes': way_nodes, 'ways_tags': tags}

# ================================================== #
#               Helper Functions                     #
# ================================================== #
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield element if it is the right type of tag"""

    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if event == 'end' and elem.tag in tags:
            yield elem
            root.clear()
    

class UnicodeDictWriter(csv.DictWriter, object):
    """Extend csv.DictWriter to handle Unicode input. Updated for Python 3"""
    def writerow(self, row):
        super(UnicodeDictWriter, self).writerow({
            k: (v.encode('utf-8') if isinstance(v, str) else v) for k, v in row.items()
        })
    def writerows(self, rows):
        for row in rows:
            self.writerow(row)

# ================================================== #
#               Main Function                        #
# ================================================== #
def process_map(file_in):
    """Iteratively process each XML element and write to csv(s). Updated for Python 3"""

    with codecs.open(NODE_PATH, 'w', 'utf-8') as nodes_file, \
         codecs.open(NODE_TAG_PATH, 'w', 'utf-8') as node_tags_file, \
         codecs.open(WAY_PATH, 'w', 'utf-8') as ways_file, \
         codecs.open(WAY_NODES_PATH, 'w', 'utf-8') as way_nodes_file, \
         codecs.open(WAY_TAG_PATH, 'w', 'utf-8') as way_tags_file:

        nodes_writer = csv.DictWriter(nodes_file, NODE_FIELDS)
        node_tags_writer = csv.DictWriter(node_tags_file, NODE_TAGS_FIELDS)
        ways_writer = csv.DictWriter(ways_file, WAY_FIELDS)
        way_nodes_writer = csv.DictWriter(way_nodes_file, WAY_NODES_FIELDS)
        way_tags_writer = csv.DictWriter(way_tags_file, WAY_TAGS_FIELDS)

        nodes_writer.writeheader()
        node_tags_writer.writeheader()
        ways_writer.writeheader()
        way_nodes_writer.writeheader()
        way_tags_writer.writeheader()

        

        for element in get_element(file_in, tags=('node', 'way')):
            el = shape_element(element)
            if el:
                
                if element.tag == 'node':
                    nodes_writer.writerow(el['node'])
                    node_tags_writer.writerows(el['node_tags'])
                elif element.tag == 'way':
                    ways_writer.writerow(el['way'])
                    way_nodes_writer.writerows(el['way_nodes'])
                    way_tags_writer.writerows(el['ways_tags'])


if __name__ == '__main__':

    process_map(OSM_PATH)


In [420]:
#Create db file and connection to sqlite3
wall_twp_db = r'C:\Users\chels\Downloads\wall_twp_db.db'
conn = sqlite3.connect(wall_twp_db)
c = conn.cursor()

In [421]:
#create our tables

c.execute('''CREATE TABLE IF NOT EXISTS node
             (id INTEGER NOT NULL, lat REAL, lon REAL, user TEXT, uid INTEGER, version INTEGER, changeset INTEGER, timestamp TEXT)''')
c.execute('''CREATE TABLE IF NOT EXISTS node_tags
             (id INTEGER NOT NULL, key TEXT, value TEXT, type TEXT, FOREIGN KEY (id) REFERENCES node(id))''')
c.execute('''CREATE TABLE IF NOT EXISTS ways
             (id INTEGER NOT NULL, user TEXT, uid TEXT, version INTEGER, changeset INTEGER, timestamp TEXT)''')
c.execute('''CREATE TABLE IF NOT EXISTS way_nodes
             (id INTEGER NOT NULL, node_id INTEGER NOT NULL, position INTEGER, FOREIGN KEY (id) REFERENCES ways(id), FOREIGN KEY (node_id) REFERENCES nodes(id))''')
c.execute('''CREATE TABLE IF NOT EXISTS ways_tags
             (id INTEGER NOT NULL, key TEXT, value TEXT, type TEXT, FOREIGN KEY (id) REFERENCES ways(id))''')

<sqlite3.Cursor at 0x1f11d890c70>

In [422]:
#read csv files into pandas and convert them to SQL adding them to our tables
node = pd.read_csv('node.csv', encoding='utf-8')
node_tags = pd.read_csv('node_tags.csv', encoding='utf-8')
ways = pd.read_csv('ways.csv', encoding='utf-8')
way_nodes = pd.read_csv('way_nodes.csv', encoding='utf-8')
ways_tags = pd.read_csv('ways_tags.csv', encoding='utf-8')

node.to_sql('node', conn, if_exists='append', index = False)
node_tags.to_sql('node_tags', conn, if_exists='append', index = False)
ways.to_sql('ways', conn, if_exists='append', index = False)
way_nodes.to_sql('way_nodes', conn, if_exists='append', index = False)
ways_tags.to_sql('ways_tags', conn, if_exists='append', index = False)

In [437]:
#Find our file sizes using count, make a column File Name, and output to a dataframe using pandas read to sql
print("File Size Analysis\n")
file_count = pd.read_sql('''
SELECT 'Node' as "File Name", COUNT(*) as Size FROM node
UNION ALL
SELECT 'Node Tags' as "File Name", COUNT(*)  FROM node_tags 
UNION ALL
SELECT 'Ways' as "File Name", COUNT(*) FROM ways 
UNION ALL
SELECT 'Way Nodes' as "File Name", COUNT(*) FROM way_nodes 
UNION ALL
SELECT 'Ways Tags' as "File Name", COUNT(*) FROM ways_tags
''', conn)

file_count

File Size Analysis



Unnamed: 0,File Name,Size
0,Node,1526319
1,Node Tags,120963
2,Ways,138429
3,Way Nodes,1829610
4,Ways Tags,539610


In [436]:
#total file size sum
file_total_count = file_count['Size'].sum()
print("The total size of all of the files is: {}".format(file_total_count))

The total size of all of the files is: 4154931


In [425]:
#Count distinct users from a union of user columns in ways and nodes
user_count = c.execute('''
SELECT COUNT(DISTINCT user) 
FROM(
SELECT user FROM node
UNION 
SELECT user FROM ways
)''').fetchall()

print("There are {} unique contributors.".format(user_count[0][0]))

There are 1020 unique contributors.


In [426]:
#Find top 10 contributors
top_users = pd.read_sql('''
SELECT user as User, COUNT(*) as "Total Contributions"
FROM
( 
SELECT user
FROM node
UNION ALL
SELECT user
FROM ways
) 
group by user
ORDER BY "Total Contributions" Desc
''', conn)

print("The top 10 contributors to our data are:")
top_users.head(10)

The top 10 contributors to our data are:


Unnamed: 0,User,Total Contributions
0,woodpeck_fixbot,275166
1,ppjj,246036
2,NJDataUploads,173589
3,Aurimas Fišeras,64254
4,MilkManHere,44736
5,Valustaides,44235
6,choess,35961
7,JriSv250,29478
8,TIGERcnl,22908
9,crystalwalrein,22845


In [427]:
#find a list of sports in descending order of occurance
top_sports = pd.read_sql('''
SELECT value as "Sports Facilities", COUNT(*) as Total
FROM
( 
SELECT value
FROM node_tags
WHERE node_tags.key = "sport"
UNION ALL
SELECT value
FROM ways_tags
WHERE ways_tags.key = "sport"
) 
GROUP BY value
ORDER BY Total DESC
''', conn)

print("Here is a list of all the sports facilities you can find. It seems tennis is the most popular!")
top_sports.head(10)

Here is a list of all the sports facilities you can find. It seems tennis is the most popular!


Unnamed: 0,Sports Facilities,Total
0,tennis,312
1,golf,267
2,baseball,228
3,soccer,153
4,basketball,150
5,swimming,36
6,equestrian,33
7,american_football,21
8,boules,18
9,badminton,18


In [428]:
#find a list of amenities in descending order of occurance
top_amenity = pd.read_sql('''
SELECT value as Amenity, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "amenity"
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "amenity"
) 
group by value
ORDER BY Total Desc
''', conn)

print("Here are all of our amenities in descending order")
top_amenity.head(11)

Here are all of our amenities in descending order


Unnamed: 0,Amenity,Total
0,parking,1434
1,school,576
2,parking_space,516
3,place_of_worship,510
4,restaurant,459
5,fire_station,219
6,bicycle_parking,204
7,bench,153
8,fast_food,150
9,bank,99


In [429]:
#find a list of leisure locations in descending order of occurance
leisure_activities = pd.read_sql('''
SELECT value as Destination, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "leisure"
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "leisure"
) 
group by value
ORDER BY Total Desc
''', conn)

print("Here is a list of all of the leisure items in our data. Quite a bit to do! We are right on the coast, so the marinas and beaches are lovely!")
leisure_activities

Here is a list of all of the leisure items in our data. Quite a bit to do! We are right on the coast, so the marinas and beaches are lovely!


Unnamed: 0,Destination,Total
0,pitch,1239
1,swimming_pool,570
2,park,411
3,playground,213
4,picnic_table,156
5,golf_course,63
6,sports_centre,27
7,fitness_centre,27
8,miniature_golf,21
9,amusement_arcade,21


In [430]:
#find a list of tourist items in descending order of occurance
tourism = pd.read_sql('''
SELECT value as Destination, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "tourism"
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "tourism"
) 
group by value
ORDER BY Total Desc
''', conn)

print("For tourism there are many museums and a couple of theme parks that catch my eye. Here is a list:")
tourism

For tourism there are many museums and a couple of theme parks that catch my eye. Here is a list:


Unnamed: 0,Destination,Total
0,hotel,51
1,attraction,42
2,museum,36
3,motel,21
4,artwork,18
5,picnic_site,15
6,gallery,15
7,guest_house,12
8,viewpoint,6
9,camp_site,6


In [431]:
#find a list of church faiths and their frequency
churches = pd.read_sql('''
SELECT value as Religion, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "religion"
AND ways_tags.id IN 
(
SELECT id
FROM ways_tags
WHERE ways_tags.value = "place_of_worship"
)
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "religion"
AND node_tags.id IN 
(
SELECT id
FROM node_tags
WHERE node_tags.value = "place_of_worship"
)
) 
group by value
ORDER BY Total Desc
''', conn)

print("Here are the religions practiced by the local churches and the count of the churches:")
churches

Here are the religions practiced by the local churches and the count of the churches:


Unnamed: 0,Religion,Total
0,christian,405
1,jewish,39
2,muslim,3


In [432]:
# Make a list of religious denominations per church and their frequency
denominations = pd.read_sql('''
SELECT value as Denomination, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "denomination"
AND ways_tags.id IN 
(
SELECT id
FROM ways_tags
WHERE ways_tags.value = "place_of_worship"
)
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "denomination"
AND node_tags.id IN 
(
SELECT id
FROM node_tags
WHERE node_tags.value = "place_of_worship"
)
) 
group by value
ORDER BY Total Desc
''', conn)

print("Here is a list of all of the local religious denominations. I grew up methodist and now I know that must be quite common!")
denominations

Here is a list of all of the local religious denominations. I grew up methodist and now I know that must be quite common!


Unnamed: 0,Denomination,Total
0,methodist,84
1,baptist,57
2,orthodox,24
3,catholic,24
4,lutheran,15
5,roman_catholic,12
6,pentecostal,9
7,presbyterian,6
8,jehovahs_witness,6
9,nondenominational,3


In [438]:
# List of cuisines by occurance
cuisines = pd.read_sql('''
SELECT value as Cuisine, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "cuisine"
AND ways_tags.id IN 
(
SELECT id
FROM ways_tags
WHERE ways_tags.value = "restaurant"
OR ways_tags.value = "fast_food"
)
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "cuisine"
AND node_tags.id IN 
(
SELECT id
FROM node_tags
WHERE node_tags.value = "restaurant"
OR node_tags.value = "fast_food"
)
) 
group by value
ORDER BY Total Desc
''', conn)

print("No surprises here. This list shows pizza and italian as being among the highest in count of cuisines. It is a very italian area.")
cuisines

No surprises here. This list shows pizza and italian as being among the highest in count of cuisines. It is a very italian area.


Unnamed: 0,Cuisine,Total
0,pizza,45
1,italian,33
2,burger,33
3,mexican,18
4,sandwich,15
5,seafood,12
6,american,12
7,donut;coffee_shop,9
8,donut,9
9,thai,6


In [439]:
# List of cafes by occurance
cafes = pd.read_sql('''
SELECT value as Cafe, COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "name"
AND ways_tags.id IN 
(
SELECT id
FROM ways_tags
WHERE ways_tags.value = "cafe"
)
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "name"
AND node_tags.id IN 
(
SELECT id
FROM node_tags
WHERE node_tags.value = "cafe"
)
) 
group by value
ORDER BY Total Desc
''', conn)

print("Initially this surprised me very much, seeing as how I thought Dunkin' Donuts would be king in my hometown.\nThis is actually due to a data inconsistancy where D&D is sometimes listed as just fast food and not a cafe")
cafes

Initially this surprised me very much, seeing as how I thought Dunkin' Donuts would be king in my hometown.
This is actually due to a data inconsistancy where D&D is sometimes listed as just fast food and not a cafe


Unnamed: 0,Cafe,Total
0,Starbucks,15
1,Playa Bowls,9
2,Dunkin' Donuts,6
3,Playa Bowls Ocean,3
4,Ms. Bagel,3
5,Inkwell Coffeehouse,3
6,Green Planet Coffee,3
7,Catsbury Park,3
8,Cathy's Bagels,3
9,Cafe Volan,3


In [440]:
# List of fast food by occurance
fast_food = pd.read_sql('''
SELECT value as "Fast Food Restaurant", COUNT(*) as Total
FROM
( 
SELECT value
FROM ways_tags
WHERE ways_tags.key = "name"
AND ways_tags.id IN 
(
SELECT id
FROM ways_tags
WHERE ways_tags.value = "fast_food"
)
UNION ALL
SELECT value
FROM node_tags
WHERE node_tags.key = "name"
AND node_tags.id IN 
(
SELECT id
FROM node_tags
WHERE node_tags.value = "fast_food"
)
) 
group by value
ORDER BY Total Desc
''', conn)

print("Now this makes more sense! Here is a list of the fast food locations in the area.\nThere are a ton more Dunkin' Donuts in the area than Starbucks, they are just listed as fast food! I guess they can be both!\nLiving in Seattle now, I don't know where my loyalties lie!")
fast_food

Now this makes more sense! Here is a list of the fast food locations in the area.
There are a ton more Dunkin' Donuts in the area than Starbucks, they are just listed as fast food! I guess they can be both!
Living in Seattle now, I don't know where my loyalties lie!


Unnamed: 0,Fast Food Restaurant,Total
0,Dunkin' Donuts,33
1,McDonald's,12
2,Burger King,12
3,Wendy's,6
4,Taco Bell,6
5,Jersey Mike's Subs,6
6,Windmill HotDogs,3
7,White Castle,3
8,Vintage Subs,3
9,Tony’s,3
