This notebook uploads leisure, shop, and amenity data to the database.

In [1]:
from dotenv import load_dotenv
import numpy as np
import os
import pymysql
import pandas as pd

In [2]:
# Read the database connection settings from environment variables
# (load_dotenv() first merges in any values from a local .env file), so no
# credentials are hard-coded in the notebook.
load_dotenv()
DB_HOST = os.getenv("DB_HOST")
# os.getenv returns None when the variable is unset, and int(None) raises a
# TypeError; fall back to 3306 (the standard MySQL port) when DB_PORT is
# missing or empty.
DB_PORT = int(os.getenv("DB_PORT") or 3306)
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_DATABASE = os.getenv("DB_DATABASE")

In [82]:
# Load the three cleaned OSM category extracts plus the joined places file.
# The files are read in the same order as the names on the left-hand side.
df_sh, df_le, df_am, df_joined = map(
    pd.read_csv,
    (
        "osm_places_shop_cleaned.csv",
        "osm_places_leisure_cleaned.csv",
        "osm_places_amenity_cleaned.csv",
        "places.csv",
    ),
)

In [83]:
# Keep only the amenities whose id also appears in the joined places file.
# .copy() makes the result an independent frame: later cells assign new
# columns to df_am, and doing that on a boolean-mask selection triggers
# pandas' SettingWithCopyWarning.
df_am = df_am[df_am['id'].isin(df_joined['id'])].copy()

In [85]:
# List every distinct raw cuisine string (including the missing-value marker)
# to see how multi-valued entries are encoded.
distinct_cuisines = pd.unique(df_am['cuisine'])
for cuisine_value in distinct_cuisines:
    print(cuisine_value)

burger
korean
nan
donut;coffee_shop
american;pub
mexican;burrito;tacos;nachos;enchilada
sandwich;health_food
dessert
italian
thai
middle_eastern;american;brunch
american;continental
chinese;shanghainese
latin_american;breakfast
middle_eastern;breakfast
sushi
continental;eclectic
vietnamese
coffee_shop
charcuterie;snack;coffee_shop
german
pub;american
coffee_shop;sandwich
american
tex-mex
french;loire_valley
pub
irish
pizza
mexican
chinese
salad
bagel
sandwich
irish;gastropub
diner
snack
korean;american
burger;sandwich
mediterranean;american
italian;pizza
japanese
deli
donut
falafel;middle_eastern
juice
caribbean
turkish
new_american
indian
steak_house
brunch
international
breakfast;sandwich;burger
gastropub
burger;sandwich;coffee_shop;italian;ice_cream;seafood
sandwich;breakfast;salad
seafood
ice_cream
mediterranean
greek
breakfast;sandwhich;chicken;coffee_shop
bagel;sandwich
pub;brunch
french
coffee_shop;ice_cream
lebanese
russian
american;brunch
american;eclectic
spanish
fish_and_chi

In [86]:
# Show the ten most frequent raw cuisine strings, most common first.
top_cuisine_counts = df_am['cuisine'].value_counts().head(10)
for cuisine_label in top_cuisine_counts.index:
    print(cuisine_label)

coffee_shop
pizza
mexican
chinese
burger
italian
american
donut;coffee_shop
japanese
sandwich


In [87]:
# Cuisine tags that each become a boolean column on df_am; the order matches
# the cuisine_* columns of the amenities table.
cuisines = [
    "pizza", "chinese", "coffee_shop", "mexican", "italian",
    "burger", "donut", "sandwich", "japanese", "american",
]

In [88]:
# Cast the cuisine column to text so every entry can be split on ';' in the
# next step; missing values are expected to become the literal string 'nan',
# which matches none of the selected cuisine tags.
cuisine_as_text = df_am['cuisine'].astype(str)
df_am['cuisine'] = cuisine_as_text

In [90]:

# Split each ';'-separated cuisine string into its tags once, then add one
# boolean column per selected cuisine that is True when the tag is present.
cuisine_tag_sets = df_am['cuisine'].map(lambda text: set(text.split(';')))
for tag in cuisines:
    df_am[tag] = cuisine_tag_sets.map(lambda tags: tag in tags)

In [91]:
# Spot-check one of the new boolean columns: how many amenities carry the 'burger' tag.
df_am['burger'].value_counts()

burger
False    1045
True       65
Name: count, dtype: int64

In [93]:
# List the columns now available on df_am, including the boolean cuisine flags added above.
df_am.columns

Index(['id', 'lat', 'lon', 'addr:city', 'addr:housenumber', 'addr:postcode',
       'addr:state', 'addr:street', 'amenity', 'cuisine', 'drive_through',
       'name', 'opening_hours', 'phone', 'website', 'outdoor_seating',
       'cocktails', 'drink:beer', 'drink:liquor', 'drink:wine', 'wheelchair',
       'email', 'diet:vegan', 'diet:vegetarian', 'drink:coffee', 'drink:tea',
       'contact:instagram', 'pizza', 'chinese', 'coffee_shop', 'mexican',
       'italian', 'burger', 'donut', 'sandwich', 'japanese', 'american'],
      dtype='object')

In [95]:
# DDL for the amenities table: one row per place id (primary key, and a
# foreign key into places), a boolean flag per selected cuisine, and
# enumerated yes/no-style attributes.
amenities_column_defs = [
    "id BIGINT",
    "cuisine_pizza BOOLEAN",
    "cuisine_chinese BOOLEAN",
    "cuisine_coffee_shop BOOLEAN",
    "cuisine_mexican BOOLEAN",
    "cuisine_italian BOOLEAN",
    "cuisine_burger BOOLEAN",
    "cuisine_donut BOOLEAN",
    "cuisine_sandwich BOOLEAN",
    "cuisine_japanese BOOLEAN",
    "cuisine_american BOOLEAN",
    "diet_vegan ENUM('yes', 'no', 'unknown')",
    "drink_beer ENUM('yes', 'no', 'unknown')",
    "drink_tea ENUM('yes', 'no', 'unknown')",
    "drink_wine ENUM('yes', 'no', 'unknown')",
    "indoor_seating ENUM('yes', 'no', 'unknown')",
    "outdoor_seating ENUM('yes', 'no', 'unknown')",
    "wheelchair ENUM('yes', 'no', 'limited')",
    "PRIMARY KEY (id)",
    "FOREIGN KEY (id) REFERENCES places(id)",
]
# Assemble the statement with one definition per line, exactly as a
# hand-written multi-line string would lay it out.
sql_table = "CREATE TABLE amenities (\n" + ",\n".join(amenities_column_defs) + "\n);\n"

In [47]:
# Open the MySQL connection and a cursor that the remaining cells reuse.
try:
    # Establish a connection to the database
    connection = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_DATABASE,
    )

    # Create a cursor object to interact with the database
    cursor = connection.cursor()
except pymysql.Error as e:
    print(f"Error connecting to the database: {e}")
    # Re-raise instead of continuing: without a fresh connection the later
    # cells would either fail with NameError or silently reuse a stale
    # cursor/connection left in the kernel by an earlier run.
    raise

In [105]:
# Create the table
# Runs the CREATE TABLE statement held in sql_table on the open cursor.
# A pymysql.MySQLError is printed rather than raised, so the notebook keeps
# running after a failure.
try:
    cursor.execute(sql_table)
except pymysql.MySQLError as e:
    print(f"Error: {e}")

In [106]:
# Parameterised INSERT with one placeholder per target column. indoor_seating
# is not listed because df_am has no such column, so it is left NULL.
insert_query = """
INSERT INTO amenities (
id,
cuisine_pizza,
cuisine_chinese,
cuisine_coffee_shop,
cuisine_mexican,
cuisine_italian,
cuisine_burger,
cuisine_donut,
cuisine_sandwich,
cuisine_japanese,
cuisine_american,
diet_vegan,
drink_beer,
drink_tea,
drink_wine,
outdoor_seating,
wheelchair
)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""

# df_am columns in the same order as the placeholders above.
insert_columns = [
    'id', 'pizza', 'chinese', 'coffee_shop', 'mexican',
    'italian', 'burger', 'donut', 'sandwich', 'japanese',
    'american', 'diet:vegan', 'drink:beer',
    'drink:tea', 'drink:wine',
    'outdoor_seating', 'wheelchair',
]

# Data to be used: one parameter list per amenity. Missing values (NaN) are
# mapped to None so they are stored as SQL NULL; PyMySQL refuses to send a
# float NaN to the server.
data = [
    [None if pd.isna(value) else value for value in record]
    for record in df_am[insert_columns].itertuples(index=False, name=None)
]

try:
    cursor.executemany(insert_query, data)
except pymysql.MySQLError as e:
    print(f"Error: {e}")
    connection.rollback()
else:
    # Commit only when the whole batch was accepted.
    connection.commit()

In [112]:
# Sanity check: select everything from amenities, print the row count that
# execute() reports, and display the first row.
query = """
SELECT *
FROM amenities
"""
selected_row_count = cursor.execute(query)
print(selected_row_count)
cursor.fetchone()

1110


(349323821,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 'no',
 'no',
 'no',
 'no',
 None,
 'no',
 'no')

In [104]:
# Drop the amenities table entirely (schema and all rows).
# NOTE(review): this cell's execution count (104) is lower than the CREATE TABLE
# cell's (105), so it appears to have been run before the table was rebuilt.
# Running the notebook top to bottom would instead drop the freshly loaded
# table — confirm that is intended before executing this cell.
delete_query = """
DROP TABLE amenities
"""

cursor.execute(delete_query)

0