This notebook is for uploading leisure, shop, and amenities data to the database.

In [46]:
from dotenv import load_dotenv
import numpy as np
import os
import pymysql
import pandas as pd

In [47]:
# Load connection settings from the environment (populated from a .env file by load_dotenv)
load_dotenv()
DB_HOST = os.getenv("DB_HOST")
# os.getenv returns None when the variable is missing, and int(None) raises TypeError;
# fall back to MySQL's default port (3306) when DB_PORT is unset or empty.
DB_PORT = int(os.getenv("DB_PORT") or 3306)
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_DATABASE = os.getenv("DB_DATABASE")

In [82]:
# OpenStreetMaps shop data
df_sh = pd.read_csv(
    "../../data_evaluation/osm_places/osm_places_shop_cleaned.csv"
)
# OpenStreetMaps leisure data
df_le = pd.read_csv(
    "../../data_evaluation/osm_places/osm_places_leisure_cleaned.csv"
)
# OpenStreetMaps amenity data
df_am = pd.read_csv(
    "../../data_evaluation/osm_places/osm_places_amenity_cleaned.csv"
)
# Reference frame whose 'id' column is used to filter the three frames above
df_joined = pd.read_csv("places.csv")

In [83]:
# Filter out amenities that don't have google photos data yet.
# The same id filter is applied to all three frames: only rows whose 'id'
# appears in df_joined are kept.
known_ids = df_joined['id']
# .copy() detaches each filtered result from its source frame, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
df_sh = df_sh[df_sh['id'].isin(known_ids)].copy()
df_le = df_le[df_le['id'].isin(known_ids)].copy()
df_am = df_am[df_am['id'].isin(known_ids)].copy()

In [102]:
# Top 10 cuisines; each one becomes its own boolean column on df_am
cuisines = [
    'pizza',
    'chinese',
    'coffee_shop',
    'mexican',
    'italian',
    'burger',
    'donut',
    'sandwich',
    'japanese',
    'american'
]

# Cast to str so every value can be split; missing values become the string
# 'nan', which matches none of the cuisines above.
df_am['cuisine'] = df_am['cuisine'].astype(str)

# Tokenise each ';'-separated value once. Stripping the tokens makes
# "pizza; italian" match 'italian' as well (an unstripped split would yield ' italian').
cuisine_tokens = df_am['cuisine'].apply(
    lambda s: {token.strip() for token in s.split(';')}
)

for cuisine in cuisines:
    # True when this cuisine is one of the row's tokens
    df_am[cuisine] = cuisine_tokens.apply(lambda tokens: cuisine in tokens)

In [87]:
# Display the shop DataFrame for a visual check of its columns and values
df_sh

Unnamed: 0,id,lat,lon,name,opening_hours,shop,website,addr:city,addr:housenumber,addr:postcode,addr:state,addr:street,phone,outdoor_seating,wheelchair,email,drink:coffee
0,357623896,40.661101,-73.953359,Hee-Space Thrift Shop,Th-Su 12:00-18:00,charity,https://www.hee-space.com,,,,,,,unknown,unknown,,unknown
1,368053310,40.736926,-73.989601,Barnes & Noble,"Su 10:00-21:00; Mo-Th 09:00-21:00; Fr, Sa 09:0...",books,https://stores.barnesandnoble.com/store/2675,New York,33,10003.0,NY,East 17th Street,+1 212-253-0810,unknown,unknown,,unknown
3,418520887,40.636934,-74.076656,Everything Goes Book Cafe,,books,,Staten Island,208,,NY,Bay Street,,yes,limited,,unknown
4,419362653,40.727268,-73.990374,The Hidden Rose,Tu-Sa 11:30-20:00,tattoo,,,,,,,,unknown,unknown,,unknown
5,419366609,40.743566,-73.979928,Craft + Carry,,beverages,,,,,,,,yes,unknown,,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
476,4900466400,40.708752,-74.005271,Midtown Comics Downtown,,books,https://www.midtowncomics.com/downtownstore,,64,10038.0,,Fulton Street,,unknown,unknown,,unknown
477,4913377713,40.755677,-73.990684,Infinity Tattoo,,tattoo,,,612,,,8th Avenue,,unknown,unknown,,unknown
478,4922311228,40.761249,-73.990350,Amy's Bread,,bakery,https://www.amysbread.com/,,672,,,9th Avenue,+1-212-977-2670,unknown,unknown,,unknown
479,4924615250,40.724421,-73.948502,Moe's Doughs Doughnuts,,bakery,,Brooklyn,126,,,Nassau Avenue,,unknown,unknown,,unknown


In [99]:
# DDL for the `amenities` table. The statement is written as adjacent string
# literals (one per SQL line) so each column group can carry its own comment;
# Python concatenates them into a single string.
am_sql_table = (
    "CREATE TABLE amenities (\n"
    # Key column; also declared as a foreign key to places(id) below
    "id BIGINT,\n"
    # One boolean flag per cuisine
    "cuisine_pizza BOOLEAN,\n"
    "cuisine_chinese BOOLEAN,\n"
    "cuisine_coffee_shop BOOLEAN,\n"
    "cuisine_mexican BOOLEAN,\n"
    "cuisine_italian BOOLEAN,\n"
    "cuisine_burger BOOLEAN,\n"
    "cuisine_donut BOOLEAN,\n"
    "cuisine_sandwich BOOLEAN,\n"
    "cuisine_japanese BOOLEAN,\n"
    "cuisine_american BOOLEAN,\n"
    # Three-valued tags: yes / no / unknown
    "diet_vegan ENUM('yes', 'no', 'unknown'),\n"
    "drink_beer ENUM('yes', 'no', 'unknown'),\n"
    "drink_tea ENUM('yes', 'no', 'unknown'),\n"
    "drink_wine ENUM('yes', 'no', 'unknown'),\n"
    "outdoor_seating ENUM('yes', 'no', 'unknown'),\n"
    # wheelchair additionally allows 'limited'
    "wheelchair ENUM('yes', 'no', 'limited', 'unknown'),\n"
    "PRIMARY KEY (id),\n"
    "FOREIGN KEY (id) REFERENCES places(id)\n"
    ");\n"
)

In [91]:
# DDL for the `shop` table, written as adjacent string literals (one per SQL
# line) that Python concatenates into a single statement.
sh_sql_table = (
    "CREATE TABLE shop (\n"
    # Key column; also declared as a foreign key to places(id) below
    "id BIGINT,\n"
    # Three-valued tags: yes / no / unknown
    "drink_coffee ENUM('yes', 'no', 'unknown'),\n"
    "outdoor_seating ENUM('yes', 'no', 'unknown'),\n"
    # wheelchair additionally allows 'limited'
    "wheelchair ENUM('yes', 'no', 'limited', 'unknown'),\n"
    "PRIMARY KEY (id),\n"
    "FOREIGN KEY (id) REFERENCES places(id)\n"
    ");\n"
)

In [65]:
# Pre-bind both names so the clean-up cell's `if cursor:` / `if connection:`
# checks work (instead of raising NameError) even when connecting fails.
connection = None
cursor = None

try:
    # Establish a connection to the database
    connection = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_DATABASE
    )

    # Create a cursor object to interact with the database
    cursor = connection.cursor()
except pymysql.Error as e:
    print(f"Error connecting to the database: {e}")
    # Every later cell needs a live cursor, so stop here rather than let the
    # next cell fail with an unrelated-looking error.
    raise

In [100]:
# Create the amenities table
# Runs the CREATE TABLE statement held in am_sql_table. A MySQL error is
# printed rather than raised, so the notebook keeps running.
try:
    cursor.execute(am_sql_table)
except pymysql.MySQLError as e:
    print(f"Error: {e}")

In [93]:
# Create the shop table
# Runs the CREATE TABLE statement held in sh_sql_table. A MySQL error is
# printed rather than raised, so the notebook keeps running.
try:
    cursor.execute(sh_sql_table)
except pymysql.MySQLError as e:
    print(f"Error: {e}")

In [103]:
# Parameterised INSERT for the amenities table: one %s placeholder per listed
# column, in the same order.
am_insert_query = """
INSERT INTO amenities (
id,
cuisine_pizza,
cuisine_chinese,
cuisine_coffee_shop,
cuisine_mexican,
cuisine_italian,
cuisine_burger,
cuisine_donut,
cuisine_sandwich,
cuisine_japanese,
cuisine_american,
diet_vegan,
drink_beer,
drink_tea,
drink_wine,
outdoor_seating,
wheelchair
)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""

# df_am columns in the same order as the INSERT column list above
am_source_columns = [
    'id',
    'pizza', 'chinese', 'coffee_shop', 'mexican', 'italian',
    'burger', 'donut', 'sandwich', 'japanese', 'american',
    'diet:vegan', 'drink:beer', 'drink:tea', 'drink:wine',
    'outdoor_seating', 'wheelchair',
]

# Data to be used: one list of values per row. Selecting the columns once and
# converting with .values.tolist() replaces the row-by-row iterrows() loop.
data = df_am[am_source_columns].values.tolist()

try:
    cursor.executemany(am_insert_query, data)
except pymysql.MySQLError as e:
    print(f"Error: {e}")
    connection.rollback()
else:
    # Commit only when the insert succeeded; after a rollback there is nothing to commit
    connection.commit()

In [94]:
# Parameterised INSERT for the shop table: one %s placeholder per listed
# column, in the same order.
sh_insert_query = """
INSERT INTO shop (
id,
drink_coffee,
outdoor_seating,
wheelchair
)
VALUES (%s,%s,%s,%s)
"""

# df_sh columns in the same order as the INSERT column list above
sh_source_columns = ['id', 'drink:coffee', 'outdoor_seating', 'wheelchair']

# Data to be used: one list of values per row. Selecting the columns once and
# converting with .values.tolist() replaces the row-by-row iterrows() loop.
data = df_sh[sh_source_columns].values.tolist()

try:
    cursor.executemany(sh_insert_query, data)
except pymysql.MySQLError as e:
    print(f"Error: {e}")
    connection.rollback()
else:
    # Commit only when the insert succeeded; after a rollback there is nothing to commit
    connection.commit()

In [104]:
# Read the amenities table back to check what was stored
query = """
SELECT *
FROM amenities
"""
# execute() returns an integer, which is printed (1110 in the saved output)
n_rows = cursor.execute(query)
print(n_rows)
# Last expression of the cell: the first five fetched rows are displayed
cursor.fetchmany(5)

1110


((349323821,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown'),
 (357618253,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'yes',
  'unknown'),
 (357620442,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'yes',
  'unknown',
  'yes',
  'unknown',
  'unknown'),
 (357621192,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown'),
 (368042980,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown'))

In [98]:
# Delete the amenities table.
# WARNING: destructive - this removes the table and all rows in it.
# IF EXISTS makes the cell safe to re-run: dropping a table that is already
# gone no longer raises an unhandled error.
delete_query = """
DROP TABLE IF EXISTS amenities
"""

cursor.execute(delete_query)

0

In [92]:
# Delete the shop table.
# WARNING: destructive - this removes the table and all rows in it.
# IF EXISTS makes the cell safe to re-run: dropping a table that is already
# gone no longer raises an unhandled error.
delete_query = """
DROP TABLE IF EXISTS shop
"""

cursor.execute(delete_query)

0

In [64]:
# Close the cursor and connection
if cursor:
    cursor.close()
# PyMySQL raises "Already closed" when close() is called on a connection that
# was closed before; checking `connection.open` makes this cell safe to re-run.
if connection and connection.open:
    connection.close()
    print("Database connection closed.")

Database connection closed.
