This notebook is for uploading leisure, shop, and amenities data to the database.

In [46]:
from dotenv import load_dotenv
import numpy as np
import os
import pymysql
import pandas as pd

In [47]:
load_dotenv()
DB_HOST = os.getenv("DB_HOST")
DB_PORT = int(os.getenv("DB_PORT"))
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_DATABASE = os.getenv("DB_DATABASE")

In [48]:
df_sh = pd.read_csv("../../data_evaluation/osm_places/osm_places_shop_cleaned.csv") # OpenStreetMaps shop data
df_le = pd.read_csv("../../data_evaluation/osm_places/osm_places_leisure_cleaned.csv") # OpenStreetMaps leisure data
df_am = pd.read_csv("../../data_evaluation/osm_places/osm_places_amenity_cleaned.csv") # OpenStreetMaps amenity data
df_joined = pd.read_csv("places.csv")

In [49]:
# Filter out amenities that don't have google photos data yet
df_am = df_am[df_am['id'].isin(df_joined['id'])]

In [55]:
# Top 10 cuisines, will be added as a variable
cuisines = [
    'pizza',
    'chinese',
    'coffee_shop',
    'mexican',
    'italian',
    'burger',
    'donut',
    'sandwich',
    'japanese',
    'american'
]

df_am['cuisine'] = df_am['cuisine'].astype(str) 

for cuisine in cuisines:
    df_am[cuisine] = df_am['cuisine'].apply(lambda s: cuisine in s.split(';'))

In [70]:
df_sh.columns

Index(['id', 'lat', 'lon', 'name', 'opening_hours', 'shop', 'website',
       'addr:city', 'addr:housenumber', 'addr:postcode', 'addr:state',
       'addr:street', 'phone', 'outdoor_seating', 'wheelchair', 'email',
       'drink:coffee'],
      dtype='object')

In [58]:
am_sql_table = """CREATE TABLE amenities (
id BIGINT,
cuisine_pizza BOOLEAN,
cuisine_chinese BOOLEAN,
cuisine_coffee_shop BOOLEAN,
cuisine_mexican BOOLEAN,
cuisine_italian BOOLEAN,
cuisine_burger BOOLEAN,
cuisine_donut BOOLEAN,
cuisine_sandwich BOOLEAN,
cuisine_japanese BOOLEAN,
cuisine_american BOOLEAN,
diet_vegan ENUM('yes', 'no', 'unknown'),
drink_beer ENUM('yes', 'no', 'unknown'),
drink_tea ENUM('yes', 'no', 'unknown'),
drink_wine ENUM('yes', 'no', 'unknown'),
outdoor_seating ENUM('yes', 'no', 'unknown'),
wheelchair ENUM('yes', 'no', 'limited'),
PRIMARY KEY (id),
FOREIGN KEY (id) REFERENCES places(id)
);
"""

In [None]:
sh_sql_table = """CREATE TABLE shop (
id BIGINT,
drink_coffee ENUM('yes', 'no', 'unknown'),
outdoor_seating ENUM('yes', 'no', 'unknown'),
wheelchair ENUM('yes', 'no', 'limited'),
PRIMARY KEY (id),
FOREIGN KEY (id) REFERENCES places(id)
);
"""

In [65]:
try:
    # Establish a connection to the database
    connection = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_DATABASE
    )

        # Create a cursor object to interact with the database
    cursor = connection.cursor()
except pymysql.Error as e:
    print(f"Error connecting to the database: {e}")

In [66]:
# Create the table
try:
    cursor.execute(am_sql_table)
except pymysql.MySQLError as e:
    print(f"Error: {e}")

In [67]:
insert_query = """
INSERT INTO amenities (
id,
cuisine_pizza,
cuisine_chinese,
cuisine_coffee_shop,
cuisine_mexican,
cuisine_italian,
cuisine_burger,
cuisine_donut,
cuisine_sandwich,
cuisine_japanese,
cuisine_american,
diet_vegan,
drink_beer,
drink_tea,
drink_wine,
outdoor_seating,
wheelchair
)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""

data = [] # Data to be used

for i, row in df_am.iterrows():
    data.append([row['id'], row['pizza'], row['chinese'], row['coffee_shop'], row['mexican'],
                 row['italian'], row['burger'], row['donut'], row['sandwich'], row['japanese'], 
                 row['american'], row['diet:vegan'], row['drink:beer'], 
                 row['drink:tea'], row['drink:wine'],
                 row['outdoor_seating'], row['wheelchair']])


try:
    cursor.executemany(insert_query, data)
except pymysql.MySQLError as e:
    print(f"Error: {e}")
    connection.rollback()

connection.commit()

In [68]:
query = """
SELECT *
FROM amenities
"""
print(cursor.execute(query))
cursor.fetchmany(5)

1110


((349323821,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  ''),
 (357618253,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'yes',
  ''),
 (357620442,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'yes',
  'unknown',
  'yes',
  'unknown',
  ''),
 (357621192,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  ''),
 (368042980,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  'unknown',
  ''))

In [63]:
# Delete the table
delete_query = """
DROP TABLE amenities
"""

cursor.execute(delete_query)

0

In [64]:
# Close the cursor and connection
if cursor:
    cursor.close()
if connection:
    connection.close()
    print("Database connection closed.")

Database connection closed.
