In [10]:
import gc
import os

# from cryptography.fernet import Fernet
# import hashlib
import pandas as pd
import pyarrow.parquet as pq
import sqlite3
from sqlite3 import Error

from CFG import Config
from security_config import SecurityConfig

In [11]:
config_ref = Config()
config = config_ref.get_config()


In [16]:

table_name = 'business'
data_file_path = os.path.join(config['data_dir'], f'{table_name}.parquet')
db_path = os.path.join(config['data_db_storage_dir'], config['data_db_name'])


In [64]:

conn = sqlite3.connect(db_path) 


In [65]:
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' \
                AND name=?", (table_name,))
result = cursor.fetchone()

if result:
    print("Table already exists. Dropping...")
    cursor.execute("DROP TABLE IF EXISTS " + table_name)
    print("Table dropped successfully.")
    
print("Reading data from parquet file...")
df = pq.read_table(data_file_path).to_pandas()

# Temporary fix for the issue with the data
# 'attributes', 'hours' columns contain dictionary data type values.
# sqlite3 does not support dictionary data type. 
df.drop(['attributes', 'hours'], axis=1, inplace=True) 


print(f"Creating table {table_name}...")
df.to_sql(table_name, conn)
print("Table created successfully.")


Table already exists. Dropping...
Table dropped successfully.
Reading data from parquet file...


Creating table business...
Table created successfully.


In [66]:
fetch_sample = 5

cursor.execute(f"SELECT * FROM {table_name} LIMIT {fetch_sample}".format(table_name))
result = cursor.fetchmany(fetch_sample)
for row in result:
    print(row)


(0, 'Pns2l4eNsfO8kk83dixA6A', 'Abby Rappoport, LAC, CMQ', '1616 Chapala St, Ste 2', 'Santa Barbara', 'CA', '93101', 34.42667770385742, -119.71119689941406, 5.0, 7, 0, 'Doctors, Traditional Chinese Medicine, Naturopathic/Holistic, Acupuncture, Health & Medical, Nutritionists')
(1, 'mpf3x-BjTdTEA3yCZrAYPw', 'The UPS Store', '87 Grasso Plaza Shopping Center', 'Affton', 'MO', '63123', 38.551124572753906, -90.335693359375, 3.0, 15, 1, 'Shipping Centers, Local Services, Notaries, Mailbox Centers, Printing Services')
(2, 'tUFrWirKiKi_TAnsVWINQQ', 'Target', '5255 E Broadway Blvd', 'Tucson', 'AZ', '85711', 32.223236083984375, -110.88045501708984, 3.5, 22, 0, 'Department Stores, Shopping, Fashion, Home & Garden, Electronics, Furniture Stores')
(3, 'MTSW4McQd7CbVtyjqoe9mw', 'St Honore Pastries', '935 Race St', 'Philadelphia', 'PA', '19107', 39.95550537109375, -75.15556335449219, 4.0, 80, 1, 'Restaurants, Food, Bubble Tea, Coffee & Tea, Bakeries')
(4, 'mWMc6_wTdE0EUBKIGXDVfA', 'Perkiomen Valley Br

In [67]:
cursor.execute("PRAGMA table_info({})".format(table_name))
columns = [column[1] for column in cursor.fetchall()]
print(columns)


['index', 'business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count', 'is_open', 'categories']


In [68]:
cursor.execute(f"SELECT categories FROM {table_name} LIMIT 5")
result = cursor.fetchall()
for row in result:
    print(row[0])


Doctors, Traditional Chinese Medicine, Naturopathic/Holistic, Acupuncture, Health & Medical, Nutritionists
Shipping Centers, Local Services, Notaries, Mailbox Centers, Printing Services
Department Stores, Shopping, Fashion, Home & Garden, Electronics, Furniture Stores
Restaurants, Food, Bubble Tea, Coffee & Tea, Bakeries
Brewpubs, Breweries, Food


In [74]:
query = f"SELECT * FROM {table_name} WHERE 'Restaurants' IN (categories) LIMIT 5"
cursor.execute(query)
result = cursor.fetchmany(5)
for row in result:
    print(row)



(3655, 'x2J-YIFeGZ-nsezzooVA9g', 'Twenty 21', '2005 Market St', 'Philadelphia', 'PA', '19103', 39.95410919189453, -75.17466735839844, 3.0, 8, 0, 'Restaurants')
(5873, 'VlrSuulqTFeQfV2PToJGvg', 'Fung Garden', '5118 Gall Blvd', 'Zephyrhills', 'FL', '33542', 28.232059478759766, -82.1803970336914, 3.5, 8, 0, 'Restaurants')
(6856, 'mFE9V6LPpsDRUQLEBsBRRA', 'Pearl of East', '2049 W Oregon Ave', 'Philadelphia', 'PA', '19145', 39.918636322021484, -75.18241882324219, 3.0, 17, 1, 'Restaurants')
(9801, 'BXUqeFDqvTK2uL6sQd5YnQ', 'Crazy D’s Hot Chicken', '101 University Ter', 'Reno', 'NV', '89503', 39.53628921508789, -119.81822967529297, 3.5, 37, 1, 'Restaurants')
(11413, 'Bf1cdbdHXi8Omlkc7KShkg', 'Siam Cafe', '435 Esplanade Ave', 'New Orleans', 'LA', '70116', 29.96196746826172, -90.05735778808594, 4.0, 9, 0, 'Restaurants')


In [75]:
query = f"SELECT * FROM {table_name} WHERE 'Restaurants' IN (categories) AND city = 'Philadelphia' LIMIT 5"
cursor.execute(query)
result = cursor.fetchall()
for row in result:
    print(row)


(3655, 'x2J-YIFeGZ-nsezzooVA9g', 'Twenty 21', '2005 Market St', 'Philadelphia', 'PA', '19103', 39.95410919189453, -75.17466735839844, 3.0, 8, 0, 'Restaurants')
(6856, 'mFE9V6LPpsDRUQLEBsBRRA', 'Pearl of East', '2049 W Oregon Ave', 'Philadelphia', 'PA', '19145', 39.918636322021484, -75.18241882324219, 3.0, 17, 1, 'Restaurants')
(14327, 'TCdNYh5tdjoNmA1WMx3ZPg', "Roselena's", '1623 E Passyunk Ave', 'Philadelphia', 'PA', '19148', 39.92937469482422, -75.1638412475586, 4.0, 10, 0, 'Restaurants')
(25351, 'x2xQUz1YfOzy5A9FRuCpMw', 'Passage To India', '1320 Walnut St', 'Philadelphia', 'PA', '19107', 39.948951721191406, -75.16304016113281, 3.0, 23, 0, 'Restaurants')
(40722, 'qaPADjsU6c0Ufho0zdL6Yw', 'Azure', '931 N 2nd St', 'Philadelphia', 'PA', '19123', 39.964820861816406, -75.1402359008789, 3.5, 22, 0, 'Restaurants')


In [69]:
# conn.close()