In [1]:
# Importing dependencies
import os
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

In [2]:
# Path to JSON
businessJSON = os.path.join('sourceData', 'business.json')

In [3]:
# Creating pd dataframe
business = pd.read_json(businessJSON, lines=True)

In [4]:
# Select only the businesses in Ontario
business_on = business.loc[business['state'] == 'ON']

In [5]:
# Dropping any rows with blank values in these categories
business_on_clean = business_on.dropna(subset=['name', 'address', 'postal_code', 'city', 'state', 'latitude', 'longitude', 'attributes',
                                                          'categories', 'hours']).reset_index(drop=True)

In [6]:
# Selecting all of the restaurants
restaurant = business_on_clean[business_on_clean['categories'].str.contains('Restaurants')].reset_index(drop=True)

In [7]:
# Regex to fix spelling mistakes 
restaurant = restaurant.replace({'city': {'^AGINCOURT$': 'Agincourt',
                                            '^Bradford West Gwillimbury$': 'Bradford',
                                            '^East Ajax$': 'Ajax',
                                            '^Caledon.{,8}$': 'Caledon',
                                            '^East Gwil{1,2}imbury$': 'East Gwillimbury',
                                            '(?i)^.*icoke$': 'Etobicoke',
                                            '^.{,9}Toro?nto.{,9}$': 'Toronto',
                                            'Malton': 'Mississauga',
                                            '^.{,5}Missis{1,2}a?ua?g.{1,2}$': 'Mississauga',
                                            '^Regional Municipality of York$': 'North York',
                                            '(?i)^North.{0,2}York$': 'North York',
                                            '^York Regional Municipality$': 'York',
                                            '^Willowdale$': 'North York',
                                            '^North of Brampton$': 'Brampton',
                                            '(?i)^Oak.?ridges$': 'Oak Ridges',
                                            '^oakville$': 'Oakville',
                                            '(?i)^Richmond?.?Hill?$': 'Richmond Hill',
                                            '^.{,8}Scar.?bo?rough$': 'Scarborough',
                                            '^.{,11}Stouffville$': 'Stouffville',
                                            '(?i)^Thornhil{,2}$': 'Thornhill',
                                            '^.*Vaugh.{,3}$': 'Vaughan',
                                            '^Wh.?i.?by$': 'Whitby'}}, regex=True)

In [8]:
# Only taking these columns
restaurant = restaurant.loc[:, ['name', 'address', 'postal_code', 'city', 'state', 'latitude', 'longitude','categories', 'stars', 'hours', 'attributes']]
restaurant.columns = ['Name', 'Address', 'Postal_code', 'City', 'Province', 'Latitude', 'Longitude', 'Categories', 'Stars', 'Hours', 'Attributes']

In [9]:
hours_raw = pd.DataFrame(json_normalize(data=restaurant['Hours']))

In [10]:
# Reorganise columns
hours_raw = hours_raw.loc[:,['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']]

Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
10712,0:0-0:0,11:0-22:0,11:0-22:0,11:0-22:0,11:0-23:0,12:0-23:0,12:0-23:0
10713,11:30-22:30,11:30-22:30,11:30-22:30,11:30-23:0,11:30-23:0,11:30-23:0,11:30-22:30
10714,11:30-22:0,,11:30-22:0,11:30-22:0,11:30-23:45,12:0-23:45,12:0-22:0
10715,0:0-0:0,11:0-23:0,11:0-23:0,11:0-23:0,11:0-23:0,12:0-23:0,12:0-22:0
10716,11:0-21:30,11:0-21:30,11:0-21:30,11:0-21:30,11:0-22:30,11:0-22:30,16:0-21:30


In [11]:
# Create a new DF with opening and closing hours
columns=hours_raw.columns
hours = hours_raw

for column in columns:
    hours[[f"{column}_open", f"{column}_close"]]=hours_raw[column].str.split('-', expand=True)
#     print(column)
hours.drop(columns=columns, inplace=True)

In [12]:
# Add hours column to the original DF
restaurant = restaurant.join(hours)
restaurant.drop(columns='Hours', inplace=True)

Unnamed: 0,Name,Address,Postal_code,City,Province,Latitude,Longitude,Categories,Stars,Attributes,...,Wednesday_open,Wednesday_close,Thursday_open,Thursday_close,Friday_open,Friday_close,Saturday_open,Saturday_close,Sunday_open,Sunday_close
10712,The King's Kitchen,"9275 Hwy 48, Unit 11",L6E 1A2,Markham,ON,43.893994,-79.263055,"Imported Food, Chinese, Food, Seafood, Special...",3.0,"{'Caters': 'True', 'RestaurantsGoodForGroups':...",...,11:0,22:0,11:0,22:0,11:0,23:0,12:0,23:0,12:0,23:0
10713,KOKO! Share Bar,81 Yorkville Avenue,M5R 1C1,Toronto,ON,43.670948,-79.391502,"Korean, Japanese, Asian Fusion, Restaurants",3.5,"{'RestaurantsTakeOut': 'True', 'Alcohol': 'u'f...",...,11:30,22:30,11:30,23:0,11:30,23:0,11:30,23:0,11:30,22:30
10714,Indian Hero,8920 Highway 50,L6P 3A3,Brampton,ON,43.775089,-79.653807,"Restaurants, Indian",3.0,"{'WiFi': ''free'', 'RestaurantsTakeOut': 'True...",...,11:30,22:0,11:30,22:0,11:30,23:45,12:0,23:45,12:0,22:0
10715,Thai Fantasy,578 Yonge Street,M4Y 1Z3,Toronto,ON,43.66512,-79.384809,"Restaurants, Thai",4.0,"{'RestaurantsPriceRange2': '2', 'RestaurantsGo...",...,11:0,23:0,11:0,23:0,11:0,23:0,12:0,23:0,12:0,22:0
10716,Asia Hut,1450 Kingston Rd,L1V 1C1,Pickering,ON,43.841844,-79.083881,"Restaurants, Soup, Chinese, Caribbean",4.5,"{'BikeParking': 'True', 'RestaurantsAttire': '...",...,11:0,21:30,11:0,21:30,11:0,22:30,11:0,22:30,16:0,21:30


In [19]:
# Create categories df sorted by restaurant_id
category = restaurant['Categories'].str.split(',', expand=True)
category['Restaurant_id']=category.index
category = pd.melt(category, id_vars='Restaurant_id', value_name='Category').drop(columns='variable').sort_values('Restaurant_id')

(192906, 2)

In [30]:
# Eliminating all Restaurant and None categories
category = category.loc[~category['Category'].isin(['Restaurants',None])]

In [51]:
# Print the unique categories
print(category['Category'].sort_values().unique())

[' Acai Bowls' ' Acne Treatment' ' Active Life' ' Adult Entertainment'
 ' Afghan' ' African' ' Airport Lounges' ' Alternative Medicine'
 ' American (New)' ' American (Traditional)' ' Amusement Parks'
 ' Antiques' ' Appliances' ' Arabian' ' Arcades' ' Argentine'
 ' Art Classes' ' Art Galleries' ' Art Schools' ' Arts & Crafts'
 ' Arts & Entertainment' ' Asian Fusion' ' Australian' ' Austrian'
 ' Auto Parts & Supplies' ' Automotive' ' Bagels' ' Bakeries'
 ' Bangladeshi' ' Barbeque' ' Bars' ' Basque' ' Beaches' ' Beauty & Spas'
 ' Bed & Breakfast' ' Beer' ' Beer Bar' ' Beer Hall' ' Belgian' ' Bistros'
 ' Body Shops' ' Books' ' Bookstores' ' Bowling' ' Brasseries'
 ' Brazilian' ' Breakfast & Brunch' ' Breweries' ' Brewpubs' ' British'
 ' Bubble Tea' ' Buffets' ' Burgers' ' Burmese' ' Butcher' ' Cabaret'
 ' Cafes' ' Cafeteria' ' Cajun/Creole' ' Cambodian' ' Canadian (New)'
 ' Candy Stores' ' Cantonese' ' Car Dealers' ' Car Rental'
 ' Cards & Stationery' ' Caribbean' ' Casinos' ' Caterers'
 '

In [None]:
# Keep only the categories of interest
# category = category.loc[~category['Category'].isin([[' Acai Bowls', 'Acne Treatment', 'Active Life', 'Adult Entertainment', 'Afghan', 'African', 'Airport Lounges', 'Alternative Medicine','American (New)', 'American (Traditional)', 'Amusement Parks', 'Antiques', 'Appliances', 'Arabian', 'Arcades', 'Argentine', 'Art Classes', 'Art Galleries', 'Art Schools', 'Arts & Crafts', 'Arts & Entertainment', 'Asian Fusion', 'Australian', 'Austrian', 'Auto Parts & Supplies', 'Automotive', 'Bagels', 'Bakeries', 'Bangladeshi', 'Barbeque', 'Bars', 'Basque', 'Beaches', 'Beauty & Spas', 'Bed & Breakfast', 'Beer', 'Beer Bar', 'Beer Hall', 'Belgian', 'Bistros', 'Body Shops', 'Books', 'Bookstores', 'Bowling', 'Brasseries', 'Brazilian', 'Breakfast & Brunch', 'Breweries', 'Brewpubs', 'British', 'Bubble Tea', 'Buffets', 'Burgers', 'Burmese', 'Butcher', 'Cabaret', 'Cafes', 'Cafeteria', 'Cajun/Creole', 'Cambodian', 'Canadian (New)', 'Candy Stores', 'Cantonese', 'Car Dealers', 'Car Rental', 'Cards & Stationery', 'Caribbean', 'Casinos', 'Caterers', 'Champagne Bars', 'Cheese Shops', 'Cheesesteaks', 'Chicken Shop', 'Chicken Wings', 'Chinese', 'Chiropractors', 'Chocolatiers & Shops', 'Churches', 'Cideries', 'Cinema', 'Cocktail Bars', 'Coffee & Tea', 'Coffee & Tea Supplies', 'Coffee Roasteries', 'Colombian', 'Comedy Clubs', 'Comfort Food', 'Comic Books', 'Community Centers', 'Community Service/Non-Profit', 'Contractors', 'Convenience Stores', 'Cooking Classes', 'Cooking Schools', 'Couriers & Delivery Services', 'Creperies', 'Cuban', 'Custom Cakes', 'Czech', 'DJs', 'Dance Clubs', 'Day Spas', 'Delicatessen', 'Delis', 'Department Stores', 'Desserts', 'Dim Sum', 'Diners', 'Dinner Theater', 'Discount Store', 'Dive Bars', 'Do-It-Yourself Food', 'Donairs', 'Donuts', 'Dry Cleaning & Laundry', 'Education', 'Egyptian', 'Electronics', 'Escape Games', 'Ethical Grocery', 'Ethiopian', 'Ethnic Food', 'Ethnic Grocery', 'Event Planning & Services', 'Eyelash Service', 'Falafel', 'Farmers Market', 'Fashion', 'Fast Food', 'Festivals', 'Filipino', 'Fish & Chips', 'Fitness & Instruction', 'Flea Markets', 'Florists', 'Flowers & Gifts', 'Fondue', 'Food', 'Food Court', 'Food Delivery Services', 'Food Stands', 'Food Tours', 'Food Trucks', 'French', 'Fruits & Veggies', 'Furniture Stores', 'Gas Stations', 'Gastropubs', 'Gay Bars', 'Gelato', 'German', 'Gift Shops', 'Gluten-Free', 'Golf', 'Greek', 'Grocery', 'Gyms', 'Hainan', 'Hair Removal', 'Hair Salons', 'Haitian', 'Hakka', 'Halal', 'Hawaiian', 'Head Shops', 'Health & Medical', 'Health Markets', 'Heating & Air Conditioning/HVAC', 'Himalayan/Nepalese', 'Hobby Shops', 'Home & Garden', 'Home Decor', 'Home Health Care', 'Home Services', 'Hong Kong Style Cafe', 'Hookah Bars', 'Hot Dogs', 'Hot Pot', 'Hotels', 'Hotels & Travel', 'Hungarian', 'IT Services & Computer Repair', 'Iberian', 'Ice Cream & Frozen Yogurt', 'Imported Food', 'Indian', 'Indonesian', 'Indoor Playcentre', 'International', 'International Grocery', 'Internet Cafes', 'Irish', 'Irish Pub', 'Italian', 'Izakaya', 'Japanese', 'Jazz & Blues', 'Jewelry', 'Juice Bars & Smoothies', 'Karaoke', 'Kebab', 'Kids Activities', 'Kitchen & Bath', 'Kombucha', 'Korean', 'Kosher', 'Landscape Architects', 'Landscaping', 'Laotian', 'Latin American', 'Laundromat', 'Laundry Services', 'Leather Goods', 'Lebanese', 'Life Coach', 'Live/Raw Food', 'Local Flavor', 'Local Services', 'Lounges', 'Macarons', 'Mags', 'Malaysian', 'Mass Media', 'Massage', 'Mattresses', 'Meat Shops', 'Medical Spas', 'Meditation Centers', 'Mediterranean' " Men's Clothing"  ' Mexican', 'Middle Eastern', 'Milkshake Bars', 'Minho', 'Mobile Phone Repair', 'Modern European', 'Mongolian', 'Moroccan', 'Music & DVDs', 'Music & Video', 'Music Venues', 'Musical Instruments & Teachers', 'Nicaraguan', 'Nightlife', 'Noodles', 'Nutritionists', 'Oil Change Stations', 'Organic Stores', 'Outdoor Furniture Stores', 'Pakistani', 'Pan Asian', 'Party & Event Planning', 'Party Supplies', 'Pasta Shops', 'Patisserie/Cake Shop', 'Performing Arts', 'Persian/Iranian', 'Personal Chefs', 'Peruvian', 'Pets', 'Piano Bars', 'Pizza', 'Playgrounds', 'Plumbing', 'Poke', 'Polish', 'Pool Halls', 'Pop-Up Restaurants', 'Pop-up Shops', 'Portuguese', 'Poutineries', 'Print Media', 'Professional Services', 'Public Markets', 'Public Services & Government', 'Pubs', 'Ramen', 'Recording & Rehearsal Studios', 'Recreation Centers', 'Religious Organizations', 'Restaurants', 'Reunion', 'Russian', 'Salad', 'Salvadoran', 'Sandwiches', 'Scandinavian', 'Seafood', 'Seafood Markets', 'Shoe Stores', 'Shopping', 'Shopping Centers', 'Singaporean', 'Ski Resorts', 'Ski Schools', 'Skin Care', 'Slovakian', 'Smokehouse', 'Social Clubs', 'Soul Food', 'Soup', 'South African', 'Southern', 'Spanish', 'Special Education', 'Specialty Food', 'Specialty Schools', 'Sporting Goods', 'Sports Bars', 'Sports Clubs', 'Sports Wear', 'Squash', 'Sri Lankan', 'Steakhouses', 'Street Vendors', 'Strip Clubs', 'Summer Camps', 'Supernatural Readings', 'Supper Clubs', 'Sushi Bars', 'Swimming Pools', 'Swiss Food', 'Syrian', 'Szechuan', 'Tabletop Games', 'Tacos', 'Taiwanese', 'Tapas Bars', 'Tapas/Small Plates', 'Tea Rooms', 'Team Building Activities', 'Tempura', 'Tennis', 'Teppanyaki', 'Tex-Mex', 'Thai', 'Themed Cafes', 'Tiki Bars', 'Tobacco Shops', 'Tonkatsu', 'Tours', 'Toy Stores', 'Trainers', 'Trinidadian', 'Turkish', 'Udon', 'Ukrainian', 'Vegan', 'Vegetarian', 'Venezuelan', 'Venues & Event Spaces', 'Videos & Video Game Rental', 'Vietnamese', 'Vinyl Records', 'Vitamins & Supplements', 'Waffles', 'Walking Tours', 'Waxing', 'Wedding Planning', 'Whiskey Bars', 'Wholesale Stores', 'Wigs', 'Wine & Spirits', 'Wine Bars', 'Wineries' " Women's Clothing" ' Wraps'  'Accessories', 'Active Life', 'Adult Entertainment', 'Afghan', 'African'  'Airports', 'American (New)', 'American (Traditional)', 'Animal Shelters'  'Appliances', 'Arabian', 'Arcades', 'Argentine', 'Art Galleries'  'Art Schools', 'Arts & Crafts', 'Arts & Entertainment', 'Asian Fusion'  'Australian', 'Automotive', 'Bagels', 'Bakeries', 'Bangladeshi', 'Barbeque'  'Bars', 'Bartenders', 'Beauty & Spas', 'Bed & Breakfast', 'Beer', 'Beer Bar'  'Belgian', 'Bistros', 'Books', 'Brasseries', 'Brazilian', 'Breakfast & Brunch'  'Breweries', 'Brewpubs', 'British', 'Bubble Tea', 'Buffets', 'Burgers'  'Burmese', 'Butcher', 'Cafes', 'Cajun/Creole', 'Cambodian', 'Canadian (New)'  'Cantonese', 'Caribbean', 'Caterers', 'Cheese Shops', 'Cheesesteaks'  'Chicken Shop', 'Chicken Wings', 'Chinese', 'Chocolatiers & Shops'  'Cideries', 'Cocktail Bars', 'Coffee & Tea', 'Coffee & Tea Supplies'  'Coffee Roasteries', 'Colombian', 'Comfort Food'  'Community Service/Non-Profit', 'Convenience Stores', 'Creperies'  'Custom Cakes', 'Czech', 'Czech/Slovakian', 'Dance Clubs', 'Delicatessen'  'Delis', 'Desserts', 'Dim Sum', 'Diners', 'Dinner Theater', 'Dive Bars'  'Do-It-Yourself Food', 'Donairs', 'Donuts', 'Education', 'Egyptian'  'Escape Games', 'Ethical Grocery', 'Ethiopian', 'Ethnic Food'  'Event Planning & Services', 'Falafel', 'Farmers Market', 'Fashion'  'Fast Food', 'Filipino', 'Fish & Chips', 'Food', 'Food Court'  'Food Delivery Services', 'Food Stands', 'Food Trucks', 'French'  'Furniture Stores', 'Gastropubs', 'Gay Bars', 'Gelato', 'German'  'Gluten-Free', 'Golf', 'Greek', 'Grocery', 'Hair Salons', 'Hakka', 'Halal'  'Hawaiian', 'Health Markets', 'Himalayan/Nepalese', 'Hobby Shops'  'Home & Garden', 'Home Decor', 'Home Services', 'Hong Kong Style Cafe'  'Hookah Bars', 'Hot Dogs', 'Hot Pot', 'Hotels', 'Hotels & Travel', 'Hungarian'  'Iberian', 'Ice Cream & Frozen Yogurt', 'Imported Food', 'Indian'  'Indonesian', 'Interior Design', 'International', 'International Grocery'  'Internet Cafes', 'Irish', 'Irish Pub', 'Italian', 'Japanese', 'Jazz & Blues'  'Juice Bars & Smoothies', 'Karaoke', 'Kebab', 'Korean', 'Kosher', 'Laotian'  'Laser Hair Removal', 'Latin American', 'Lebanese', 'Live/Raw Food'  'Local Flavor', 'Local Services', 'Lounges', 'Macarons', 'Malaysian'  'Mattresses', 'Mauritius', 'Meat Shops', 'Mediterranean', 'Mexican'  'Middle Eastern', 'Modern European', 'Moroccan', 'Mosques', 'Music Venues'  'Nail Salons', 'Nightlife', 'Noodles', 'Nurseries & Gardening'  'Office Equipment', 'Organic Stores', 'Pakistani', 'Pan Asian'  'Party & Event Planning', 'Patisserie/Cake Shop', 'Performing Arts'  'Persian/Iranian', 'Peruvian', 'Pizza', 'Poke', 'Polish', 'Pool Halls'  'Pop-Up Restaurants', 'Portuguese', 'Poutineries', 'Professional Services'  'Pubs', 'Ramen', 'Recreation Centers', 'Religious Organizations', 'Russian'  'Salad', 'Salvadoran', 'Sandwiches', 'Scottish', 'Seafood', 'Seafood Markets'  'Shanghainese', 'Shaved Ice', 'Shaved Snow', 'Shopping', 'Shopping Centers'  'Singaporean', 'Slovakian', 'Smokehouse', 'Social Clubs', 'Soul Food', 'Soup'  'South African', 'Southern', 'Spanish', 'Speakeasies', 'Specialty Food'  'Sports Bars', 'Sri Lankan', 'Steakhouses', 'Street Vendors', 'Sushi Bars'  'Swiss Food', 'Taiwanese', 'Tapas Bars', 'Tapas/Small Plates', 'Tea Rooms'  'Tex-Mex', 'Thai', 'Themed Cafes', 'Trainers', 'Turkish', 'Ukrainian', 'Used Bookstore', 'Vape Shops', 'Vegan', 'Vegetarian', 'Venezuelan'  'Venues & Event Spaces', 'Video Game Stores', 'Vietnamese', 'Waffles'  'Wedding Planning', 'Wigs', 'Wine Bars', 'Wine Tours', 'Wineries']])]