In [1]:
# Importing dependencies
import os
import pandas as pd
from pandas.io.json import json_normalize

In [2]:
# Path to JSON
businessJSON = os.path.join('..', 'sourceData', 'business.json')

In [3]:
# Creating pd dataframe
business = pd.read_json(businessJSON, lines=True)

In [4]:
# Select only the businesses in Ontario
business_on = business.loc[business['state'] == 'ON']

In [5]:
# Dropping any rows with blank values in these categories
business_on_clean = business_on.dropna(subset=['name', 'address', 'postal_code', 'city', 'state', 'latitude', 'longitude', 'attributes',
                                                          'categories', 'hours']).reset_index(drop=True)

In [6]:
# Selecting all of the restaurants
restaurant = business_on_clean[business_on_clean['categories'].str.contains('Restaurants')].reset_index(drop=True)

In [7]:
# Regex to fix spelling mistakes 
restaurant = restaurant.replace({'city': {'^AGINCOURT$': 'Agincourt',
                                                            '^Bradford West Gwillimbury$': 'Bradford',
                                                            '^East Ajax$': 'Ajax',
                                                            '^Caledon.{,8}$': 'Caledon',
                                                            '^East Gwil{1,2}imbury$': 'East Gwillimbury',
                                                            '(?i)^.*icoke$': 'Etobicoke',
                                                            '^.{,9}Toro?nto.{,9}$': 'Toronto',
                                                            'Malton': 'Mississauga',
                                                            '^.{,5}Missis{1,2}a?ua?g.{1,2}$': 'Mississauga',
                                                            '^Regional Municipality of York$': 'North York',
                                                            '(?i)^North.{0,2}York$': 'North York',
                                                            '^York Regional Municipality$': 'York',
                                                            '^Willowdale$': 'North York',
                                                            '^North of Brampton$': 'Brampton',
                                                            '(?i)^Oak.?ridges$': 'Oak Ridges',
                                                            '^oakville$': 'Oakville',
                                                            '(?i)^Richmond?.?Hill?$': 'Richmond Hill',
                                                            '^.{,8}Scar.?bo?rough$': 'Scarborough',
                                                            '^.{,11}Stouffville$': 'Stouffville',
                                                            '(?i)^Thornhil{,2}$': 'Thornhill',
                                                            '^.*Vaugh.{,3}$': 'Vaughan',
                                                            '^Wh.?i.?by$': 'Whitby'}}, regex=True)

In [8]:
restaurant.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128,2.5,ON
1,1170 Queen Street W,"{'WiFi': 'u'no'', 'BikeParking': 'True', 'Rest...",NDuUMJfrWk52RA-H-OtrpA,"Juice Bars & Smoothies, Food, Restaurants, Fas...",Toronto,"{'Monday': '8:0-21:0', 'Tuesday': '8:0-21:0', ...",1,43.642889,-79.425429,Bolt Fresh Bar,M6J 1J5,57,3.0,ON
2,1051 Bloor Street W,"{'BusinessParking': '{'garage': False, 'street...",SP_YXIEwkFPPl_9anCYmpQ,"Restaurants, Nightlife, Breakfast & Brunch, Ve...",Toronto,"{'Tuesday': '9:0-18:0', 'Wednesday': '9:0-18:0...",0,43.660494,-79.432099,The Steady Cafe & Bar,M6H 1M4,29,3.5,ON
3,582 College Street,"{'Alcohol': 'u'full_bar'', 'Caters': 'False', ...",mlHC2XcU9Bows6cnYEmRgg,"Restaurants, Breakfast & Brunch, Bars, Modern ...",Toronto,"{'Thursday': '18:0-2:0', 'Friday': '18:0-2:0',...",0,43.65542,-79.413352,Mad Crush Wine Bar,M6G 1B3,9,4.0,ON
4,3085 Hurontario Street,"{'Ambience': '{'romantic': False, 'intimate': ...",9UTpmQ4OhX5jNFUIu7dPPQ,"Restaurants, Korean",Mississauga,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",1,43.582262,-79.618858,Buk Chang Dong Soon Tofu,L5A,103,4.0,ON


In [9]:
# Only taking these columns
restaurant = restaurant.loc[:, ['name', 'address', 'postal_code', 'city', 'state', 'latitude', 'longitude','categories', 'stars', 'hours', 'attributes']]
restaurant.columns = ['Name', 'Address', 'Postal Code', 'City', 'Province', 'Latitude', 'Longitude', 'Categories', 'Stars', 'Hours', 'Attributes']

In [10]:
restaurant.head()

Unnamed: 0,Name,Address,Postal Code,City,Province,Latitude,Longitude,Categories,Stars,Hours,Attributes
0,Emerald Chinese Restaurant,30 Eglinton Avenue W,L5R 3E7,Mississauga,ON,43.605499,-79.652289,"Specialty Food, Restaurants, Dim Sum, Imported...",2.5,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...","{'RestaurantsReservations': 'True', 'GoodForMe..."
1,Bolt Fresh Bar,1170 Queen Street W,M6J 1J5,Toronto,ON,43.642889,-79.425429,"Juice Bars & Smoothies, Food, Restaurants, Fas...",3.0,"{'Monday': '8:0-21:0', 'Tuesday': '8:0-21:0', ...","{'WiFi': 'u'no'', 'BikeParking': 'True', 'Rest..."
2,The Steady Cafe & Bar,1051 Bloor Street W,M6H 1M4,Toronto,ON,43.660494,-79.432099,"Restaurants, Nightlife, Breakfast & Brunch, Ve...",3.5,"{'Tuesday': '9:0-18:0', 'Wednesday': '9:0-18:0...","{'BusinessParking': '{'garage': False, 'street..."
3,Mad Crush Wine Bar,582 College Street,M6G 1B3,Toronto,ON,43.65542,-79.413352,"Restaurants, Breakfast & Brunch, Bars, Modern ...",4.0,"{'Thursday': '18:0-2:0', 'Friday': '18:0-2:0',...","{'Alcohol': 'u'full_bar'', 'Caters': 'False', ..."
4,Buk Chang Dong Soon Tofu,3085 Hurontario Street,L5A,Mississauga,ON,43.582262,-79.618858,"Restaurants, Korean",4.0,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","{'Ambience': '{'romantic': False, 'intimate': ..."


In [11]:
hours_raw = pd.DataFrame(json_normalize(data=restaurant['Hours']))

In [16]:
hours_raw.tail()

Unnamed: 0,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
10712,11:0-23:0,0:0-0:0,12:0-23:0,12:0-23:0,11:0-22:0,11:0-22:0,11:0-22:0
10713,11:30-23:0,11:30-22:30,11:30-23:0,11:30-22:30,11:30-23:0,11:30-22:30,11:30-22:30
10714,11:30-23:45,11:30-22:0,12:0-23:45,12:0-22:0,11:30-22:0,,11:30-22:0
10715,11:0-23:0,0:0-0:0,12:0-23:0,12:0-22:0,11:0-23:0,11:0-23:0,11:0-23:0
10716,11:0-22:30,11:0-21:30,11:0-22:30,16:0-21:30,11:0-21:30,11:0-21:30,11:0-21:30


In [15]:
friday = hours_raw['Friday'].str.split(pat='-', expand=True)
print(friday)


           0      1
0        9:0    1:0
1        8:0   21:0
2        9:0    2:0
3       18:0    2:0
4       11:0   22:0
5        8:0   18:0
6       11:0   23:0
7        9:0   19:0
8       10:0   23:0
9        7:0   23:0
10      11:0   22:0
11      12:0   23:0
12      11:0    2:0
13      11:0   22:0
14      7:30   18:0
15     11:30   22:0
16      12:0   22:0
17      11:0   22:0
18      11:0   22:0
19      11:0   17:0
20     10:30    1:0
21      11:0   21:0
22     11:30  22:30
23      11:0    1:0
24       8:0   20:0
25      11:0   21:0
26     11:30   23:0
27       6:0   16:0
28       9:0   21:0
29      11:0    0:0
...      ...    ...
10687   10:0   22:0
10688  11:30   22:0
10689   10:0   19:0
10690  11:30   23:0
10691  11:30   22:0
10692  11:30   21:0
10693   17:0    2:0
10694   11:0    1:0
10695  11:30   23:0
10696   11:0   23:0
10697  17:30   23:0
10698   11:0    0:0
10699   11:0   23:0
10700  11:30   22:0
10701    0:0    0:0
10702   11:0   22:0
10703   12:0   23:0
10704   17:0   23:0
