In [1]:
# Import Dependencies
import pandas as pd
import numpy as np

In [2]:
# Read Yelp's Business JSON dataset using Pandas
yelp_business_data_path = "static/assets/data/yelp_academic_dataset_business.json"
yelp_business_data = pd.read_json(yelp_business_data_path, lines=True)

In [3]:
# Read Yelp's category CSV dataset using Pandas
yelp_categories_path = "static/assets/data/yelp_categories.csv"
yelp_categories = pd.read_csv(yelp_categories_path)

In [4]:
# Verify business data
print(yelp_business_data.shape)
yelp_business_data.head()

(209393, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,f9NumwFMBDn751xgFiRbNA,The Range At Lake Norman,10913 Bailey Rd,Cornelius,NC,28031,35.462724,-80.852612,3.5,36,1,"{'BusinessAcceptsCreditCards': 'True', 'BikePa...","Active Life, Gun/Rifle Ranges, Guns & Ammo, Sh...","{'Monday': '10:0-18:0', 'Tuesday': '11:0-20:0'..."
1,Yzvjg0SayhoZgCljUJRF9Q,"Carlos Santo, NMD","8880 E Via Linda, Ste 107",Scottsdale,AZ,85258,33.569404,-111.890264,5.0,4,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Health & Medical, Fitness & Instruction, Yoga,...",
2,XNoUzKckATkOD1hP6vghZg,Felinus,3554 Rue Notre-Dame O,Montreal,QC,H4C 1P4,45.479984,-73.58007,5.0,5,1,,"Pets, Pet Services, Pet Groomers",
3,6OAZjbxqM5ol29BuHsil3w,Nevada House of Hose,1015 Sharp Cir,North Las Vegas,NV,89030,36.219728,-115.127725,2.5,3,0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Hardware Stores, Home Services, Building Suppl...","{'Monday': '7:0-16:0', 'Tuesday': '7:0-16:0', ..."
4,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26,1,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '..."


In [5]:
# Verify category data
print(yelp_categories.shape)
yelp_categories.head()

(22, 1)


Unnamed: 0,categories
0,Active Life
1,Arts & Entertainment
2,Automotive
3,Beauty & Spas
4,Education


In [6]:
# Verify number of businesses in the dataset that are identified as open ("1") and closed ("0") for business
yelp_business_data.is_open.value_counts()

1    168903
0     40490
Name: is_open, dtype: int64

In [7]:
# Filter out businesses that are closed; save only the ones that are open into a new DataFrame
yelp_business_data_new = yelp_business_data[yelp_business_data['is_open']==1]
# Verify the open businesses
print(yelp_business_data_new.shape)
yelp_business_data_new.head()

(168903, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,f9NumwFMBDn751xgFiRbNA,The Range At Lake Norman,10913 Bailey Rd,Cornelius,NC,28031,35.462724,-80.852612,3.5,36,1,"{'BusinessAcceptsCreditCards': 'True', 'BikePa...","Active Life, Gun/Rifle Ranges, Guns & Ammo, Sh...","{'Monday': '10:0-18:0', 'Tuesday': '11:0-20:0'..."
1,Yzvjg0SayhoZgCljUJRF9Q,"Carlos Santo, NMD","8880 E Via Linda, Ste 107",Scottsdale,AZ,85258,33.569404,-111.890264,5.0,4,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Health & Medical, Fitness & Instruction, Yoga,...",
2,XNoUzKckATkOD1hP6vghZg,Felinus,3554 Rue Notre-Dame O,Montreal,QC,H4C 1P4,45.479984,-73.58007,5.0,5,1,,"Pets, Pet Services, Pet Groomers",
4,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26,1,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '..."
5,cKyLV5oWZJ2NudWgqs8VZw,Oasis Auto Center - Gilbert,"1720 W Elliot Rd, Ste 105",Gilbert,AZ,85233,33.350399,-111.827142,4.5,38,1,{'BusinessAcceptsCreditCards': 'True'},"Auto Repair, Automotive, Oil Change Stations, ...","{'Monday': '7:0-18:0', 'Tuesday': '7:0-18:0', ..."


In [8]:
# Convert/Save the DataFrame of Yelp businessess to a new JSON records file
# orientation = "records"
# yelp_business_data_new.to_json(f"static/assets/data/yelp_converted_business_dataset_{orientation}.json", orient=orientation)

In [9]:
# Verify the number of businesses in the dataset by State
yelp_business_data_new.state.value_counts()

AZ     49415
NV     31180
ON     28433
OH     13762
NC     13433
PA     10206
QC      8215
AB      7038
WI      4433
IL      1593
SC      1113
NY        22
CA        21
TX         5
WA         4
GA         3
AL         3
FL         2
CO         2
VT         2
UT         2
MI         2
NE         2
BC         2
CT         1
HPL        1
MB         1
YT         1
DUR        1
OR         1
DOW        1
MO         1
HI         1
VA         1
Name: state, dtype: int64

In [10]:
# Verify the number of businesses in the dataset by City
yelp_business_data_new.city.value_counts().head(50)

Las Vegas          25043
Phoenix            16305
Toronto            14962
Charlotte           8500
Scottsdale          7341
Calgary             6772
Pittsburgh          6107
Mesa                5482
MontrÃ©al           5367
Henderson           4293
Tempe               3688
Chandler            3671
Gilbert             3171
Glendale            3165
Cleveland           3082
Mississauga         3001
Madison             2888
Peoria              1778
Markham             1510
North Las Vegas     1416
North York          1084
Surprise            1036
Scarborough         1030
Champaign           1018
Brampton            1007
Concord              975
Richmond Hill        868
Vaughan              862
Goodyear             812
Etobicoke            714
Matthews             656
Avondale             616
Huntersville         588
Fort Mill            566
Gastonia             493
Mentor               485
Lakewood             445
Akron                428
Laval                413
Cornelius            402


In [11]:
# Filter on businesses that are in Toronto ON, and have category data, dataframe was already filtered on being open
yelp_toronto_data = yelp_business_data_new[yelp_business_data_new['categories'].notnull() & (yelp_business_data_new['city'] == "Toronto")& (yelp_business_data_new['state'] == "ON")]
#yelp_toronto_data.describe()
yelp_toronto_data.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
9,EosRKXIGeSWFYWwpkbhNnA,Xtreme Couture,700 Kipling Avenue Etobicoke,Toronto,ON,M8Z 5G3,43.624539,-79.529108,3.0,16,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Martial Arts, Gyms, Fitness & Instruction, Act...","{'Monday': '5:30-23:0', 'Tuesday': '5:30-23:0'..."
26,1wWneWD_E1pBIyVpdHMaQg,Air Jordan Store,306 Yonge Street,Toronto,ON,M5B 1R4,43.656542,-79.381308,4.0,9,1,"{'RestaurantsPriceRange2': '3', 'BikeParking':...","Shopping, Shoe Stores, Fashion","{'Monday': '10:0-21:0', 'Tuesday': '10:0-21:0'..."
52,9JCjKd6eFXsAMVwouTh_4Q,Bath and Body Works,1900 Eglintion Ave E,Toronto,ON,M1L 2L9,43.727189,-79.293008,3.5,7,1,"{'RestaurantsPriceRange2': '2', 'ByAppointment...","Cosmetics & Beauty Supply, Shopping, Beauty & ...",
73,0QjROMVW9ACKjhSEfHqNCQ,Mi Mi Restaurant,688 Gerrard Street E,Toronto,ON,M4M 1Y3,43.666376,-79.348773,4.0,116,1,"{'RestaurantsTakeOut': 'True', 'Alcohol': 'u'b...","Vietnamese, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'..."
74,OT-8IUWo_2M-rHddjzz_Cg,Equipment Sales and Long Term Rentals - Redlin...,"510 Coronation Dr. Unit #18, Unit 18",Toronto,ON,M1E 4X6,43.765266,-79.166977,1.5,3,1,{'WheelchairAccessible': 'True'},"Event Planning & Services, Photo Booth Rentals...",{'Monday': '9:0-17:0'}


In [12]:
# Export DF to CSV
#yelp_toronto_data.to_csv('toronto_data.csv')

In [13]:
# Read Yelp's category CSV dataset using Pandas
yelp_toronto_path = "static/assets/data/toronto_data.csv"
yelp_toronto = pd.read_csv(yelp_toronto_path)
yelp_toronto.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,parent,hours
0,EosRKXIGeSWFYWwpkbhNnA,Xtreme Couture,700 Kipling Avenue Etobicoke,Toronto,ON,M8Z 5G3,43.624539,-79.529108,3.0,16,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Martial Arts, Gyms, Fitness & Instruction, Act...",Active Life,"{'Monday': '5:30-23:0', 'Tuesday': '5:30-23:0'..."
1,1wWneWD_E1pBIyVpdHMaQg,Air Jordan Store,306 Yonge Street,Toronto,ON,M5B 1R4,43.656542,-79.381308,4.0,9,1,"{'RestaurantsPriceRange2': '3', 'BikeParking':...","Shopping, Shoe Stores, Fashion",Shopping,"{'Monday': '10:0-21:0', 'Tuesday': '10:0-21:0'..."
2,9JCjKd6eFXsAMVwouTh_4Q,Bath and Body Works,1900 Eglintion Ave E,Toronto,ON,M1L 2L9,43.727189,-79.293008,3.5,7,1,"{'RestaurantsPriceRange2': '2', 'ByAppointment...","Cosmetics & Beauty Supply, Shopping, Beauty & ...",Shopping,
3,0QjROMVW9ACKjhSEfHqNCQ,Mi Mi Restaurant,688 Gerrard Street E,Toronto,ON,M4M 1Y3,43.666376,-79.348773,4.0,116,1,"{'RestaurantsTakeOut': 'True', 'Alcohol': ""u'b...","Vietnamese, Restaurants",Restaurants,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'..."
4,OT-8IUWo_2M-rHddjzz_Cg,Equipment Sales and Long Term Rentals - Redlin...,"510 Coronation Dr. Unit #18, Unit 18",Toronto,ON,M1E 4X6,43.765266,-79.166977,1.5,3,1,{'WheelchairAccessible': 'True'},"Event Planning & Services, Photo Booth Rentals...",Event Planning & Services,{'Monday': '9:0-17:0'}


In [14]:
# Convert/Save the DataFrame of Yelp Toronto businessess to a new JSON records file
orientation = "records"
yelp_toronto.to_json(f"static/assets/data/yelp_toronto_business_dataset_{orientation}.json", orient=orientation)

In [17]:
#Create group by data frame for parent categories and compute business count, total review count, and avg rating by category 
d = {'is_open':'businesses_count', 'review_count':'total_reviews','stars':'average_review'}
yelp_toronto_summary = yelp_toronto.groupby('parent',as_index=False).agg({'is_open':'sum', 'review_count':'sum','stars':'mean'}).rename(columns=d)
yelp_toronto_summary = yelp_toronto_summary.round({"average_review":1}) 
yelp_toronto_summary
# Convert/Save the DataFrame of Yelp Toronto businessess summary to a new JSON records file
orientation = "records"
yelp_toronto_summary.to_json(f"static/assets/data/yelp_toronto_business_summary_{orientation}.json", orient=orientation)

In [18]:
#Create group by data frame for star/rating categories and compute business count by star and parent category
e = {'is_open':'businesses_count'}
yelp_toronto_summary1 = yelp_toronto.groupby(['stars','parent'], as_index=False).agg({'is_open':'sum'}).rename(columns=e) 
yelp_toronto_summary1
# Convert/Save the DataFrame of Yelp Toronto businessess summary to a new JSON records file
orientation = "records"
yelp_toronto_summary1.to_json(f"static/assets/data/yelp_toronto_stars_summary_{orientation}.json", orient=orientation)

In [19]:
# Read Yelp's Tips JSON dataset using Pandas
yelp_tips_data_path = "static/assets/data/yelp_academic_dataset_tip.json"
yelp_tips_data = pd.read_json(yelp_tips_data_path, lines=True)

In [20]:
# Verify tips data
print(yelp_tips_data.shape)
yelp_tips_data.head()

(1320761, 5)


Unnamed: 0,user_id,business_id,text,date,compliment_count
0,hf27xTME3EiCp6NL6VtWZQ,UYX5zL_Xj9WEc_Wp-FrqHw,Here for a quick mtg,2013-11-26 18:20:08,0
1,uEvusDwoSymbJJ0auR3muQ,Ch3HkwQYv1YKw_FO06vBWA,Cucumber strawberry refresher,2014-06-15 22:26:45,0
2,AY-laIws3S7YXNl_f_D6rQ,rDoT-MgxGRiYqCmi0bG10g,Very nice good service good food,2016-07-18 22:03:42,0
3,Ue_7yUlkEbX4AhnYdUfL7g,OHXnDV01gLokiX1ELaQufA,It's a small place. The staff is friendly.,2014-06-06 01:10:34,0
4,LltbT_fUMqZ-ZJP-vJ84IQ,GMrwDXRlAZU2zj5nH6l4vQ,"8 sandwiches, $24 total...what a bargain!!! An...",2011-04-08 18:12:01,0


In [21]:
# Verify user compliment counts in the tips dataset
yelp_tips_data.compliment_count.value_counts()

0     1302089
1       16914
2        1448
3         228
4          50
5          16
6           8
7           3
15          1
12          1
11          1
9           1
8           1
Name: compliment_count, dtype: int64

In [22]:
# Merge of business and tips dataframes for Toronto
tips_business_merge = pd.merge(yelp_toronto, yelp_tips_data, how ='inner', on ='business_id')
tips_business_merge.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,parent,hours,user_id,text,date,compliment_count
0,EosRKXIGeSWFYWwpkbhNnA,Xtreme Couture,700 Kipling Avenue Etobicoke,Toronto,ON,M8Z 5G3,43.624539,-79.529108,3.0,16,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Martial Arts, Gyms, Fitness & Instruction, Act...",Active Life,"{'Monday': '5:30-23:0', 'Tuesday': '5:30-23:0'...",WeVkkF5L39888IPPlRhNpg,Monitor your bank accounts regularly. They com...,2016-06-02 14:38:44,1
1,1wWneWD_E1pBIyVpdHMaQg,Air Jordan Store,306 Yonge Street,Toronto,ON,M5B 1R4,43.656542,-79.381308,4.0,9,1,"{'RestaurantsPriceRange2': '3', 'BikeParking':...","Shopping, Shoe Stores, Fashion",Shopping,"{'Monday': '10:0-21:0', 'Tuesday': '10:0-21:0'...",rGd8YUtvhSdMm8-9cMdQ4Q,Only one in Canada! Customize your purchases!,2017-06-02 00:23:05,0
2,9JCjKd6eFXsAMVwouTh_4Q,Bath and Body Works,1900 Eglintion Ave E,Toronto,ON,M1L 2L9,43.727189,-79.293008,3.5,7,1,"{'RestaurantsPriceRange2': '2', 'ByAppointment...","Cosmetics & Beauty Supply, Shopping, Beauty & ...",Shopping,,pn_flI3EBNugBEYFp9okxQ,Stock up when they have their buy 3 get 3 free...,2017-10-25 22:01:44,0
3,9JCjKd6eFXsAMVwouTh_4Q,Bath and Body Works,1900 Eglintion Ave E,Toronto,ON,M1L 2L9,43.727189,-79.293008,3.5,7,1,"{'RestaurantsPriceRange2': '2', 'ByAppointment...","Cosmetics & Beauty Supply, Shopping, Beauty & ...",Shopping,,eluOwBo91IiRcdfy4My_CQ,Giant semi annual sale until Jan 17!!!,2015-01-10 22:40:07,0
4,0QjROMVW9ACKjhSEfHqNCQ,Mi Mi Restaurant,688 Gerrard Street E,Toronto,ON,M4M 1Y3,43.666376,-79.348773,4.0,116,1,"{'RestaurantsTakeOut': 'True', 'Alcohol': ""u'b...","Vietnamese, Restaurants",Restaurants,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",EUicxwHSRHFyJaO6gqbPGA,A new adventure for J &I today,2011-07-22 17:00:11,0


In [23]:
tips_business_merge
tips_business_merge_min = tips_business_merge.drop(["name","address","city","state","latitude","longitude","stars","review_count","is_open","attributes","categories","hours"], axis=1)
tips_business_merge_min

Unnamed: 0,business_id,postal_code,parent,user_id,text,date,compliment_count
0,EosRKXIGeSWFYWwpkbhNnA,M8Z 5G3,Active Life,WeVkkF5L39888IPPlRhNpg,Monitor your bank accounts regularly. They com...,2016-06-02 14:38:44,1
1,1wWneWD_E1pBIyVpdHMaQg,M5B 1R4,Shopping,rGd8YUtvhSdMm8-9cMdQ4Q,Only one in Canada! Customize your purchases!,2017-06-02 00:23:05,0
2,9JCjKd6eFXsAMVwouTh_4Q,M1L 2L9,Shopping,pn_flI3EBNugBEYFp9okxQ,Stock up when they have their buy 3 get 3 free...,2017-10-25 22:01:44,0
3,9JCjKd6eFXsAMVwouTh_4Q,M1L 2L9,Shopping,eluOwBo91IiRcdfy4My_CQ,Giant semi annual sale until Jan 17!!!,2015-01-10 22:40:07,0
4,0QjROMVW9ACKjhSEfHqNCQ,M4M 1Y3,Restaurants,EUicxwHSRHFyJaO6gqbPGA,A new adventure for J &I today,2011-07-22 17:00:11,0
...,...,...,...,...,...,...,...
58815,BtTCTHwoIfr3fBQjaOkpIw,M5E 1B8,Food,OKTF6Jj7SkHG1hLxT1KeRw,They have a clover machine here. Really brings...,2014-07-19 15:25:42,0
58816,BtTCTHwoIfr3fBQjaOkpIw,M5E 1B8,Food,p7Yi0vdwzWpW_wCSRmoU9w,Power doors do not operate.,2016-08-20 19:14:25,0
58817,BtTCTHwoIfr3fBQjaOkpIw,M5E 1B8,Food,IIVTHna4vd7EOJJTKaS5uw,Soy green tea latte classic has a milder conco...,2013-06-15 23:55:06,0
58818,BtTCTHwoIfr3fBQjaOkpIw,M5E 1B8,Food,wKX7dU0pcF0nQ9m2V26M-g,Nice & friendly staff,2014-04-20 15:39:04,0


In [24]:
# Convert/Save the DataFrame of Yelp tips to a new JSON records file
orientation = "records"
tips_business_merge_min.to_json(f"static/assets/data/yelp_toronto_tips_business_dataset_{orientation}.json", orient=orientation)

In [25]:
# Read Yelp's checkin JSON dataset using Pandas
yelp_checkin_data_path = "static/assets/data/yelp_academic_dataset_checkin.json"
yelp_checkin_data = pd.read_json(yelp_checkin_data_path, lines=True)

In [26]:
# Verify checkin data
print(yelp_checkin_data.shape)
yelp_checkin_data.head()

(175187, 2)


Unnamed: 0,business_id,date
0,--1UhMGODdWsrMastO9DZw,"2016-04-26 19:49:16, 2016-08-30 18:36:57, 2016..."
1,--6MefnULPED_I942VcFNA,"2011-06-04 18:22:23, 2011-07-23 23:51:33, 2012..."
2,--7zmmkVg-IMGaXbuVd0SQ,"2014-12-29 19:25:50, 2015-01-17 01:49:14, 2015..."
3,--8LPVSo5i0Oo61X01sV9A,2016-07-08 16:43:30
4,--9QQLMTbFzLJ_oT-ON3Xw,"2010-06-26 17:39:07, 2010-08-01 20:06:21, 2010..."


In [27]:
# Merge of business and checkin dataframes for Toronto
checkins_business_merge = pd.merge(yelp_toronto, yelp_checkin_data, how ='inner', on ='business_id')
checkins_business_merge.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,parent,hours,date
0,EosRKXIGeSWFYWwpkbhNnA,Xtreme Couture,700 Kipling Avenue Etobicoke,Toronto,ON,M8Z 5G3,43.624539,-79.529108,3.0,16,1,"{'GoodForKids': 'True', 'ByAppointmentOnly': '...","Martial Arts, Gyms, Fitness & Instruction, Act...",Active Life,"{'Monday': '5:30-23:0', 'Tuesday': '5:30-23:0'...","2012-06-12 19:19:12, 2013-02-11 14:43:02, 2013..."
1,1wWneWD_E1pBIyVpdHMaQg,Air Jordan Store,306 Yonge Street,Toronto,ON,M5B 1R4,43.656542,-79.381308,4.0,9,1,"{'RestaurantsPriceRange2': '3', 'BikeParking':...","Shopping, Shoe Stores, Fashion",Shopping,"{'Monday': '10:0-21:0', 'Tuesday': '10:0-21:0'...","2017-05-28 23:04:12, 2017-06-02 00:22:09, 2017..."
2,9JCjKd6eFXsAMVwouTh_4Q,Bath and Body Works,1900 Eglintion Ave E,Toronto,ON,M1L 2L9,43.727189,-79.293008,3.5,7,1,"{'RestaurantsPriceRange2': '2', 'ByAppointment...","Cosmetics & Beauty Supply, Shopping, Beauty & ...",Shopping,,"2011-11-13 18:30:52, 2011-12-27 22:54:14, 2012..."
3,0QjROMVW9ACKjhSEfHqNCQ,Mi Mi Restaurant,688 Gerrard Street E,Toronto,ON,M4M 1Y3,43.666376,-79.348773,4.0,116,1,"{'RestaurantsTakeOut': 'True', 'Alcohol': ""u'b...","Vietnamese, Restaurants",Restaurants,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","2010-03-16 18:05:19, 2010-06-06 16:53:37, 2010..."
4,umDBj-8WUNkNBODa6P0G-Q,Natural Scents,143 Sheppard Avenue W,Toronto,ON,M2N 1M7,43.759675,-79.418122,4.5,8,1,"{'BikeParking': 'True', 'BusinessParking': ""{'...","Health & Medical, Massage Therapy, Eyelash Ser...",Health & Medical,"{'Monday': '0:0-0:0', 'Tuesday': '10:0-20:0', ...","2018-12-21 14:22:13, 2019-02-16 18:07:16, 2019..."


In [28]:
# Convert/Save the DataFrame of Yelp checkins to a new JSON records file
orientation = "records"
checkins_business_merge.to_json(f"static/assets/data/yelp_toronto_checkin_business_dataset_{orientation}.json", orient=orientation)