In [57]:
import pandas as pd
import requests
import re

# Yelp Data

### Extract CSVs into DataFrames

In [58]:
yelp_file = "../Resources/yelp_data.csv"
yelp_df = pd.read_csv(yelp_file)
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",Find a Table
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com
...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…


In [59]:
# All columns
yelp_df.columns

Index(['restaurant_name', 'yelp_url', 'restaurant_rating', 'price_cuisine',
       'address1', 'address2', 'restaurant_website'],
      dtype='object')

In [60]:
# restaurant_name check
yelp_df["restaurant_name"].value_counts()


Sandwiches Unlimited Lunch Box           2
South + Pine American Eatery             1
Twist On Thai Cafe                       1
Theodoras Family Restaurant and Pizza    1
Sheimer’s Deli                           1
                                        ..
Rinconcito Peruano                       1
Fresco Mexican                           1
Ashirwad Palace                          1
Aoyama                                   1
Thai Kitchen                             1
Name: restaurant_name, Length: 237, dtype: int64

### Dropping duplicate restaurant_name and address1 value

In [61]:
# clear leading and trailing spaces
yelp_df['restaurant_name'] = yelp_df['restaurant_name'].str.strip()
yelp_df['address1'] = yelp_df['address1'].str.strip()
yelp_df['address2'] = yelp_df['address2'].str.strip()

# drop duplicates for restaurant name and address
yelp_df = yelp_df.drop_duplicates(subset=['restaurant_name', 'address1','address2'])
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",Find a Table
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com
...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…


In [6]:
# average rating
average_rating = yelp_df["restaurant_rating"].sum()/yelp_df["restaurant_name"].count()
print("average_rating")
print(average_rating)


average_rating
3.9936974789915967


In [7]:
# fill empty cells
yelp_df=yelp_df.fillna("")
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",Find a Table
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com
...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…


In [8]:
# restaurant_rating check
yelp_df["restaurant_rating"].count()

238

### Clear restaurant website if it does not exist

In [9]:
# restaurant_url check
yelp_df["restaurant_website"].value_counts()

                                   52
Find a Table                        3
sandwichesunlimitedlunchbox.co…     2
anthonyandsonsbakery.com            2
chicosmexicangrill.com              1
                                   ..
oolalavietkitchen.com               1
townbarandkitchen.com               1
casadpizzadenville.com              1
asiandinerrestaurantnj.com          1
hibiscuscuisine.com                 1
Name: restaurant_website, Length: 183, dtype: int64

In [10]:
# check for valid website
for index, row in yelp_df.iterrows():
    if ('.' not in row['restaurant_website']) | (' ' in row['restaurant_website']):
        yelp_df.loc[index, 'restaurant_website'] = ""
    
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com
...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…


In [11]:
# restaurant_website check
yelp_df["restaurant_website"].value_counts()

                                   55
anthonyandsonsbakery.com            2
sandwichesunlimitedlunchbox.co…     2
popschickentogo.com                 1
revolutionmorristown.com            1
                                   ..
dantesitaliancuisine.com            1
oolalavietkitchen.com               1
townbarandkitchen.com               1
casadpizzadenville.com              1
thecommittedpig.com                 1
Name: restaurant_website, Length: 182, dtype: int64

### Split the price_cuisine column in to price_level and cuisine column

In [12]:
# price_cuisine check
yelp_df["price_cuisine"].value_counts()

$$Italian                               12
$$Thai                                   6
$$Mexican                                5
$$Indian                                 5
$Chinese                                 5
                                        ..
$$Colombian, Spanish, Bars               1
$$Breakfast & Brunch, Burgers, Salad     1
$$Seafood, Sports Bars, Gluten-Free      1
$$Mediterranean                          1
$Delis, Caterers, Cafes                  1
Name: price_cuisine, Length: 183, dtype: int64

In [13]:
price_level = []
cuisine = []
for index, row in yelp_df.iterrows():
    price_cuisine = re.split(r'(\$+)', row['price_cuisine'])
    if len(price_cuisine) == 1:
        price_level.append("")
        cuisine.append(price_cuisine[0])
    else:
        price_level.append(price_cuisine[1])
        cuisine.append(price_cuisine[2])
yelp_df['price_level'] = price_level
yelp_df['cuisine'] = cuisine
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website,price_level,cuisine
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",,$$$,"Italian, Bars, Venues & Event Spaces"
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com,$$,"American (New), Bars, Pizza"
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com,$$,"Wine Bars, French, Italian"
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates"
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com,$$,Italian
...,...,...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",,$,"Food Trucks, American (Traditional), Hot Dogs"
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",,$,Pizza
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",,$,"Juice Bars & Smoothies, Soup, Sandwiches"
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…,,"Juice Bars & Smoothies, Acai Bowls, Wraps"


### Split address2 to city, state and zip code

In [14]:
# split with comma as delimiter to get city and state + zipcode
yelp_df[['city','state+zipcode']]=yelp_df.address2.str.split(",",expand=True,)
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website,price_level,cuisine,city,state+zipcode
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ 07869
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ 07869
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ 07885
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ 07801
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com,$$,Italian,Randolph,NJ 07869
...,...,...,...,...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",,$,"Food Trucks, American (Traditional), Hot Dogs",Dover,NJ 07801
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",,$,Pizza,Succasunna,NJ 07876
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",,$,"Juice Bars & Smoothies, Soup, Sandwiches",Dover,NJ 07801
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…,,"Juice Bars & Smoothies, Acai Bowls, Wraps",Morristown,NJ 07960


In [15]:
# clear leading spaces
yelp_df["state+zipcode"]=yelp_df["state+zipcode"].str.strip()

In [16]:
# split with space as delimiter to get state and zip code
yelp_df[['state', 'zip code']] = yelp_df["state+zipcode"].str.split(" ",expand=True)
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website,price_level,cuisine,city,state+zipcode,state,zip code
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ 07869,NJ,07869
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ 07869,NJ,07869
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ 07885,NJ,07885
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ 07801,NJ,07801
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com,$$,Italian,Randolph,NJ 07869,NJ,07869
...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",,$,"Food Trucks, American (Traditional), Hot Dogs",Dover,NJ 07801,NJ,07801
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",,$,Pizza,Succasunna,NJ 07876,NJ,07876
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",,$,"Juice Bars & Smoothies, Soup, Sandwiches",Dover,NJ 07801,NJ,07801
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…,,"Juice Bars & Smoothies, Acai Bowls, Wraps",Morristown,NJ 07960,NJ,07960


### Clean Yelp DataFrame to have only Zip Codes of Morris County.

In [17]:
# retrieve all Morris County zip codes and add them to morris_county_zipcodes list
tables=pd.read_html('http://www.ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?ClientCode=capitolimpact&State=nj&StName=&StFIPS=&FIPS=34027')
morris_county_df = tables[2]
morris_county_df['Zip Code'] = morris_county_df['Zip Code'].astype(str)
morris_county_zipcodes = morris_county_df['Zip Code'].str.zfill(5)
morris_county_zipcodes

0     07005
1     07034
2     07035
3     07045
4     07046
5     07054
6     07058
7     07082
8     07405
9     07405
10    07440
11    07444
12    07457
13    07801
14    07802
15    07803
16    07803
17    07806
18    07806
19    07828
20    07834
21    07836
22    07842
23    07845
24    07847
25    07849
26    07850
27    07852
28    07853
29    07856
30    07857
31    07866
32    07869
33    07869
34    07870
35    07876
36    07878
37    07885
38    07926
39    07927
40    07928
41    07930
42    07932
43    07933
44    07935
45    07936
46    07940
47    07945
48    07946
49    07950
50    07950
51    07960
Name: Zip Code, dtype: object

In [18]:
# keep those restaurants that are in Morris County
yelp_df = yelp_df[yelp_df['zip code'].isin(morris_county_zipcodes)]
yelp_df

Unnamed: 0,restaurant_name,yelp_url,restaurant_rating,price_cuisine,address1,address2,restaurant_website,price_level,cuisine,city,state+zipcode,state,zip code
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,"$$$Italian, Bars, Venues & Event Spaces",1181 Sussex Tpke,"Randolph, NJ 07869",,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ 07869,NJ,07869
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,"$$American (New), Bars, Pizza",500 NJ-10,"Randolph, NJ 07869",sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ 07869,NJ,07869
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,"$$Wine Bars, French, Italian",322 S Main St,"Wharton, NJ 07885",4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ 07885,NJ,07885
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,"$$Irish Pub, Seafood, Tapas/Small Plates",64 E Mcfarlan St,"Dover, NJ 07801",quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ 07801,NJ,07801
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,$$Italian,1171 Sussex Tpke,"Randolph, NJ 07869",veronarestaurant.com,$$,Italian,Randolph,NJ 07869,NJ,07869
...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,"$Food Trucks, American (Traditional), Hot Dogs",Dover Train Station,"Dover, NJ 07801",,$,"Food Trucks, American (Traditional), Hot Dogs",Dover,NJ 07801,NJ,07801
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,$Pizza,275 State Rt 10 E,"Succasunna, NJ 07876",,$,Pizza,Succasunna,NJ 07876,NJ,07876
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,"$Juice Bars & Smoothies, Soup, Sandwiches",6B S Warren St,"Dover, NJ 07801",,$,"Juice Bars & Smoothies, Soup, Sandwiches",Dover,NJ 07801,NJ,07801
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,"Juice Bars & Smoothies, Acai Bowls, Wraps",68 South St,"Morristown, NJ 07960",cleanjuice.com/locations/morri…,,"Juice Bars & Smoothies, Acai Bowls, Wraps",Morristown,NJ 07960,NJ,07960


In [19]:
# drop unwanted columns and rename columns
yelp_df = yelp_df.drop(columns=['price_cuisine', 'address2', 'state+zipcode'])
yelp_df = yelp_df.rename(columns={'restaurant_rating':'yelp_rating',
                                  'price_level':'yelp_price_level',
                                  'cuisine':'yelp_cuisine'
                                 })
yelp_df['restaurant_name'] = yelp_df['restaurant_name'].str.strip()
yelp_df['address1'] = yelp_df['address1'].str.strip()

yelp_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869
...,...,...,...,...,...,...,...,...,...,...
233,Big D’s Hot Dogs,https://www.yelp.com/biz/big-ds-hot-dogs-dover...,5.0,Dover Train Station,,$,"Food Trucks, American (Traditional), Hot Dogs",Dover,NJ,07801
234,Luigis Pizza,https://www.yelp.com/biz/luigis-pizza-succasun...,4.0,275 State Rt 10 E,,$,Pizza,Succasunna,NJ,07876
235,Naranja Juice Bar,https://www.yelp.com/biz/naranja-juice-bar-dov...,5.0,6B S Warren St,,$,"Juice Bars & Smoothies, Soup, Sandwiches",Dover,NJ,07801
236,Clean Juice,https://www.yelp.com/biz/clean-juice-morristow...,4.5,68 South St,cleanjuice.com/locations/morri…,,"Juice Bars & Smoothies, Acai Bowls, Wraps",Morristown,NJ,07960


# Google data

### Store CSV data into a DataFrame

In [20]:
csv_file = "../Resources/google_data.csv"
google_data_df = pd.read_csv(csv_file)
google_data_df

Unnamed: 0,restaurant_name,place_id,price_level,restaurant_rating,google_maps_url,address,restaurant_website
0,Popeyes Louisiana Kitchen,ChIJ3dh_VfcKw4kR3lrLsxKB7tY,1.0,3.7,https://maps.google.com/?cid=15487458085899557598,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,https://www.popeyes.com/store-locator/store/re...
1,Buffalo Wild Wings,ChIJAYlPufkKw4kRr7-IhJCnC3Y,2.0,4.2,https://maps.google.com/?cid=8506076560359997359,"343 Mt Hope Ave, Rockaway, NJ 07866, USA",https://www.buffalowildwings.com/en/locations/...
2,Auntie Anne's,ChIJ3fcDtfsKw4kRsy2xOBfI0aQ,1.0,4.2,https://maps.google.com/?cid=11876493694413647283,"301 Mt Hope Ave Suite 1048, Rockaway, NJ 07866...",https://www.auntieannes.com/nj/rockaway/119-1?...
3,Tabor Road Tavern,ChIJg8StYWinw4kRMFNslu1L9xg,3.0,4.4,https://maps.google.com/?cid=1798990059960816432,"510 Tabor Rd, Morris Plains, NJ 07950, USA",http://www.taborroadtavern.com/
4,Subway,ChIJ6YlEVO8Jw4kR4n_aDP4NbIc,1.0,3.4,https://maps.google.com/?cid=9758189877393522658,"559 E Main St Store # 2, Denville, NJ 07834, USA",https://order.subway.com/en-us/restaurant/2197...
...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,ChIJ1yZeabefw4kRfvmIeEYa2xY,2.0,4.5,https://maps.google.com/?cid=1646938978706323838,"15 NJ-10, Succasunna, NJ 07876, USA",http://anthonyandsonsbakerynj.com/
356,Pizza Express Mt. Arlington,ChIJM4tBOgJ1w4kRa9U1mz6xgfI,1.0,3.7,https://maps.google.com/?cid=17474442911624320363,"312 Howard Blvd, Mt Arlington, NJ 07856, USA",https://pizzaexpressmtarlington.com/
357,Shell,ChIJWf-qdf2fw4kR_MdkDkTL6Jo,2.0,3.1,https://maps.google.com/?cid=11162395169597474812,"274 US-46, Mine Hill Township, NJ 07803, USA",https://find.shell.com/us/fuel/12395193-274-rt...
358,Dunkin',ChIJNcJSc_2fw4kRIA4rycWRc58,1.0,4.2,https://maps.google.com/?cid=11489687353021894176,"274 Rte 46, Mine Hill Township, NJ 07803, USA",https://smart.link/abu8tr12sc1y6?utm_source=go...


In [21]:
# All columns
google_data_df.columns

Index(['restaurant_name', 'place_id', 'price_level', 'restaurant_rating',
       'google_maps_url', 'address', 'restaurant_website'],
      dtype='object')

In [22]:
# restaurant name check
google_data_df["restaurant_name"].value_counts()

Subway                         12
Dunkin'                        11
Starbucks                      10
Wendy's                         4
Panera Bread                    4
                               ..
Golden Burrito Mexican Spot     1
Pren Kitchen                    1
China Garden                    1
bp                              1
Thai Kitchen                    1
Name: restaurant_name, Length: 245, dtype: int64

### Dropping duplicate restaurant address and restaurant_name value

In [23]:
# clear leading and trailing spaces
google_data_df['restaurant_name'] = google_data_df['restaurant_name'].str.strip()
google_data_df['address'] = google_data_df['address'].str.strip()

# drop duplicates for restaurant name and address
google_data_df = google_data_df.drop_duplicates(subset=['restaurant_name', 'address'])
google_data_df

Unnamed: 0,restaurant_name,place_id,price_level,restaurant_rating,google_maps_url,address,restaurant_website
0,Popeyes Louisiana Kitchen,ChIJ3dh_VfcKw4kR3lrLsxKB7tY,1.0,3.7,https://maps.google.com/?cid=15487458085899557598,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,https://www.popeyes.com/store-locator/store/re...
1,Buffalo Wild Wings,ChIJAYlPufkKw4kRr7-IhJCnC3Y,2.0,4.2,https://maps.google.com/?cid=8506076560359997359,"343 Mt Hope Ave, Rockaway, NJ 07866, USA",https://www.buffalowildwings.com/en/locations/...
2,Auntie Anne's,ChIJ3fcDtfsKw4kRsy2xOBfI0aQ,1.0,4.2,https://maps.google.com/?cid=11876493694413647283,"301 Mt Hope Ave Suite 1048, Rockaway, NJ 07866...",https://www.auntieannes.com/nj/rockaway/119-1?...
3,Tabor Road Tavern,ChIJg8StYWinw4kRMFNslu1L9xg,3.0,4.4,https://maps.google.com/?cid=1798990059960816432,"510 Tabor Rd, Morris Plains, NJ 07950, USA",http://www.taborroadtavern.com/
4,Subway,ChIJ6YlEVO8Jw4kR4n_aDP4NbIc,1.0,3.4,https://maps.google.com/?cid=9758189877393522658,"559 E Main St Store # 2, Denville, NJ 07834, USA",https://order.subway.com/en-us/restaurant/2197...
...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,ChIJ1yZeabefw4kRfvmIeEYa2xY,2.0,4.5,https://maps.google.com/?cid=1646938978706323838,"15 NJ-10, Succasunna, NJ 07876, USA",http://anthonyandsonsbakerynj.com/
356,Pizza Express Mt. Arlington,ChIJM4tBOgJ1w4kRa9U1mz6xgfI,1.0,3.7,https://maps.google.com/?cid=17474442911624320363,"312 Howard Blvd, Mt Arlington, NJ 07856, USA",https://pizzaexpressmtarlington.com/
357,Shell,ChIJWf-qdf2fw4kR_MdkDkTL6Jo,2.0,3.1,https://maps.google.com/?cid=11162395169597474812,"274 US-46, Mine Hill Township, NJ 07803, USA",https://find.shell.com/us/fuel/12395193-274-rt...
358,Dunkin',ChIJNcJSc_2fw4kRIA4rycWRc58,1.0,4.2,https://maps.google.com/?cid=11489687353021894176,"274 Rte 46, Mine Hill Township, NJ 07803, USA",https://smart.link/abu8tr12sc1y6?utm_source=go...


### Clean DataFrame

In [24]:
# price_level check
google_data_df["price_level"].value_counts()

2.0    134
1.0     78
3.0     21
4.0      2
Name: price_level, dtype: int64

In [25]:
# restaurant_rating check
google_data_df["restaurant_rating"].value_counts()

4.2    47
4.3    44
4.5    39
4.4    39
4.1    26
4.6    20
4.0    14
4.7    13
3.9     6
3.4     5
3.8     5
3.7     5
3.6     4
4.8     3
5.0     3
3.3     2
3.5     2
2.5     2
2.9     1
3.2     1
3.0     1
2.0     1
3.1     1
Name: restaurant_rating, dtype: int64

In [26]:
# fill empty cells
google_data_df=google_data_df.fillna("")
google_data_df

Unnamed: 0,restaurant_name,place_id,price_level,restaurant_rating,google_maps_url,address,restaurant_website
0,Popeyes Louisiana Kitchen,ChIJ3dh_VfcKw4kR3lrLsxKB7tY,1,3.7,https://maps.google.com/?cid=15487458085899557598,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,https://www.popeyes.com/store-locator/store/re...
1,Buffalo Wild Wings,ChIJAYlPufkKw4kRr7-IhJCnC3Y,2,4.2,https://maps.google.com/?cid=8506076560359997359,"343 Mt Hope Ave, Rockaway, NJ 07866, USA",https://www.buffalowildwings.com/en/locations/...
2,Auntie Anne's,ChIJ3fcDtfsKw4kRsy2xOBfI0aQ,1,4.2,https://maps.google.com/?cid=11876493694413647283,"301 Mt Hope Ave Suite 1048, Rockaway, NJ 07866...",https://www.auntieannes.com/nj/rockaway/119-1?...
3,Tabor Road Tavern,ChIJg8StYWinw4kRMFNslu1L9xg,3,4.4,https://maps.google.com/?cid=1798990059960816432,"510 Tabor Rd, Morris Plains, NJ 07950, USA",http://www.taborroadtavern.com/
4,Subway,ChIJ6YlEVO8Jw4kR4n_aDP4NbIc,1,3.4,https://maps.google.com/?cid=9758189877393522658,"559 E Main St Store # 2, Denville, NJ 07834, USA",https://order.subway.com/en-us/restaurant/2197...
...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,ChIJ1yZeabefw4kRfvmIeEYa2xY,2,4.5,https://maps.google.com/?cid=1646938978706323838,"15 NJ-10, Succasunna, NJ 07876, USA",http://anthonyandsonsbakerynj.com/
356,Pizza Express Mt. Arlington,ChIJM4tBOgJ1w4kRa9U1mz6xgfI,1,3.7,https://maps.google.com/?cid=17474442911624320363,"312 Howard Blvd, Mt Arlington, NJ 07856, USA",https://pizzaexpressmtarlington.com/
357,Shell,ChIJWf-qdf2fw4kR_MdkDkTL6Jo,2,3.1,https://maps.google.com/?cid=11162395169597474812,"274 US-46, Mine Hill Township, NJ 07803, USA",https://find.shell.com/us/fuel/12395193-274-rt...
358,Dunkin',ChIJNcJSc_2fw4kRIA4rycWRc58,1,4.2,https://maps.google.com/?cid=11489687353021894176,"274 Rte 46, Mine Hill Township, NJ 07803, USA",https://smart.link/abu8tr12sc1y6?utm_source=go...


### Split the Split address to address, city, state, zip code and country

In [27]:
# split from right with comma as delimiter
google_data_df[['address1','city','state_zip','country']] = google_data_df['address'].str.rsplit(',', 3, expand=True)
google_data_df['state_zip'] = google_data_df['state_zip'].str.lstrip()
google_data_df[['state','zip code']] = google_data_df['state_zip'].str.split(' ', 1, expand=True)
google_data_df

Unnamed: 0,restaurant_name,place_id,price_level,restaurant_rating,google_maps_url,address,restaurant_website,address1,city,state_zip,country,state,zip code
0,Popeyes Louisiana Kitchen,ChIJ3dh_VfcKw4kR3lrLsxKB7tY,1,3.7,https://maps.google.com/?cid=15487458085899557598,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,https://www.popeyes.com/store-locator/store/re...,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,Rockaway,NJ 07866,USA,NJ,07866
1,Buffalo Wild Wings,ChIJAYlPufkKw4kRr7-IhJCnC3Y,2,4.2,https://maps.google.com/?cid=8506076560359997359,"343 Mt Hope Ave, Rockaway, NJ 07866, USA",https://www.buffalowildwings.com/en/locations/...,343 Mt Hope Ave,Rockaway,NJ 07866,USA,NJ,07866
2,Auntie Anne's,ChIJ3fcDtfsKw4kRsy2xOBfI0aQ,1,4.2,https://maps.google.com/?cid=11876493694413647283,"301 Mt Hope Ave Suite 1048, Rockaway, NJ 07866...",https://www.auntieannes.com/nj/rockaway/119-1?...,301 Mt Hope Ave Suite 1048,Rockaway,NJ 07866,USA,NJ,07866
3,Tabor Road Tavern,ChIJg8StYWinw4kRMFNslu1L9xg,3,4.4,https://maps.google.com/?cid=1798990059960816432,"510 Tabor Rd, Morris Plains, NJ 07950, USA",http://www.taborroadtavern.com/,510 Tabor Rd,Morris Plains,NJ 07950,USA,NJ,07950
4,Subway,ChIJ6YlEVO8Jw4kR4n_aDP4NbIc,1,3.4,https://maps.google.com/?cid=9758189877393522658,"559 E Main St Store # 2, Denville, NJ 07834, USA",https://order.subway.com/en-us/restaurant/2197...,559 E Main St Store # 2,Denville,NJ 07834,USA,NJ,07834
...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,ChIJ1yZeabefw4kRfvmIeEYa2xY,2,4.5,https://maps.google.com/?cid=1646938978706323838,"15 NJ-10, Succasunna, NJ 07876, USA",http://anthonyandsonsbakerynj.com/,15 NJ-10,Succasunna,NJ 07876,USA,NJ,07876
356,Pizza Express Mt. Arlington,ChIJM4tBOgJ1w4kRa9U1mz6xgfI,1,3.7,https://maps.google.com/?cid=17474442911624320363,"312 Howard Blvd, Mt Arlington, NJ 07856, USA",https://pizzaexpressmtarlington.com/,312 Howard Blvd,Mt Arlington,NJ 07856,USA,NJ,07856
357,Shell,ChIJWf-qdf2fw4kR_MdkDkTL6Jo,2,3.1,https://maps.google.com/?cid=11162395169597474812,"274 US-46, Mine Hill Township, NJ 07803, USA",https://find.shell.com/us/fuel/12395193-274-rt...,274 US-46,Mine Hill Township,NJ 07803,USA,NJ,07803
358,Dunkin',ChIJNcJSc_2fw4kRIA4rycWRc58,1,4.2,https://maps.google.com/?cid=11489687353021894176,"274 Rte 46, Mine Hill Township, NJ 07803, USA",https://smart.link/abu8tr12sc1y6?utm_source=go...,274 Rte 46,Mine Hill Township,NJ 07803,USA,NJ,07803


### Clean Google DataFrame to have only Zip Codes of Morris County.

In [28]:
google_data_df = google_data_df[google_data_df['zip code'].isin(morris_county_zipcodes)]
google_data_df

Unnamed: 0,restaurant_name,place_id,price_level,restaurant_rating,google_maps_url,address,restaurant_website,address1,city,state_zip,country,state,zip code
0,Popeyes Louisiana Kitchen,ChIJ3dh_VfcKw4kR3lrLsxKB7tY,1,3.7,https://maps.google.com/?cid=15487458085899557598,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,https://www.popeyes.com/store-locator/store/re...,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,Rockaway,NJ 07866,USA,NJ,07866
1,Buffalo Wild Wings,ChIJAYlPufkKw4kRr7-IhJCnC3Y,2,4.2,https://maps.google.com/?cid=8506076560359997359,"343 Mt Hope Ave, Rockaway, NJ 07866, USA",https://www.buffalowildwings.com/en/locations/...,343 Mt Hope Ave,Rockaway,NJ 07866,USA,NJ,07866
2,Auntie Anne's,ChIJ3fcDtfsKw4kRsy2xOBfI0aQ,1,4.2,https://maps.google.com/?cid=11876493694413647283,"301 Mt Hope Ave Suite 1048, Rockaway, NJ 07866...",https://www.auntieannes.com/nj/rockaway/119-1?...,301 Mt Hope Ave Suite 1048,Rockaway,NJ 07866,USA,NJ,07866
3,Tabor Road Tavern,ChIJg8StYWinw4kRMFNslu1L9xg,3,4.4,https://maps.google.com/?cid=1798990059960816432,"510 Tabor Rd, Morris Plains, NJ 07950, USA",http://www.taborroadtavern.com/,510 Tabor Rd,Morris Plains,NJ 07950,USA,NJ,07950
4,Subway,ChIJ6YlEVO8Jw4kR4n_aDP4NbIc,1,3.4,https://maps.google.com/?cid=9758189877393522658,"559 E Main St Store # 2, Denville, NJ 07834, USA",https://order.subway.com/en-us/restaurant/2197...,559 E Main St Store # 2,Denville,NJ 07834,USA,NJ,07834
...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,ChIJ1yZeabefw4kRfvmIeEYa2xY,2,4.5,https://maps.google.com/?cid=1646938978706323838,"15 NJ-10, Succasunna, NJ 07876, USA",http://anthonyandsonsbakerynj.com/,15 NJ-10,Succasunna,NJ 07876,USA,NJ,07876
356,Pizza Express Mt. Arlington,ChIJM4tBOgJ1w4kRa9U1mz6xgfI,1,3.7,https://maps.google.com/?cid=17474442911624320363,"312 Howard Blvd, Mt Arlington, NJ 07856, USA",https://pizzaexpressmtarlington.com/,312 Howard Blvd,Mt Arlington,NJ 07856,USA,NJ,07856
357,Shell,ChIJWf-qdf2fw4kR_MdkDkTL6Jo,2,3.1,https://maps.google.com/?cid=11162395169597474812,"274 US-46, Mine Hill Township, NJ 07803, USA",https://find.shell.com/us/fuel/12395193-274-rt...,274 US-46,Mine Hill Township,NJ 07803,USA,NJ,07803
358,Dunkin',ChIJNcJSc_2fw4kRIA4rycWRc58,1,4.2,https://maps.google.com/?cid=11489687353021894176,"274 Rte 46, Mine Hill Township, NJ 07803, USA",https://smart.link/abu8tr12sc1y6?utm_source=go...,274 Rte 46,Mine Hill Township,NJ 07803,USA,NJ,07803


In [29]:
# Drop unwanted columns and rename columns
google_data_df = google_data_df.drop(columns=['place_id', 'address', 'country', 'state_zip'])
google_data_df = google_data_df.rename(columns={'restaurant_rating':'google_rating',
                                  'price_level':'google_price_level'
                                 })
google_data_df['restaurant_name'] = google_data_df['restaurant_name'].str.strip()
google_data_df['address1'] = google_data_df['address1'].str.strip()

google_data_df

Unnamed: 0,restaurant_name,google_price_level,google_rating,google_maps_url,restaurant_website,address1,city,state,zip code
0,Popeyes Louisiana Kitchen,1,3.7,https://maps.google.com/?cid=15487458085899557598,https://www.popeyes.com/store-locator/store/re...,Rockaway Townsquare Mall-301 Mt Hope Road - Su...,Rockaway,NJ,07866
1,Buffalo Wild Wings,2,4.2,https://maps.google.com/?cid=8506076560359997359,https://www.buffalowildwings.com/en/locations/...,343 Mt Hope Ave,Rockaway,NJ,07866
2,Auntie Anne's,1,4.2,https://maps.google.com/?cid=11876493694413647283,https://www.auntieannes.com/nj/rockaway/119-1?...,301 Mt Hope Ave Suite 1048,Rockaway,NJ,07866
3,Tabor Road Tavern,3,4.4,https://maps.google.com/?cid=1798990059960816432,http://www.taborroadtavern.com/,510 Tabor Rd,Morris Plains,NJ,07950
4,Subway,1,3.4,https://maps.google.com/?cid=9758189877393522658,https://order.subway.com/en-us/restaurant/2197...,559 E Main St Store # 2,Denville,NJ,07834
...,...,...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,2,4.5,https://maps.google.com/?cid=1646938978706323838,http://anthonyandsonsbakerynj.com/,15 NJ-10,Succasunna,NJ,07876
356,Pizza Express Mt. Arlington,1,3.7,https://maps.google.com/?cid=17474442911624320363,https://pizzaexpressmtarlington.com/,312 Howard Blvd,Mt Arlington,NJ,07856
357,Shell,2,3.1,https://maps.google.com/?cid=11162395169597474812,https://find.shell.com/us/fuel/12395193-274-rt...,274 US-46,Mine Hill Township,NJ,07803
358,Dunkin',1,4.2,https://maps.google.com/?cid=11489687353021894176,https://smart.link/abu8tr12sc1y6?utm_source=go...,274 Rte 46,Mine Hill Township,NJ,07803


# Trip Advisor Data

### Store CSV data into a DataFrame

In [30]:
csv_file = "../Resources/tripadvisor_data.csv"
tripadvisor_data_df = pd.read_csv(csv_file)
tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,restaurant_rating,price_level,cuisine,address,restaurant_website
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","28 Broadway, Denville, NJ 07834-2704",http://www.sergioandco.com
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish","64 E McFarlan St, Dover, NJ 07801-3522",http://www.quietmanpub.com
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,"500 Route 10 West, Randolph, NJ 07869-2148",https://www.sbknj.com/
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","1171 Sussex Tpke, Randolph, NJ 07869-4225",http://www.veronarestaurant.com


### Dropping duplicate restaurant address and restaurant_name value

In [31]:
# clear leading and trailing spaces
tripadvisor_data_df['restaurant_name'] = tripadvisor_data_df['restaurant_name'].str.strip()
tripadvisor_data_df['address'] = tripadvisor_data_df['address'].str.strip()

# drop duplicates for restaurant name and address
tripadvisor_data_df = tripadvisor_data_df.drop_duplicates(subset=['restaurant_name', 'address'])
tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,restaurant_rating,price_level,cuisine,address,restaurant_website
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","28 Broadway, Denville, NJ 07834-2704",http://www.sergioandco.com
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish","64 E McFarlan St, Dover, NJ 07801-3522",http://www.quietmanpub.com
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,"500 Route 10 West, Randolph, NJ 07869-2148",https://www.sbknj.com/
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","1171 Sussex Tpke, Randolph, NJ 07869-4225",http://www.veronarestaurant.com


In [32]:
# fill empty cells
tripadvisor_data_df = tripadvisor_data_df.fillna("")
tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,restaurant_rating,price_level,cuisine,address,restaurant_website
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","28 Broadway, Denville, NJ 07834-2704",http://www.sergioandco.com
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish","64 E McFarlan St, Dover, NJ 07801-3522",http://www.quietmanpub.com
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,"500 Route 10 West, Randolph, NJ 07869-2148",https://www.sbknj.com/
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","1171 Sussex Tpke, Randolph, NJ 07869-4225",http://www.veronarestaurant.com


### Split the Split address to address, city, state, zip code and country

In [33]:
# split from right with comma as delimiter
tripadvisor_data_df[['address1','city','state_zip']] = tripadvisor_data_df['address'].str.rsplit(',', 2, expand=True)
tripadvisor_data_df['state_zip'] = tripadvisor_data_df['state_zip'].str.lstrip()
tripadvisor_data_df[['state','zip code']] = tripadvisor_data_df['state_zip'].str.split(' ', 1, expand=True)
tripadvisor_data_df['zip code'] = tripadvisor_data_df['zip code'].str.split('-', expand=True)
tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,restaurant_rating,price_level,cuisine,address,restaurant_website,address1,city,state_zip,state,zip code
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",Parsippany,NJ 07054-2099,NJ,7054
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","28 Broadway, Denville, NJ 07834-2704",http://www.sergioandco.com,28 Broadway,Denville,NJ 07834-2704,NJ,7834
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish","64 E McFarlan St, Dover, NJ 07801-3522",http://www.quietmanpub.com,64 E McFarlan St,Dover,NJ 07801-3522,NJ,7801
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,"500 Route 10 West, Randolph, NJ 07869-2148",https://www.sbknj.com/,500 Route 10 West,Randolph,NJ 07869-2148,NJ,7869
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","1171 Sussex Tpke, Randolph, NJ 07869-4225",http://www.veronarestaurant.com,1171 Sussex Tpke,Randolph,NJ 07869-4225,NJ,7869


### Clean Trip Advisor DataFrame to have only Zip Codes of Morris County.

In [34]:
tripadvisor_data_df = tripadvisor_data_df[tripadvisor_data_df['zip code'].isin(morris_county_zipcodes)]
tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,restaurant_rating,price_level,cuisine,address,restaurant_website,address1,city,state_zip,state,zip code
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",Parsippany,NJ 07054-2099,NJ,7054
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","28 Broadway, Denville, NJ 07834-2704",http://www.sergioandco.com,28 Broadway,Denville,NJ 07834-2704,NJ,7834
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish","64 E McFarlan St, Dover, NJ 07801-3522",http://www.quietmanpub.com,64 E McFarlan St,Dover,NJ 07801-3522,NJ,7801
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,"500 Route 10 West, Randolph, NJ 07869-2148",https://www.sbknj.com/,500 Route 10 West,Randolph,NJ 07869-2148,NJ,7869
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options","1171 Sussex Tpke, Randolph, NJ 07869-4225",http://www.veronarestaurant.com,1171 Sussex Tpke,Randolph,NJ 07869-4225,NJ,7869


In [35]:
# Drop unwanted columns and rename columns
tripadvisor_data_df = tripadvisor_data_df.drop(columns=['address', 'state_zip'])
tripadvisor_data_df = tripadvisor_data_df.rename(columns={'restaurant_rating':'tripadvisor_rating',
                                                          'price_level':'tripadvisor_price_level',
                                                          'cuisine':'tripadvisor_cuisine'
                                                         })
tripadvisor_data_df['restaurant_name'] = tripadvisor_data_df['restaurant_name'].str.strip()
tripadvisor_data_df['address1'] = tripadvisor_data_df['address1'].str.strip()

tripadvisor_data_df

Unnamed: 0,restaurant_name,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine,restaurant_website,address1,city,state,zip code
0,Gourmet Cafe Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://gourmetcafenj.com,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",Parsippany,NJ,7054
1,Sergio and Co Italian Specialties,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://www.sergioandco.com,28 Broadway,Denville,NJ,7834
2,The Quiet Man,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish",http://www.quietmanpub.com,64 E McFarlan St,Dover,NJ,7801
3,SubUrban Bar & Kitchen,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,https://www.sbknj.com/,500 Route 10 West,Randolph,NJ,7869
4,Verona Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://www.veronarestaurant.com,1171 Sussex Tpke,Randolph,NJ,7869


# Combining the Yelp and Google Data

In [36]:
# concat both yelp and google dataframes to get data from both dataframes
gy_all_df = pd.concat([yelp_df, google_data_df])
gy_all_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,Anthony & Sons Bakery & Italian Deli,,,15 NJ-10,http://anthonyandsonsbakerynj.com/,,,Succasunna,NJ,07876,2,4.5,https://maps.google.com/?cid=1646938978706323838
356,Pizza Express Mt. Arlington,,,312 Howard Blvd,https://pizzaexpressmtarlington.com/,,,Mt Arlington,NJ,07856,1,3.7,https://maps.google.com/?cid=17474442911624320363
357,Shell,,,274 US-46,https://find.shell.com/us/fuel/12395193-274-rt...,,,Mine Hill Township,NJ,07803,2,3.1,https://maps.google.com/?cid=11162395169597474812
358,Dunkin',,,274 Rte 46,https://smart.link/abu8tr12sc1y6?utm_source=go...,,,Mine Hill Township,NJ,07803,1,4.2,https://maps.google.com/?cid=11489687353021894176


In [37]:
# merge yelp and google data on inner join to get the same restaurants in both tables
gy_common_df = pd.merge(yelp_df, google_data_df, on=['restaurant_name', 'address1', 'zip code'], how='inner')
gy_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website_x,yelp_price_level,yelp_cuisine,city_x,state_x,zip code,google_price_level,google_rating,google_maps_url,restaurant_website_y,city_y,state_y
0,Pollos Pucalor,https://www.yelp.com/biz/pollos-pucalor-dover?...,4.5,85 E Blackwell St,pollospucalor.com,$,Latin American,Dover,NJ,7801,1.0,4.4,https://maps.google.com/?cid=14298467328753486970,http://www.pollospucalor.com/,Dover,NJ
1,Pub 199,https://www.yelp.com/biz/pub-199-mount-arlingt...,3.5,199 Howard Blvd,,$$,"Steakhouses, Pubs",Mount Arlington,NJ,7856,2.0,4.2,https://maps.google.com/?cid=6036139170178877904,https://pub199nj.com/,Mt Arlington,NJ
2,Tabor Road Tavern,https://www.yelp.com/biz/tabor-road-tavern-mor...,3.5,510 Tabor Rd,taborroadtavern.com,$$$,American (New),Morris Plains,NJ,7950,3.0,4.4,https://maps.google.com/?cid=1798990059960816432,http://www.taborroadtavern.com/,Morris Plains,NJ
3,Hunan Taste Chinese Restaurant,https://www.yelp.com/biz/hunan-taste-chinese-r...,4.0,67 Bloomfield Ave,hunantaste.com,$$,"Chinese, Seafood, Soup",Denville,NJ,7834,2.0,4.6,https://maps.google.com/?cid=3183519225252686311,http://hunantaste.com/,Denville,NJ
4,Verde Ristorante,https://www.yelp.com/biz/verde-ristorante-morr...,4.0,1012 Tabor Rd,verderistorante.com,$$,Italian,Morris Plains,NJ,7950,2.0,4.5,https://maps.google.com/?cid=9976836420500350003,http://verderistorante.com/,Morris Plains,NJ
5,Rockaway River Barn,https://www.yelp.com/biz/rockaway-river-barn-r...,3.0,11 Green Pond Rd,rockawayriverbarn.com,$$,"American (New), Burgers, Sports Bars",Rockaway,NJ,7866,2.0,4.2,https://maps.google.com/?cid=12817532522511618573,http://www.rockawayriverbarn.com/,Rockaway,NJ
6,La Cucina,https://www.yelp.com/biz/la-cucina-denville?os...,4.0,278 Diamond Spring Rd,lacucina-denville.com,$$,Italian,Denville,NJ,7834,2.0,4.5,https://maps.google.com/?cid=1564351430727940054,http://www.lacucina-denville.com/,Denville,NJ
7,Veggie Heaven,https://www.yelp.com/biz/veggie-heaven-denvill...,4.5,57 Bloomfield Ave,veggieheavennj.com,$$,"Chinese, Sushi Bars, Salad",Denville,NJ,7834,2.0,4.6,https://maps.google.com/?cid=4921425093695790070,http://www.veggieheavennj.com/,Denville,NJ
8,Fresco Mexican,https://www.yelp.com/biz/fresco-mexican-cheste...,3.5,137 Main St,frescomexican.com,$$,"Mexican, Soup",Chester,NJ,7930,2.0,4.4,https://maps.google.com/?cid=6452456972668682316,http://www.frescomexican.com/,Chester,NJ
9,The Grand Cafe,https://www.yelp.com/biz/the-grand-cafe-morris...,4.0,42 Washington St,thegrandcafe.com,$$$,"American (New), French, Salad",Morristown,NJ,7960,,4.4,https://maps.google.com/?cid=3279464098397647907,http://www.thegrandcafe.com/,Morristown,NJ


In [38]:
# drop duplicate restaurants based on address1 and zip code
gy_common_df = gy_common_df.drop_duplicates(subset=['restaurant_name', 'address1', 'zip code'])
gy_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website_x,yelp_price_level,yelp_cuisine,city_x,state_x,zip code,google_price_level,google_rating,google_maps_url,restaurant_website_y,city_y,state_y
0,Pollos Pucalor,https://www.yelp.com/biz/pollos-pucalor-dover?...,4.5,85 E Blackwell St,pollospucalor.com,$,Latin American,Dover,NJ,7801,1.0,4.4,https://maps.google.com/?cid=14298467328753486970,http://www.pollospucalor.com/,Dover,NJ
1,Pub 199,https://www.yelp.com/biz/pub-199-mount-arlingt...,3.5,199 Howard Blvd,,$$,"Steakhouses, Pubs",Mount Arlington,NJ,7856,2.0,4.2,https://maps.google.com/?cid=6036139170178877904,https://pub199nj.com/,Mt Arlington,NJ
2,Tabor Road Tavern,https://www.yelp.com/biz/tabor-road-tavern-mor...,3.5,510 Tabor Rd,taborroadtavern.com,$$$,American (New),Morris Plains,NJ,7950,3.0,4.4,https://maps.google.com/?cid=1798990059960816432,http://www.taborroadtavern.com/,Morris Plains,NJ
3,Hunan Taste Chinese Restaurant,https://www.yelp.com/biz/hunan-taste-chinese-r...,4.0,67 Bloomfield Ave,hunantaste.com,$$,"Chinese, Seafood, Soup",Denville,NJ,7834,2.0,4.6,https://maps.google.com/?cid=3183519225252686311,http://hunantaste.com/,Denville,NJ
4,Verde Ristorante,https://www.yelp.com/biz/verde-ristorante-morr...,4.0,1012 Tabor Rd,verderistorante.com,$$,Italian,Morris Plains,NJ,7950,2.0,4.5,https://maps.google.com/?cid=9976836420500350003,http://verderistorante.com/,Morris Plains,NJ
5,Rockaway River Barn,https://www.yelp.com/biz/rockaway-river-barn-r...,3.0,11 Green Pond Rd,rockawayriverbarn.com,$$,"American (New), Burgers, Sports Bars",Rockaway,NJ,7866,2.0,4.2,https://maps.google.com/?cid=12817532522511618573,http://www.rockawayriverbarn.com/,Rockaway,NJ
6,La Cucina,https://www.yelp.com/biz/la-cucina-denville?os...,4.0,278 Diamond Spring Rd,lacucina-denville.com,$$,Italian,Denville,NJ,7834,2.0,4.5,https://maps.google.com/?cid=1564351430727940054,http://www.lacucina-denville.com/,Denville,NJ
7,Veggie Heaven,https://www.yelp.com/biz/veggie-heaven-denvill...,4.5,57 Bloomfield Ave,veggieheavennj.com,$$,"Chinese, Sushi Bars, Salad",Denville,NJ,7834,2.0,4.6,https://maps.google.com/?cid=4921425093695790070,http://www.veggieheavennj.com/,Denville,NJ
8,Fresco Mexican,https://www.yelp.com/biz/fresco-mexican-cheste...,3.5,137 Main St,frescomexican.com,$$,"Mexican, Soup",Chester,NJ,7930,2.0,4.4,https://maps.google.com/?cid=6452456972668682316,http://www.frescomexican.com/,Chester,NJ
9,The Grand Cafe,https://www.yelp.com/biz/the-grand-cafe-morris...,4.0,42 Washington St,thegrandcafe.com,$$$,"American (New), French, Salad",Morristown,NJ,7960,,4.4,https://maps.google.com/?cid=3279464098397647907,http://www.thegrandcafe.com/,Morristown,NJ


In [39]:
# drop unwanted columns and rename rest of the columns
gy_common_df = gy_common_df.drop(columns=['restaurant_website_x','city_x','state_x'])
gy_common_df = gy_common_df.rename(columns={'restaurant_website_y':'restaurant_website',
                                                          'city_y':'city',
                                                          'state_y':'state'})
gy_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,yelp_price_level,yelp_cuisine,zip code,google_price_level,google_rating,google_maps_url,restaurant_website,city,state
0,Pollos Pucalor,https://www.yelp.com/biz/pollos-pucalor-dover?...,4.5,85 E Blackwell St,$,Latin American,7801,1.0,4.4,https://maps.google.com/?cid=14298467328753486970,http://www.pollospucalor.com/,Dover,NJ
1,Pub 199,https://www.yelp.com/biz/pub-199-mount-arlingt...,3.5,199 Howard Blvd,$$,"Steakhouses, Pubs",7856,2.0,4.2,https://maps.google.com/?cid=6036139170178877904,https://pub199nj.com/,Mt Arlington,NJ
2,Tabor Road Tavern,https://www.yelp.com/biz/tabor-road-tavern-mor...,3.5,510 Tabor Rd,$$$,American (New),7950,3.0,4.4,https://maps.google.com/?cid=1798990059960816432,http://www.taborroadtavern.com/,Morris Plains,NJ
3,Hunan Taste Chinese Restaurant,https://www.yelp.com/biz/hunan-taste-chinese-r...,4.0,67 Bloomfield Ave,$$,"Chinese, Seafood, Soup",7834,2.0,4.6,https://maps.google.com/?cid=3183519225252686311,http://hunantaste.com/,Denville,NJ
4,Verde Ristorante,https://www.yelp.com/biz/verde-ristorante-morr...,4.0,1012 Tabor Rd,$$,Italian,7950,2.0,4.5,https://maps.google.com/?cid=9976836420500350003,http://verderistorante.com/,Morris Plains,NJ
5,Rockaway River Barn,https://www.yelp.com/biz/rockaway-river-barn-r...,3.0,11 Green Pond Rd,$$,"American (New), Burgers, Sports Bars",7866,2.0,4.2,https://maps.google.com/?cid=12817532522511618573,http://www.rockawayriverbarn.com/,Rockaway,NJ
6,La Cucina,https://www.yelp.com/biz/la-cucina-denville?os...,4.0,278 Diamond Spring Rd,$$,Italian,7834,2.0,4.5,https://maps.google.com/?cid=1564351430727940054,http://www.lacucina-denville.com/,Denville,NJ
7,Veggie Heaven,https://www.yelp.com/biz/veggie-heaven-denvill...,4.5,57 Bloomfield Ave,$$,"Chinese, Sushi Bars, Salad",7834,2.0,4.6,https://maps.google.com/?cid=4921425093695790070,http://www.veggieheavennj.com/,Denville,NJ
8,Fresco Mexican,https://www.yelp.com/biz/fresco-mexican-cheste...,3.5,137 Main St,$$,"Mexican, Soup",7930,2.0,4.4,https://maps.google.com/?cid=6452456972668682316,http://www.frescomexican.com/,Chester,NJ
9,The Grand Cafe,https://www.yelp.com/biz/the-grand-cafe-morris...,4.0,42 Washington St,$$$,"American (New), French, Salad",7960,,4.4,https://maps.google.com/?cid=3279464098397647907,http://www.thegrandcafe.com/,Morristown,NJ


In [40]:
# combine the table with all the restaurants with the table with common restaurants
gy_df = pd.concat([gy_all_df, gy_common_df])
gy_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,Anthony & Sons Bakery,https://www.yelp.com/biz/anthony-and-sons-bake...,4.0,20 Luger Rd,http://anthonyandsonsbakerynj.com/,$$,"Bakeries, Delis, Grocery",Denville,NJ,07834,2,4.5,https://maps.google.com/?cid=12111564385776009695
13,Jefferson House,https://www.yelp.com/biz/jefferson-house-lake-...,2.0,139 Nolans Point Rd,http://www.jefferson-house.com/,$$,American (Traditional),Lake Hopatcong,NJ,07849,2,3.6,https://maps.google.com/?cid=13778617519495280818
14,Olive Garden Italian Restaurant,https://www.yelp.com/biz/olive-garden-italian-...,3.0,323 Mt Hope Ave,https://www.olivegarden.com/locations/nj/rocka...,$$,"Italian, Salad, Soup",Rockaway,NJ,07866,2,4.2,https://maps.google.com/?cid=15112342085930840195
15,Mehndi,https://www.yelp.com/biz/mehndi-morristown-6?o...,4.0,3 Speedwell Ave,http://www.mehndimorristown.com/,$$,Indian,Morristown,NJ,07960,3,4.1,https://maps.google.com/?cid=3543060601759244459


## Google and Yelp Restaurant Data with unique restaurants

In [41]:
# drop the first occurance of common restaurants, last occurance contains the rows with both yelp and google data
gy_final_df = gy_df.drop_duplicates(subset=['restaurant_name', 'address1', 'zip code'], keep='last')
gy_final_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,Anthony & Sons Bakery,https://www.yelp.com/biz/anthony-and-sons-bake...,4.0,20 Luger Rd,http://anthonyandsonsbakerynj.com/,$$,"Bakeries, Delis, Grocery",Denville,NJ,07834,2,4.5,https://maps.google.com/?cid=12111564385776009695
13,Jefferson House,https://www.yelp.com/biz/jefferson-house-lake-...,2.0,139 Nolans Point Rd,http://www.jefferson-house.com/,$$,American (Traditional),Lake Hopatcong,NJ,07849,2,3.6,https://maps.google.com/?cid=13778617519495280818
14,Olive Garden Italian Restaurant,https://www.yelp.com/biz/olive-garden-italian-...,3.0,323 Mt Hope Ave,https://www.olivegarden.com/locations/nj/rocka...,$$,"Italian, Salad, Soup",Rockaway,NJ,07866,2,4.2,https://maps.google.com/?cid=15112342085930840195
15,Mehndi,https://www.yelp.com/biz/mehndi-morristown-6?o...,4.0,3 Speedwell Ave,http://www.mehndimorristown.com/,$$,Indian,Morristown,NJ,07960,3,4.1,https://maps.google.com/?cid=3543060601759244459


# Combining the Trip Advisor with combined Yelp and Google Data

In [42]:
# concat both combined yelp and google dataframes and trip advisor data to get data from all dataframes
restaurant_all_df = pd.concat([gy_final_df, tripadvisor_data_df])
restaurant_all_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,,,,,
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,,,,,
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,,,,,
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,,,,,
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Gourmet Cafe Restaurant,,,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com,,,Parsippany,NJ,07054,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options"
1,Sergio and Co Italian Specialties,,,28 Broadway,http://www.sergioandco.com,,,Denville,NJ,07834,,,,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options"
2,The Quiet Man,,,64 E McFarlan St,http://www.quietmanpub.com,,,Dover,NJ,07801,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish"
3,SubUrban Bar & Kitchen,,,500 Route 10 West,https://www.sbknj.com/,,,Randolph,NJ,07869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,


In [43]:
# merge yelp and google dataframes and trip advisor data on inner join to get the same restaurants in all tables
restaurant_common_df = pd.merge(gy_final_df, tripadvisor_data_df, on=['restaurant_name', 'address1', 'zip code'], how='inner')
restaurant_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website_x,yelp_price_level,yelp_cuisine,city_x,state_x,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine,restaurant_website_y,city_y,state_y
0,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,7869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://www.veronarestaurant.com,Randolph,NJ


In [44]:
# drop duplicate restaurants based on address1 and zip code
restaurant_common_df = restaurant_common_df.drop_duplicates(subset=['restaurant_name', 'address1', 'zip code'])
restaurant_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website_x,yelp_price_level,yelp_cuisine,city_x,state_x,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine,restaurant_website_y,city_y,state_y
0,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,7869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://www.veronarestaurant.com,Randolph,NJ


In [45]:
# drop unwanted columns and rename rest of the columns
restaurant_common_df = restaurant_common_df.drop(columns=['restaurant_website_x','city_x','state_x'])
restaurant_common_df = restaurant_common_df.rename(columns={'restaurant_website_y':'restaurant_website',
                                                          'city_y':'city',
                                                          'state_y':'state'})
restaurant_common_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,yelp_price_level,yelp_cuisine,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine,restaurant_website,city,state
0,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,$$,Italian,7869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",http://www.veronarestaurant.com,Randolph,NJ


In [46]:
# combine the table with all the restaurants with the table with common restaurants
restaurant_df = pd.concat([restaurant_all_df, restaurant_common_df])
restaurant_df

Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,,,,,
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4.0,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,,,,,
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,,,,,
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,,,,,
4,Verona Restaurant,https://www.yelp.com/biz/verona-restaurant-ran...,4.5,1171 Sussex Tpke,veronarestaurant.com,$$,Italian,Randolph,NJ,07869,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,Sergio and Co Italian Specialties,,,28 Broadway,http://www.sergioandco.com,,,Denville,NJ,07834,,,,https://www.tripadvisor.com/Restaurant_Review-...,5.0,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options"
2,The Quiet Man,,,64 E McFarlan St,http://www.quietmanpub.com,,,Dover,NJ,07801,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish"
3,SubUrban Bar & Kitchen,,,500 Route 10 West,https://www.sbknj.com/,,,Randolph,NJ,07869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,
4,Verona Restaurant,,,1171 Sussex Tpke,http://www.veronarestaurant.com,,,Randolph,NJ,07869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options"


## Google, Yelp and Trip Advisor Restaurant Data with unique restaurants

In [47]:
# drop the first occurance of common restaurants, last occurance contains the rows with both yelp and google data
restaurant_final_df = restaurant_df.drop_duplicates(subset=['restaurant_name', 'address1', 'zip code'], keep='last')

# add the resturant_id column for unique identifier
restaurant_final_df['restaurant_id'] = range(1, len(restaurant_final_df) + 1)
restaurant_final_df = restaurant_final_df.fillna("")
restaurant_final_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,restaurant_name,yelp_url,yelp_rating,address1,restaurant_website,yelp_price_level,yelp_cuisine,city,state,zip code,google_price_level,google_rating,google_maps_url,tripadvisor_url,tripadvisor_rating,tripadvisor_price_level,tripadvisor_cuisine,restaurant_id
0,Rosie’s Trattoria,https://www.yelp.com/biz/rosie-s-trattoria-ran...,4.5,1181 Sussex Tpke,,$$$,"Italian, Bars, Venues & Event Spaces",Randolph,NJ,07869,,,,,,,,1
1,SubUrban Bar & Kitchen,https://www.yelp.com/biz/suburban-bar-and-kitc...,4,500 NJ-10,sbknj.com,$$,"American (New), Bars, Pizza",Randolph,NJ,07869,,,,,,,,2
2,4 Seasons Mediterranean Restaurant,https://www.yelp.com/biz/4-seasons-mediterrane...,4.5,322 S Main St,4seasonswharton.com,$$,"Wine Bars, French, Italian",Wharton,NJ,07885,,,,,,,,3
3,Quiet Man Pub,https://www.yelp.com/biz/quiet-man-pub-dover-2...,4.5,64 E Mcfarlan St,quietmanpub.com,$$,"Irish Pub, Seafood, Tapas/Small Plates",Dover,NJ,07801,,,,,,,,4
5,El Lechon De Negron,https://www.yelp.com/biz/el-lechon-de-negron-d...,4,23 E Main St,,,"Puerto Rican, Empanadas",Denville,NJ,07834,,,,,,,,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Gourmet Cafe Restaurant,,,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",http://gourmetcafenj.com,,,Parsippany,NJ,07054,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",464
1,Sergio and Co Italian Specialties,,,28 Broadway,http://www.sergioandco.com,,,Denville,NJ,07834,,,,https://www.tripadvisor.com/Restaurant_Review-...,5,$$ - $$$,"Italian,Vegetarian Friendly,Vegan Options",465
2,The Quiet Man,,,64 E McFarlan St,http://www.quietmanpub.com,,,Dover,NJ,07801,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,"American, Irish",466
3,SubUrban Bar & Kitchen,,,500 Route 10 West,https://www.sbknj.com/,,,Randolph,NJ,07869,,,,https://www.tripadvisor.com/Restaurant_Review-...,4.5,$$ - $$$,,467


# Separating tables

## Final Google table

In [48]:
# search for rows that have a google_maps_url
google_final_df = restaurant_final_df[restaurant_final_df.google_maps_url != ""]

# drop unwanted columns
google_final_df = google_final_df.drop(columns=['restaurant_name', 'yelp_url', 'yelp_rating', 'yelp_price_level', 'yelp_cuisine', 'tripadvisor_url', 'tripadvisor_rating', 'tripadvisor_price_level', 'tripadvisor_cuisine', 'address1', 'restaurant_website', 'city', 'state', 'zip code'])
google_final_df = google_final_df[['restaurant_id', 'google_price_level', 'google_rating', 'google_maps_url']]
google_final_df

Unnamed: 0,restaurant_id,google_price_level,google_rating,google_maps_url
0,213,1,3.7,https://maps.google.com/?cid=15487458085899557598
1,214,2,4.2,https://maps.google.com/?cid=8506076560359997359
2,215,1,4.2,https://maps.google.com/?cid=11876493694413647283
4,216,1,3.4,https://maps.google.com/?cid=9758189877393522658
5,217,2,4.2,https://maps.google.com/?cid=4442482484205730940
...,...,...,...,...
12,459,2,4.5,https://maps.google.com/?cid=12111564385776009695
13,460,2,3.6,https://maps.google.com/?cid=13778617519495280818
14,461,2,4.2,https://maps.google.com/?cid=15112342085930840195
15,462,3,4.1,https://maps.google.com/?cid=3543060601759244459


## Final Yelp table

In [49]:
# search for rows that have a yelp_url
yelp_final_df = restaurant_final_df[restaurant_final_df.yelp_url != ""]

# drop unwanted columns
yelp_final_df = yelp_final_df.drop(columns=['google_price_level', 'google_rating', 'google_maps_url', 'tripadvisor_url', 'tripadvisor_rating', 'tripadvisor_price_level', 'tripadvisor_cuisine', 'address1', 'restaurant_website', 'city', 'state', 'zip code', 'restaurant_name'])
yelp_final_df = yelp_final_df[['restaurant_id', 'yelp_price_level', 'yelp_rating', 'yelp_url', 'yelp_cuisine']]

yelp_final_df

Unnamed: 0,restaurant_id,yelp_price_level,yelp_rating,yelp_url,yelp_cuisine
0,1,$$$,4.5,https://www.yelp.com/biz/rosie-s-trattoria-ran...,"Italian, Bars, Venues & Event Spaces"
1,2,$$,4,https://www.yelp.com/biz/suburban-bar-and-kitc...,"American (New), Bars, Pizza"
2,3,$$,4.5,https://www.yelp.com/biz/4-seasons-mediterrane...,"Wine Bars, French, Italian"
3,4,$$,4.5,https://www.yelp.com/biz/quiet-man-pub-dover-2...,"Irish Pub, Seafood, Tapas/Small Plates"
5,5,,4,https://www.yelp.com/biz/el-lechon-de-negron-d...,"Puerto Rican, Empanadas"
...,...,...,...,...,...
13,460,$$,2,https://www.yelp.com/biz/jefferson-house-lake-...,American (Traditional)
14,461,$$,3,https://www.yelp.com/biz/olive-garden-italian-...,"Italian, Salad, Soup"
15,462,$$,4,https://www.yelp.com/biz/mehndi-morristown-6?o...,Indian
16,463,$,4.5,https://www.yelp.com/biz/bryans-luncheonette-s...,"Breakfast & Brunch, Coffee & Tea, Sandwiches"


## Cuisine table from Yelp

In [50]:
# get the list of all available cuisines
cuisine_series = yelp_final_df['yelp_cuisine'].str.split(',')
cuisine_list = [item.strip() for cuisine_sublist in cuisine_series for item in cuisine_sublist]
cuisine_list = list(set(cuisine_list)) 
cuisine_list

['Peruvian',
 'Pizza',
 'Bars',
 'Fast Food',
 'Desserts',
 'American (Traditional)',
 'Spanish',
 'Malaysian',
 'Beer Bar',
 'Gluten-Free',
 'Mediterranean',
 'Indian',
 'Acai Bowls',
 'Korean',
 'Wine Bars',
 'Salvadoran',
 'Pasta Shops',
 'Steakhouses',
 'Iberian',
 'Gastropubs',
 'Sandwiches',
 'Greek',
 'Barbeque',
 'Breakfast & Brunch',
 'Chinese',
 'Kebab',
 'Irish',
 'Bubble Tea',
 'Caterers',
 'Comfort Food',
 'Sushi Bars',
 'Music Venues',
 'Halal',
 'Vegetarian',
 'Diners',
 'Colombian',
 'Polish',
 'Pubs',
 'Juice Bars & Smoothies',
 'Buffets',
 'Vietnamese',
 'Seafood',
 'Caribbean',
 'Asian Fusion',
 'Irish Pub',
 'Grocery',
 'Turkish',
 'Delis',
 'Italian',
 'Bakeries',
 'Cafes',
 'Meat Shops',
 'Food Trucks',
 'Coffee & Tea',
 'Hot Dogs',
 'Restaurants',
 'Bagels',
 'Mexican',
 'Cheese Shops',
 'German',
 'Burgers',
 'Sports Bars',
 'Afghan',
 'Puerto Rican',
 'Soul Food',
 'British',
 'Cheesesteaks',
 'Soup',
 'Tacos',
 'Vegan',
 'Portuguese',
 'Latin American',
 'Midd

In [51]:
# create the cuisine table
rest_id = []
cuisine = []
for item, row in restaurant_final_df.iterrows():
    # if there is a cuisine entry
    if row['yelp_cuisine'] != "":
        # find the cuisines and add to list
        rest_cuisine = map(str.strip, row['yelp_cuisine'].split(","))
        # search for the restaurant listed cuisines in the cuisine list
        matches = set(rest_cuisine).intersection(cuisine_list)
        # add the cuisine and restaurant id to list
        for match in matches:
            cuisine.append(match)
            rest_id.append(row['restaurant_id'])

# add lists to the dataframe
cuisine_df = pd.DataFrame({'cuisine':cuisine,
                          'restaurant_id':rest_id})
# drop cuisine column from yelp data
yelp_final_df = yelp_final_df.drop(columns=['yelp_cuisine'])
cuisine_df

Unnamed: 0,cuisine,restaurant_id
0,Venues & Event Spaces,1
1,Italian,1
2,Bars,1
3,Pizza,2
4,Bars,2
...,...,...
453,Indian,462
454,Sandwiches,463
455,Coffee & Tea,463
456,Breakfast & Brunch,463


## Final Trip Advisor table

In [52]:
# search for rows that have a tripadvisor_url
ta_final_df = restaurant_final_df[restaurant_final_df.tripadvisor_url != ""]

# drop unwanted columns
ta_final_df = ta_final_df.drop(columns=['google_price_level', 'google_rating', 'google_maps_url', 'yelp_price_level', 'yelp_rating', 'yelp_url', 'yelp_cuisine', 'address1', 'restaurant_website', 'city', 'state', 'zip code', 'restaurant_name'])
ta_final_df = ta_final_df[['restaurant_id', 'tripadvisor_price_level', 'tripadvisor_rating', 'tripadvisor_url', 'tripadvisor_cuisine']]

ta_final_df

Unnamed: 0,restaurant_id,tripadvisor_price_level,tripadvisor_rating,tripadvisor_url,tripadvisor_cuisine
0,464,$$ - $$$,4.5,https://www.tripadvisor.com/Restaurant_Review-...,"Italian,Vegetarian Friendly,Vegan Options"
1,465,$$ - $$$,5.0,https://www.tripadvisor.com/Restaurant_Review-...,"Italian,Vegetarian Friendly,Vegan Options"
2,466,$$ - $$$,4.5,https://www.tripadvisor.com/Restaurant_Review-...,"American, Irish"
3,467,$$ - $$$,4.5,https://www.tripadvisor.com/Restaurant_Review-...,
0,468,$$ - $$$,4.5,https://www.tripadvisor.com/Restaurant_Review-...,"Italian,Vegetarian Friendly,Vegan Options"


### Cuisine from Trip Advisor

In [53]:
# get the list of all available cuisines
cuisine_series = ta_final_df['tripadvisor_cuisine'].str.split(',')
cuisine_list = [item.strip() for cuisine_sublist in cuisine_series for item in cuisine_sublist]
cuisine_list = list(set(cuisine_list)) 
cuisine_list

['', 'Vegetarian Friendly', 'Vegan Options', 'Irish', 'Italian', 'American']

In [54]:
# add trip advisor cuisines the cuisine table
for item, row in restaurant_final_df.iterrows():
    # if there is a cuisine entry
    if row['tripadvisor_cuisine'] != "":
        # find the cuisines and add to list
        rest_cuisine = map(str.strip, row['tripadvisor_cuisine'].split(","))
        # search for the restaurant listed cuisines in the cuisine list
        matches = set(rest_cuisine).intersection(cuisine_list)
        # add the cuisine and restaurant id to cuisine dataframe
        for match in matches:
            cuisine_df.append({'cuisine':match, 'restaurant_id':row['restaurant_id']},ignore_index=True,sort=False)

# drop cuisine column from yelp data
ta_final_df = ta_final_df.drop(columns=['tripadvisor_cuisine'])
cuisine_df = cuisine_df.drop_duplicates()
cuisine_df

Unnamed: 0,cuisine,restaurant_id
0,Venues & Event Spaces,1
1,Italian,1
2,Bars,1
3,Pizza,2
4,Bars,2
...,...,...
453,Indian,462
454,Sandwiches,463
455,Coffee & Tea,463
456,Breakfast & Brunch,463


## Final Restaurant table

In [55]:
# drop unwanted columns
restaurant_final_df = restaurant_final_df.drop(columns=['google_price_level', 'google_rating', 'google_maps_url', 'yelp_url', 'yelp_rating', 'yelp_price_level', 'yelp_cuisine'])
restaurant_final_df = restaurant_final_df[['restaurant_id', 'restaurant_name', 'restaurant_website', 'address1', 'city', 'state', 'zip code']]
restaurant_final_df

Unnamed: 0,restaurant_id,restaurant_name,restaurant_website,address1,city,state,zip code
0,1,Rosie’s Trattoria,,1181 Sussex Tpke,Randolph,NJ,07869
1,2,SubUrban Bar & Kitchen,sbknj.com,500 NJ-10,Randolph,NJ,07869
2,3,4 Seasons Mediterranean Restaurant,4seasonswharton.com,322 S Main St,Wharton,NJ,07885
3,4,Quiet Man Pub,quietmanpub.com,64 E Mcfarlan St,Dover,NJ,07801
5,5,El Lechon De Negron,,23 E Main St,Denville,NJ,07834
...,...,...,...,...,...,...,...
0,464,Gourmet Cafe Restaurant,http://gourmetcafenj.com,"136 Baldwin Rd On the Corner of Rt 46 W, Walgr...",Parsippany,NJ,07054
1,465,Sergio and Co Italian Specialties,http://www.sergioandco.com,28 Broadway,Denville,NJ,07834
2,466,The Quiet Man,http://www.quietmanpub.com,64 E McFarlan St,Dover,NJ,07801
3,467,SubUrban Bar & Kitchen,https://www.sbknj.com/,500 Route 10 West,Randolph,NJ,07869


In [56]:
# save to csv files
google_final_df.to_csv('../Resources/google_final_data.csv', index=False)
yelp_final_df.to_csv('../Resources/yelp_final_data.csv', index=False)
ta_final_df.to_csv('../Resources/tripadvisor_final_data.csv', index=False)
cuisine_df.to_csv('../Resources/cuisine_data.csv', index=False)
restaurant_final_df.to_csv('../Resources/restaurant_final_data.csv', index=False)