In [14]:
import pandas as pd
import numpy as np
import html
import ast
from pprint import pprint
from tqdm import tqdm
import re
import json
data = pd.read_csv('zomato.csv')

## Data Cleanup

### Cleaning up reviews

In [2]:
def clean_review(review):
    review = html.unescape(review)
    review = review.encode().decode('unicode_escape')
    review = review.replace("\n", "")
    try:
        review = ast.literal_eval(review)
    except Exception as e:
        review = review.strip('][')
        review = review.strip('()')
        review = review.split('), (')
        try:
            review = [(item[:11].strip("'"), ' '.join(re.findall(r"[\w%\-.']+", item[14:-1].strip('"')))) for item in review]
        except Exception as e:
            print(review)
    return review

In [16]:
for column in list(data.columns.values):
    data[column] = data[column].apply(str)

In [23]:
data_url_review = data[['url', 'name','address','reviews_list']]

### Removing Duplicates

In [28]:
data_cleaned = data.groupby(['name', 'address']).agg({'online_order' : ','.join,
         'book_table' : ','.join,
         'rate': ','.join,
         'votes': ','.join,
         'location': ','.join, 
         'rest_type': ','.join,
         'dish_liked': ','.join,
         'cuisines': ','.join,
         'approx_cost(for two people)': ','.join,
         'menu_item': ','.join,
         'listed_in(city)': ','.join,
         'listed_in(type)': ','.join}).reset_index()

In [29]:
data_cleaned

Unnamed: 0,name,address,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),menu_item,listed_in(city),listed_in(type)
0,#FeelTheROLL,"Opposite Mantri Commercio, Outer Ring Road, De...","No,No","No,No","3.4/5,3.4 /5",77,"Bellandur,Bellandur","Quick Bites,Quick Bites","nan,nan","Fast Food,Fast Food",200200,"[],[]","Bellandur,Sarjapur Road","Delivery,Delivery"
1,#L-81 Cafe,"Sector 6, HSR Layout, HSR","Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No,No,No,No","3.9/5,3.9/5,3.9/5,3.9/5,3.9/5,3.9 /5,3.9 /5,3....",484848484848484848,"HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Burgers,Burgers,Burgers,Burgers,Burgers,Burger...","Fast Food, Beverages,Fast Food, Beverages,Fast...",400400400400400400400400400,"[],[],[],[],[],[],[],[],[]","BTM,BTM,HSR,HSR,Koramangala 4th Block,Koramang...","Delivery,Dine-out,Delivery,Dine-out,Delivery,D..."
2,#Vibes Restro,"Marasur Gate, Chandapura - Anekal Road, Near A...","No,No,No","No,No,No","nan,nan,nan",000,"Electronic City,Electronic City,Electronic City","Casual Dining,Casual Dining,Casual Dining","nan,nan,nan","Continental, Chinese, Italian,Continental, Chi...",700700700,"[],[],[]","Electronic City,Electronic City,Electronic City","Buffet,Delivery,Dine-out"
3,#refuel,"7, Ground Floor, RR Commercial Complex, Akshay...","Yes,Yes,Yes","No,No,No","3.7/5,3.7/5,3.7/5",373737,"Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafe,Cafe,Cafe","Thick Shakes, Sandwiches, Pasta, Mocktails,Thi...","Cafe, Beverages,Cafe, Beverages,Cafe, Beverages",400400400,"['Kit Kat Thick Shake', 'Ferrero Rocher Thick ...","Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafes,Delivery,Dine-out"
4,'Brahmins' Thatte Idli,"19, 1st main, 2nd cross, 3rd stage, 3rd block,...",Yes,No,,0,Basaveshwara Nagar,Quick Bites,,South Indian,100,"['Masala Dosa', 'Set Dosa', 'Shavige Bhath', '...",Rajajinagar,Dine-out
5,1 Fahreheit,"Nishika Arcade, Nanjappa Main Road, Next to Jo...",No,No,,0,New BEL Road,Dessert Parlor,,"Desserts, Ice Cream",200,[],New BEL Road,Desserts
6,1000 B.C,"16, 17th A Main, Koramangala 5th Block, Bangalore","Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No","3.2/5,3.2/5,3.2/5,3.2 /5,3.2 /5,3.2 /5",494949494949,"Koramangala 5th Block,Koramangala 5th Block,Ko...","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Shawarma, Sandwiches,Shawarma, Sandwiches,Shaw...","Arabian, Sandwich, Rolls, Burger,Arabian, Sand...",300300300300300300,"[],['Crispy Paneer Burger', 'Veg Burger', 'Egg...","BTM,Jayanagar,Koramangala 4th Block,Koramangal...","Delivery,Delivery,Delivery,Delivery,Delivery,D..."
7,100ÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂ°C,"688, Thanish Corner, 7th Main, 10th Cross, 2nd...","No,No,No","No,No,No","3.7/5,3.7 /5,3.7/5",414141,"BTM,BTM,BTM","Casual Dining,Casual Dining,Casual Dining","Chicken Biryani,Chicken Biryani,Chicken Biryani","Biryani, North Indian,Biryani, North Indian,Bi...",450450450,"[],[],[]","BTM,Jayanagar,JP Nagar","Dine-out,Dine-out,Dine-out"
8,11 to 11 Express Biriyanis,"Near Velankani Bus Stop, Electroniccity Phase ...","Yes,Yes","No,No","3.5/5,3.5/5",2222,"Electronic City,Electronic City","Quick Bites,Quick Bites","nan,nan","Biryani, Kebab,Biryani, Kebab",300300,"[],[]","Electronic City,Electronic City","Delivery,Dine-out"
9,1131 Bar + Kitchen,"100 Feet Road, HAL 2nd Stage, Indiranagar, Ban...","No,No,No,No,No,No","Yes,Yes,Yes,Yes,Yes,Yes","4.6/5,4.6/5,4.6/5,4.4 /5,4.4 /5,4.4 /5",278527852785286128612861,"Indiranagar,Indiranagar,Indiranagar,Indiranaga...","Bar, Casual Dining,Bar, Casual Dining,Bar, Cas...","Pizza, Chocolate Cake, Cocktails, Beer, Nachos...","Continental, Asian, Italian, North Indian,Cont...",150015001500150015001500,"[],[],[],[],[],[]","Indiranagar,Indiranagar,Indiranagar,Old Airpor...","Dine-out,Drinks & nightlife,Pubs and bars,Dine..."


In [30]:
data_cleaned = data_cleaned.merge(data_url_review, how='left', on=['name', 'address']).drop_duplicates(['address','name'])

In [31]:
data_cleaned

Unnamed: 0,name,address,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),menu_item,listed_in(city),listed_in(type),url,reviews_list
0,#FeelTheROLL,"Opposite Mantri Commercio, Outer Ring Road, De...","No,No","No,No","3.4/5,3.4 /5",77,"Bellandur,Bellandur","Quick Bites,Quick Bites","nan,nan","Fast Food,Fast Food",200200,"[],[]","Bellandur,Sarjapur Road","Delivery,Delivery",https://www.zomato.com/bangalore/feeltheroll-b...,"[('Rated 5.0', ""RATED\n Had an egg chicken ro..."
2,#L-81 Cafe,"Sector 6, HSR Layout, HSR","Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No,No,No,No","3.9/5,3.9/5,3.9/5,3.9/5,3.9/5,3.9 /5,3.9 /5,3....",484848484848484848,"HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Burgers,Burgers,Burgers,Burgers,Burgers,Burger...","Fast Food, Beverages,Fast Food, Beverages,Fast...",400400400400400400400400400,"[],[],[],[],[],[],[],[],[]","BTM,BTM,HSR,HSR,Koramangala 4th Block,Koramang...","Delivery,Dine-out,Delivery,Dine-out,Delivery,D...",https://www.zomato.com/bangalore/l-81-cafe-hsr...,"[('Rated 4.0', 'RATED\n This little cafe is s..."
11,#Vibes Restro,"Marasur Gate, Chandapura - Anekal Road, Near A...","No,No,No","No,No,No","nan,nan,nan",000,"Electronic City,Electronic City,Electronic City","Casual Dining,Casual Dining,Casual Dining","nan,nan,nan","Continental, Chinese, Italian,Continental, Chi...",700700700,"[],[],[]","Electronic City,Electronic City,Electronic City","Buffet,Delivery,Dine-out",https://www.zomato.com/bangalore/vibes-restro-...,"[('Rated 5.0', ""RATED\n Great service and don..."
14,#refuel,"7, Ground Floor, RR Commercial Complex, Akshay...","Yes,Yes,Yes","No,No,No","3.7/5,3.7/5,3.7/5",373737,"Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafe,Cafe,Cafe","Thick Shakes, Sandwiches, Pasta, Mocktails,Thi...","Cafe, Beverages,Cafe, Beverages,Cafe, Beverages",400400400,"['Kit Kat Thick Shake', 'Ferrero Rocher Thick ...","Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafes,Delivery,Dine-out",https://www.zomato.com/bangalore/refuel-banner...,"[('Rated 3.0', 'RATED\n We ordered for Schezw..."
17,'Brahmins' Thatte Idli,"19, 1st main, 2nd cross, 3rd stage, 3rd block,...",Yes,No,,0,Basaveshwara Nagar,Quick Bites,,South Indian,100,"['Masala Dosa', 'Set Dosa', 'Shavige Bhath', '...",Rajajinagar,Dine-out,https://www.zomato.com/bangalore/brahmins-that...,[]
18,1 Fahreheit,"Nishika Arcade, Nanjappa Main Road, Next to Jo...",No,No,,0,New BEL Road,Dessert Parlor,,"Desserts, Ice Cream",200,[],New BEL Road,Desserts,https://www.zomato.com/bangalore/1-fahreheit-n...,"[('Rated 5.0', 'RATED\n An awesome place for ..."
19,1000 B.C,"16, 17th A Main, Koramangala 5th Block, Bangalore","Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No","3.2/5,3.2/5,3.2/5,3.2 /5,3.2 /5,3.2 /5",494949494949,"Koramangala 5th Block,Koramangala 5th Block,Ko...","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Shawarma, Sandwiches,Shawarma, Sandwiches,Shaw...","Arabian, Sandwich, Rolls, Burger,Arabian, Sand...",300300300300300300,"[],['Crispy Paneer Burger', 'Veg Burger', 'Egg...","BTM,Jayanagar,Koramangala 4th Block,Koramangal...","Delivery,Delivery,Delivery,Delivery,Delivery,D...",https://www.zomato.com/bangalore/1000-b-c-kora...,"[('Rated 1.0', ""RATED\n Ordered a chicken sub..."
25,100ÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂ°C,"688, Thanish Corner, 7th Main, 10th Cross, 2nd...","No,No,No","No,No,No","3.7/5,3.7 /5,3.7/5",414141,"BTM,BTM,BTM","Casual Dining,Casual Dining,Casual Dining","Chicken Biryani,Chicken Biryani,Chicken Biryani","Biryani, North Indian,Biryani, North Indian,Bi...",450450450,"[],[],[]","BTM,Jayanagar,JP Nagar","Dine-out,Dine-out,Dine-out",https://www.zomato.com/bangalore/100%C2%B0c-bt...,"[('Rated 1.0', 'RATED\n They are frod they ha..."
28,11 to 11 Express Biriyanis,"Near Velankani Bus Stop, Electroniccity Phase ...","Yes,Yes","No,No","3.5/5,3.5/5",2222,"Electronic City,Electronic City","Quick Bites,Quick Bites","nan,nan","Biryani, Kebab,Biryani, Kebab",300300,"[],[]","Electronic City,Electronic City","Delivery,Dine-out",https://www.zomato.com/bangalore/11-to-11-expr...,"[('Rated 1.0', 'RATED\n Food quantity is not ..."
30,1131 Bar + Kitchen,"100 Feet Road, HAL 2nd Stage, Indiranagar, Ban...","No,No,No,No,No,No","Yes,Yes,Yes,Yes,Yes,Yes","4.6/5,4.6/5,4.6/5,4.4 /5,4.4 /5,4.4 /5",278527852785286128612861,"Indiranagar,Indiranagar,Indiranagar,Indiranaga...","Bar, Casual Dining,Bar, Casual Dining,Bar, Cas...","Pizza, Chocolate Cake, Cocktails, Beer, Nachos...","Continental, Asian, Italian, North Indian,Cont...",150015001500150015001500,"[],[],[],[],[],[]","Indiranagar,Indiranagar,Indiranagar,Old Airpor...","Dine-out,Drinks & nightlife,Pubs and bars,Dine...",https://www.zomato.com/bangalore/1131-bar-kitc...,"[('Rated 5.0', ""RATED\n A place with a beauti..."


### Removing duplicate row entries

In [32]:
def clean_dup_entry(entry):
    entry = entry.split(',')
    entry = list(set(entry))
    entry = ', '.join(entry)
    return entry

for column in list(data_cleaned.columns.values):
    data_cleaned[column] = data_cleaned[column].apply(clean_dup_entry)

In [33]:
data_cleaned

Unnamed: 0,name,address,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),menu_item,listed_in(city),listed_in(type),url,reviews_list
0,#FeelTheROLL,"Near Sakra World Hospital, Opposite Mantri Co...",No,No,"3.4/5, 3.4 /5",7,Bellandur,Quick Bites,,Fast Food,200,[],"Sarjapur Road, Bellandur",Delivery,https://www.zomato.com/bangalore/feeltheroll-b...,"('Rated 5.0', 'RATED\n Had an amazing mouth..."
2,#L-81 Cafe,"Sector 6, HSR Layout, HSR",Yes,No,"3.9/5, 3.9 /5",48,HSR,Quick Bites,Burgers,"Beverages, Fast Food",400,[],"Koramangala 7th Block, BTM, Koramangala 4th Bl...","Delivery, Dine-out",https://www.zomato.com/bangalore/l-81-cafe-hsr...,just came here before boarding the bus. This ...
11,#Vibes Restro,"Marasur Gate, Electronic City, Near Aliance ...",No,No,,0,Electronic City,Casual Dining,,"Italian, Chinese, Continental",700,[],Electronic City,"Delivery, Buffet, Dine-out",https://www.zomato.com/bangalore/vibes-restro-...,"""RATED\n Great service and don't forget to t..."
14,#refuel,"Ground Floor, Akshay Nagar, Bangalore, Ban...",Yes,No,3.7/5,37,Bannerghatta Road,Cafe,"Mocktails, Sandwiches, Thick Shakes, Pasta","Cafe, Beverages",400,"'Strawberry', 'Blue Berry Shake', 'Italian ...",Bannerghatta Road,"Cafes, Delivery, Dine-out",https://www.zomato.com/bangalore/refuel-banner...,'RATED\n Nice compact place with outdoor sea...
17,'Brahmins' Thatte Idli,"2nd cross, 1st main, 3rd stage, Basaveshwa...",Yes,No,,0,Basaveshwara Nagar,Quick Bites,,South Indian,100,"'Masala Dosa', 'Kesari Bhath', 'Shavige Bha...",Rajajinagar,Dine-out,https://www.zomato.com/bangalore/brahmins-that...,[]
18,1 Fahreheit,"Nanjappa Main Road, Nishika Arcade, Bengalor...",No,No,,0,New BEL Road,Dessert Parlor,,"Desserts, Ice Cream",200,[],New BEL Road,Desserts,https://www.zomato.com/bangalore/1-fahreheit-n...,'RATED\n Again a newly opened dessert parlou...
19,1000 B.C,"17th A Main, Bangalore, 16, Koramangala 5th...",Yes,No,"3.2/5, 3.2 /5",49,Koramangala 5th Block,Quick Bites,"Sandwiches, Shawarma","Arabian, Sandwich, Rolls, Burger",300,"'Pineapple Pure Juice', 'Pineapple Juice', ...","Koramangala 7th Block, BTM, Koramangala 4th Bl...",Delivery,https://www.zomato.com/bangalore/1000-b-c-kora...,"[('Rated 1.0', cute cafe with good food. Drop..."
25,100ÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂ°C,"7th Main, 2nd Phase, Thanish Corner, 688, ...",No,No,"3.7 /5, 3.7/5",41,BTM,Casual Dining,Chicken Biryani,"Biryani, North Indian",450,[],"JP Nagar, BTM, Jayanagar",Dine-out,https://www.zomato.com/bangalore/100%C2%B0c-bt...,with the serious intent with which dishes are...
28,11 to 11 Express Biriyanis,"Bangalore, Near Velankani Bus Stop, Electron...",Yes,No,3.5/5,22,Electronic City,Quick Bites,,"Kebab, Biryani",300,[],Electronic City,"Delivery, Dine-out",https://www.zomato.com/bangalore/11-to-11-expr...,"[('Rated 1.0', 'RATED\n price is tooo high')..."
30,1131 Bar + Kitchen,"Bangalore, HAL 2nd Stage, Indiranagar, 100 ...",No,Yes,"4.6/5, 4.4 /5","2861, 2785",Indiranagar,"Bar, Casual Dining","Craft Beer, Cocktails, Beer, Mocktails, Pi...","Italian, Continental, North Indian, Asian","1, 500",[],"Old Airport Road, Indiranagar","Pubs and bars, Dine-out, Drinks & nightlife",https://www.zomato.com/bangalore/1131-bar-kitc...,and its huge. 3 floor of awesomeness. \n\nWe ...
