In [1]:
import pandas as pd
import numpy as np
import html
import ast
from pprint import pprint
from tqdm import tqdm
import re
import json
data = pd.read_csv('zomato.csv')

## Data Cleanup

### Cleaning up reviews

In [20]:
def clean_review(review):
    review = html.unescape(review)
    review = review.encode().decode('unicode_escape')
    review = review.replace("\n", "")
    try:
        review = ast.literal_eval(review)
    except Exception as e:
        review = review.strip('][')
        review = review.strip('()')
        review = review.split('), (')
        try:
            review = [(item[:11].strip("'"), ' '.join(re.findall(r"[\w%\-.']+", item[14:-1].strip('"')))) for item in review]
        except Exception as e:
            print(review)
    return review

In [21]:
for column in list(data.columns.values):
    data[column] = data[column].apply(str)

In [22]:
data_url_review = data[['url', 'name','address','reviews_list']]

In [23]:
data_url_review.reviews_list[2]

'[(\'Rated 3.0\', "RATED\\n  Ambience is not that good enough and it\'s not a pocket friendly cafe and the quantity is not that good and desserts are too good enough ??.."), (\'Rated 3.0\', "RATED\\n \\nWent there for a quick bite with friends.\\nThe ambience had more of corporate feel. I would say it was unique.\\nTried nachos, pasta churros and lasagne.\\n\\nNachos were pathetic.( Seriously don\'t order)\\nPasta was okayish.\\nLasagne was good.\\nNutella churros were the best.\\nOverall an okayish experience!\\nPeace ??"), (\'Rated 4.0\', "RATED\\n  First of all, a big thanks to the staff of this Cafe. Very polite and courteous.\\n\\nI was there 15mins before their closing time. Without any discomfort or hesitation, the staff welcomed me with a warm smile and said they\'re still open, though they were preparing to close the cafe for the day.\\n\\nQuickly ordered the Thai green curry, which is served with rice. They got it for me within 10mins, hot and freshly made.\\n\\nIt was tasty 

### Removing Duplicates

In [24]:
data_cleaned = data.groupby(['name', 'address']).agg({'online_order' : ','.join,
         'book_table' : ','.join,
         'rate': ','.join,
         'votes': ','.join,
         'location': ','.join, 
         'rest_type': ','.join,
         'dish_liked': ','.join,
         'cuisines': ','.join,
         'approx_cost(for two people)': ','.join,
         'menu_item': ','.join,
         'listed_in(city)': ','.join,
         'listed_in(type)': ','.join}).reset_index()

In [25]:
data_cleaned

Unnamed: 0,name,address,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),menu_item,listed_in(city),listed_in(type)
0,#FeelTheROLL,"Opposite Mantri Commercio, Outer Ring Road, De...","No,No","No,No","3.4/5,3.4 /5",77,"Bellandur,Bellandur","Quick Bites,Quick Bites","nan,nan","Fast Food,Fast Food",200200,"[],[]","Bellandur,Sarjapur Road","Delivery,Delivery"
1,#L-81 Cafe,"Sector 6, HSR Layout, HSR","Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No,No,No,No","3.9/5,3.9/5,3.9/5,3.9/5,3.9/5,3.9 /5,3.9 /5,3....",484848484848484848,"HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR,HSR","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Burgers,Burgers,Burgers,Burgers,Burgers,Burger...","Fast Food, Beverages,Fast Food, Beverages,Fast...",400400400400400400400400400,"[],[],[],[],[],[],[],[],[]","BTM,BTM,HSR,HSR,Koramangala 4th Block,Koramang...","Delivery,Dine-out,Delivery,Dine-out,Delivery,D..."
2,#Vibes Restro,"Marasur Gate, Chandapura - Anekal Road, Near A...","No,No,No","No,No,No","nan,nan,nan",000,"Electronic City,Electronic City,Electronic City","Casual Dining,Casual Dining,Casual Dining","nan,nan,nan","Continental, Chinese, Italian,Continental, Chi...",700700700,"[],[],[]","Electronic City,Electronic City,Electronic City","Buffet,Delivery,Dine-out"
3,#refuel,"7, Ground Floor, RR Commercial Complex, Akshay...","Yes,Yes,Yes","No,No,No","3.7/5,3.7/5,3.7/5",373737,"Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafe,Cafe,Cafe","Thick Shakes, Sandwiches, Pasta, Mocktails,Thi...","Cafe, Beverages,Cafe, Beverages,Cafe, Beverages",400400400,"['Kit Kat Thick Shake', 'Ferrero Rocher Thick ...","Bannerghatta Road,Bannerghatta Road,Bannerghat...","Cafes,Delivery,Dine-out"
4,'Brahmins' Thatte Idli,"19, 1st main, 2nd cross, 3rd stage, 3rd block,...",Yes,No,,0,Basaveshwara Nagar,Quick Bites,,South Indian,100,"['Masala Dosa', 'Set Dosa', 'Shavige Bhath', '...",Rajajinagar,Dine-out
5,1 Fahreheit,"Nishika Arcade, Nanjappa Main Road, Next to Jo...",No,No,,0,New BEL Road,Dessert Parlor,,"Desserts, Ice Cream",200,[],New BEL Road,Desserts
6,1000 B.C,"16, 17th A Main, Koramangala 5th Block, Bangalore","Yes,Yes,Yes,Yes,Yes,Yes","No,No,No,No,No,No","3.2/5,3.2/5,3.2/5,3.2 /5,3.2 /5,3.2 /5",494949494949,"Koramangala 5th Block,Koramangala 5th Block,Ko...","Quick Bites,Quick Bites,Quick Bites,Quick Bite...","Shawarma, Sandwiches,Shawarma, Sandwiches,Shaw...","Arabian, Sandwich, Rolls, Burger,Arabian, Sand...",300300300300300300,"[],['Crispy Paneer Burger', 'Veg Burger', 'Egg...","BTM,Jayanagar,Koramangala 4th Block,Koramangal...","Delivery,Delivery,Delivery,Delivery,Delivery,D..."
7,100ÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂ°C,"688, Thanish Corner, 7th Main, 10th Cross, 2nd...","No,No,No","No,No,No","3.7/5,3.7 /5,3.7/5",414141,"BTM,BTM,BTM","Casual Dining,Casual Dining,Casual Dining","Chicken Biryani,Chicken Biryani,Chicken Biryani","Biryani, North Indian,Biryani, North Indian,Bi...",450450450,"[],[],[]","BTM,Jayanagar,JP Nagar","Dine-out,Dine-out,Dine-out"
8,11 to 11 Express Biriyanis,"Near Velankani Bus Stop, Electroniccity Phase ...","Yes,Yes","No,No","3.5/5,3.5/5",2222,"Electronic City,Electronic City","Quick Bites,Quick Bites","nan,nan","Biryani, Kebab,Biryani, Kebab",300300,"[],[]","Electronic City,Electronic City","Delivery,Dine-out"
9,1131 Bar + Kitchen,"100 Feet Road, HAL 2nd Stage, Indiranagar, Ban...","No,No,No,No,No,No","Yes,Yes,Yes,Yes,Yes,Yes","4.6/5,4.6/5,4.6/5,4.4 /5,4.4 /5,4.4 /5",278527852785286128612861,"Indiranagar,Indiranagar,Indiranagar,Indiranaga...","Bar, Casual Dining,Bar, Casual Dining,Bar, Cas...","Pizza, Chocolate Cake, Cocktails, Beer, Nachos...","Continental, Asian, Italian, North Indian,Cont...",150015001500150015001500,"[],[],[],[],[],[]","Indiranagar,Indiranagar,Indiranagar,Old Airpor...","Dine-out,Drinks & nightlife,Pubs and bars,Dine..."


### Adding url and reviews

In [26]:
data_cleaned = data_cleaned.merge(data_url_review, how='left', on=['name', 'address']).drop_duplicates(['address','name'])

In [32]:
columns = list(data_cleaned.columns.values)
columns.pop()
columns.pop()

'url'

### Removing duplicate row entries

In [33]:
def clean_dup_entry(entry):
    entry = entry.split(',')
    entry = list(set(entry))
    entry = ', '.join(entry)
    return entry

for column in columns:
    data_cleaned[column] = data_cleaned[column].apply(clean_dup_entry)

In [34]:
data_cleaned

Unnamed: 0,name,address,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),menu_item,listed_in(city),listed_in(type),url,reviews_list
0,#FeelTheROLL,"Near Sakra World Hospital, Opposite Mantri Co...",No,No,"3.4 /5, 3.4/5",7,Bellandur,Quick Bites,,Fast Food,200,[],"Bellandur, Sarjapur Road",Delivery,https://www.zomato.com/bangalore/feeltheroll-b...,"[('Rated 5.0', ""RATED\n Had an egg chicken ro..."
2,#L-81 Cafe,"HSR Layout, Sector 6, HSR",Yes,No,"3.9 /5, 3.9/5",48,HSR,Quick Bites,Burgers,"Beverages, Fast Food",400,[],"Koramangala 6th Block, Koramangala 4th Block, ...","Dine-out, Delivery",https://www.zomato.com/bangalore/l-81-cafe-hsr...,"[('Rated 4.0', 'RATED\n This little cafe is s..."
11,#Vibes Restro,"Near Aliance Collage, Bangalore, Electronic...",No,No,,0,Electronic City,Casual Dining,,"Chinese, Continental, Italian",700,[],Electronic City,"Dine-out, Delivery, Buffet",https://www.zomato.com/bangalore/vibes-restro-...,"[('Rated 5.0', ""RATED\n Great service and don..."
14,#refuel,"Bangalore, Bannerghatta Road, Ground Floor,...",Yes,No,3.7/5,37,Bannerghatta Road,Cafe,"Pasta, Sandwiches, Mocktails, Thick Shakes","Cafe, Beverages",400,"'Garlic Cheese Corn Sandwich', 'Ferrero Roch...",Bannerghatta Road,"Dine-out, Cafes, Delivery",https://www.zomato.com/bangalore/refuel-banner...,"[('Rated 3.0', 'RATED\n We ordered for Schezw..."
17,'Brahmins' Thatte Idli,"2nd cross, Basaveshwara Nagar, Bangalore, ...",Yes,No,,0,Basaveshwara Nagar,Quick Bites,,South Indian,100,"'Masala Dosa', 'Khara Bhath and Vada Combo',...",Rajajinagar,Dine-out,https://www.zomato.com/bangalore/brahmins-that...,[]
18,1 Fahreheit,"Vidyaranyapura, Nishika Arcade, Nanjappa Mai...",No,No,,0,New BEL Road,Dessert Parlor,,"Ice Cream, Desserts",200,[],New BEL Road,Desserts,https://www.zomato.com/bangalore/1-fahreheit-n...,"[('Rated 5.0', 'RATED\n An awesome place for ..."
19,1000 B.C,"16, Bangalore, 17th A Main, Koramangala 5th...",Yes,No,"3.2 /5, 3.2/5",49,Koramangala 5th Block,Quick Bites,"Sandwiches, Shawarma","Burger, Arabian, Sandwich, Rolls",300,"'Muskmelon Juice', 'Egg Burger', 'Milano Su...","Koramangala 6th Block, Koramangala 4th Block, ...",Delivery,https://www.zomato.com/bangalore/1000-b-c-kora...,"[('Rated 1.0', ""RATED\n Ordered a chicken sub..."
25,100ÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂ°C,"7th Main, BTM, 10th Cross, Bangalore, Tha...",No,No,"3.7/5, 3.7 /5",41,BTM,Casual Dining,Chicken Biryani,"Biryani, North Indian",450,[],"BTM, JP Nagar, Jayanagar",Dine-out,https://www.zomato.com/bangalore/100%C2%B0c-bt...,"[('Rated 1.0', 'RATED\n They are frod they ha..."
28,11 to 11 Express Biriyanis,"Near Velankani Bus Stop, Electronic City, Ba...",Yes,No,3.5/5,22,Electronic City,Quick Bites,,"Biryani, Kebab",300,[],Electronic City,"Dine-out, Delivery",https://www.zomato.com/bangalore/11-to-11-expr...,"[('Rated 1.0', 'RATED\n Food quantity is not ..."
30,1131 Bar + Kitchen,"Indiranagar, Bangalore, HAL 2nd Stage, 100 ...",No,Yes,"4.6/5, 4.4 /5","2785, 2861",Indiranagar,"Casual Dining, Bar","Craft Beer, Chocolate Cake, Mocktails, Nac...","Continental, North Indian, Asian, Italian","500, 1",[],"Indiranagar, Old Airport Road","Pubs and bars, Dine-out, Drinks & nightlife",https://www.zomato.com/bangalore/1131-bar-kitc...,"[('Rated 5.0', ""RATED\n A place with a beauti..."


In [35]:
data_cleaned.to_csv("zomato_cleaned.csv")