In [2]:
# Defining a function to get the information of each restaurant in hyderabad

In [1]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import time

def get_res_info_hyd(df_hyd, bs_data_hyd, max_restaurants_hyd):
    restaurants_hyd = bs_data_hyd.find_all("div", attrs={"class": "sc-kDgGX fjYwDL"})
    for each_res in restaurants_hyd:
        if len(df_hyd) >= max_restaurants_hyd:
            break  # Stop if we've reached the desired number of restaurants
        
        try:
            name = each_res.find("h4", attrs={"class": "sc-1hp8d8a-0 sc-bpubUI eKeNGz"}).text
            location = each_res.find("p", attrs={"class": "sc-1hez2tp-0 sc-jkPxnQ jKgMiA"}).text
            cuisine = each_res.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr gVSfmH"}).text
            price_for_2 = each_res.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr jIrGje"}).text
            walk_in_offer = each_res.find("div", attrs={"class": "walkin-offer-value"}).text
            rating = each_res.find("div", attrs={"class": "sc-1q7bklc-1 cILgox"}).text
            average_distance = each_res.find("div", attrs={"class": "min-basic-info-right"}).text
            

        except AttributeError:
            continue

        data_dict_hyd = {"Name": name,
                         "Location": location,
                         "Cuisine": cuisine,
                         "Price for 2": price_for_2,
                         "Walk in Offer": walk_in_offer,
                         "Rating": rating,
                         "Average distance": average_distance}

        each_df_hyd = pd.DataFrame(data_dict_hyd, index=[0])
        df_hyd = pd.concat((df_hyd, each_df_hyd), ignore_index=True)
    return df_hyd

# Setup WebDriver
edge_options = Options()
edge_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
service = Service('msedgedriver.exe')
driver = webdriver.Edge(service=service, options=edge_options)

page_link = r"https://www.zomato.com/hyderabad"
driver.get(page_link)

df_hyd = pd.DataFrame()

# Desired number of restaurants to scrape
max_restaurants_hyd = 500

# Scroll and load more content
last_height = driver.execute_script("return document.body.scrollHeight")
while len(df_hyd) < max_restaurants_hyd:
    # Scroll down to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for new content to load

    # Get new page source and parse with BeautifulSoup
    bs_data_hyd = BeautifulSoup(driver.page_source, 'html.parser')
    df_hyd = get_res_info_hyd(df_hyd, bs_data_hyd, max_restaurants_hyd)

    # Check if more content was loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Total restaurants scraped: {len(df_hyd)}")

driver.quit()

Total restaurants scraped: 500


In [2]:
df_hyd

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...","₹2,000 for two",Flat 10% OFF,New,5.8 km
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",₹950 for two,Flat 10% OFF,4.2,6.5 km
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...","₹1,200 for two",Flat 10% OFF,3.7,6.5 km
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian","₹1,000 for two",Flat 15% OFF,4.3,8.6 km
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...","₹2,400 for two",Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...
495,Jewel of Nizam - The Golkonda Hotel,"Golkonda Hotel, Masab Tank, Hyderabad","Hyderabadi, Mughlai","₹5,000 for two",Flat 10% OFF,4.1,4.9 km
496,Cravery,"Jubilee Hills, Hyderabad","Cafe, Chinese, BBQ, Continental, Seafood, Ital...",₹950 for two,Flat 10% OFF,4.6,9 km
497,Cravery Cafe,"Financial District, Hyderabad","Cafe, Coffee, North Indian, Mughlai, Seafood, ...","₹1,000 for two",Flat 10% OFF,4.4,15.9 km
498,Sounds & Spirits,"Madhapur, Hyderabad","Finger Food, Continental, Pizza, North Indian,...","₹1,300 for two",Flat 10% OFF,3.9,12 km


In [3]:
# Defining a function to get the information of each restaurant in Bangalore

In [4]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import time

def get_res_info_blr(df_blr, bs_data_blr, max_restaurants_blr):
    restaurants_blr = bs_data_blr.find_all("div", attrs={"class": "sc-kDgGX fjYwDL"})
    for each_res_blr in restaurants_blr:
        if len(df_blr) >= max_restaurants_blr:
            break  # Stop if we've reached the desired number of restaurants
        
        try:
            name_blr = each_res_blr.find("h4", attrs={"class": "sc-1hp8d8a-0 sc-bpubUI eKeNGz"}).text
            location_blr = each_res_blr.find("p", attrs={"class": "sc-1hez2tp-0 sc-jkPxnQ jKgMiA"}).text
            cuisine_blr = each_res_blr.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr gVSfmH"}).text
            price_for_2_blr = each_res_blr.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr jIrGje"}).text
            walk_in_offer_blr = each_res_blr.find("div", attrs={"class": "walkin-offer-value"}).text
            rating_blr = each_res_blr.find("div", attrs={"class": "sc-1q7bklc-1 cILgox"}).text
            average_distance_blr = each_res_blr.find("div", attrs={"class": "min-basic-info-right"}).text

        except AttributeError:
            continue

        data_dict_blr = {"Name": name_blr,
                         "Location": location_blr,
                         "Cuisine": cuisine_blr,
                         "Price for 2": price_for_2_blr,
                         "Walk in Offer": walk_in_offer_blr,
                         "Rating": rating_blr,
                         "Average distance": average_distance_blr}

        each_df_blr = pd.DataFrame(data_dict_blr, index=[0])
        df_blr = pd.concat((df_blr, each_df_blr), ignore_index=True)
    return df_blr

# Setup WebDriver
edge_options = Options()
edge_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
service = Service('msedgedriver.exe')
driver = webdriver.Edge(service=service, options=edge_options)

page_link_blr = r"https://www.zomato.com/bangalore"
driver.get(page_link_blr)

df_blr = pd.DataFrame()

# Desired number of restaurants to scrape
max_restaurants_blr = 500

# Scroll and load more content
last_height = driver.execute_script("return document.body.scrollHeight")
while len(df_blr) < max_restaurants_blr:
    # Scroll down to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for new content to load

    # Get new page source and parse with BeautifulSoup
    bs_data_blr = BeautifulSoup(driver.page_source, 'html.parser')
    df_blr = get_res_info_blr(df_blr, bs_data_blr, max_restaurants_blr)

    # Check if more content was loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Total restaurants scraped: {len(df_blr)}")

driver.quit()

Total restaurants scraped: 500


In [5]:
df_blr

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Oney Brewhouse,"Cunningham Road, Bangalore","American, Continental, Italian, South Indian, ...","₹1,500 for two",Flat 15% OFF,4.0,1.6 km
1,Farzi Cafe,"UB City, Bangalore","North Indian, Desserts","₹1,900 for two",Flat 10% OFF,4.2,168 m
2,SkyDeck By Sherlock's,"MG Road, Bangalore","Continental, Chinese, Finger Food, North India...","₹1,800 for two",Flat 15% OFF,4.2,1.5 km
3,Maayaa,"Prestige Trade Tower, Race Course Road, Banga...","South Indian, Seafood, Biryani, Kerala, Desser...","₹1,200 for two",Flat 15% OFF,4.1,1.7 km
4,Ouzo By Fire,"Indiranagar, Bangalore","Goan, Mangalorean, Beverages","₹1,000 for two",Flat 10% OFF,4.3,5.1 km
...,...,...,...,...,...,...,...
495,White Garden,"Kalyan Nagar, Bangalore","North Indian, Italian, Lebanese, Asian, Contin...","₹1,600 for two",Flat 10% OFF,4.3,7.8 km
496,Ambedo Kitchen|Taproom,"JP Nagar, Bangalore","Continental, Asian, North Indian, Italian, Piz...","₹1,500 for two",Flat 10% OFF,4.5,6.8 km
497,Punjab Grill,"Forum Rex Walk, Bangalore","North Indian, Kebab, Biryani, Mughlai","₹2,000 for two",Flat 15% OFF,4.3,1.3 km
498,Levels Pub & Kitchen,"JP Nagar, Bangalore","Asian, South Indian, North Indian","₹1,700 for two",Flat 10% OFF,4.5,7.3 km


In [4]:
# Defining a function to get the information of each restaurant in Mumbai

In [7]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import time

def get_res_info_mum(df_mum, bs_data_mum, max_restaurants_mum):
    restaurants_mum = bs_data_mum.find_all("div", attrs={"class": "sc-kDgGX fjYwDL"})
    for each_res_mum in restaurants_mum:
        if len(df_mum) >= max_restaurants_mum:
            break  # Stop if we've reached the desired number of restaurants
        
        try:
            name_mum = each_res_mum.find("h4", attrs={"class": "sc-1hp8d8a-0 sc-bpubUI eKeNGz"}).text
            location_mum = each_res_mum.find("p", attrs={"class": "sc-1hez2tp-0 sc-jkPxnQ jKgMiA"}).text
            cuisine_mum = each_res_mum.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr gVSfmH"}).text
            price_for_2_mum = each_res_mum.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr jIrGje"}).text
            walk_in_offer_mum = each_res_mum.find("div", attrs={"class": "walkin-offer-value"}).text
            rating_mum = each_res_mum.find("div", attrs={"class": "sc-1q7bklc-1 cILgox"}).text
            average_distance_mum = each_res_mum.find("div", attrs={"class": "min-basic-info-right"}).text

        except AttributeError:
            continue

        data_dict_mum = {"Name": name_mum,
                         "Location": location_mum,
                         "Cuisine": cuisine_mum,
                         "Price for 2": price_for_2_mum,
                         "Walk in Offer": walk_in_offer_mum,
                         "Rating": rating_mum,
                         "Average distance": average_distance_mum}

        each_df_mum = pd.DataFrame(data_dict_mum, index=[0])
        df_mum = pd.concat((df_mum, each_df_mum), ignore_index=True)
    return df_mum

# Setup WebDriver
edge_options = Options()
edge_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
service = Service('msedgedriver.exe')
driver = webdriver.Edge(service=service, options=edge_options)

page_link_mum = r"https://www.zomato.com/mumbai"
driver.get(page_link_mum)

df_mum = pd.DataFrame()

# Desired number of restaurants to scrape
max_restaurants_mum = 500

# Scroll and load more content
last_height = driver.execute_script("return document.body.scrollHeight")
while len(df_mum) < max_restaurants_mum:
    # Scroll down to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for new content to load

    # Get new page source and parse with BeautifulSoup
    bs_data_mum = BeautifulSoup(driver.page_source, 'html.parser')
    df_mum = get_res_info_mum(df_mum, bs_data_mum, max_restaurants_mum)

    # Check if more content was loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Total restaurants scraped: {len(df_mum)}")

driver.quit()

Total restaurants scraped: 500


In [8]:
df_mum

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Not Just Tamasha,"Lower Parel, Mumbai","Bar Food, Continental, Pasta, Pizza, Asian, Ch...","₹1,800 for two",Flat 20% OFF,New,3.3 km
1,Goa Portuguesa,"Mahim, Mumbai","Goan, Portuguese, South Indian, Maharashtrian,...","₹2,500 for two",Flat 10% OFF,4.5,2.3 km
2,Eight,"Palladium Mall, Lower Parel, Mumbai","Asian, Japanese, Sushi","₹3,500 for two",Flat 10% OFF,4.6,4.1 km
3,Ishaara,"Palladium Mall, Lower Parel, Mumbai","North Indian, Mughlai, Kebab, Biryani","₹1,900 for two",Flat 10% OFF,4.7,4.1 km
4,Therapy Cocktail Bar,"Khar, Mumbai","Bar Food, Chinese, Desserts, Beverages, Pasta,...","₹1,000 for two",Flat 10% OFF,4.2,6.1 km
...,...,...,...,...,...,...,...
495,Bustling Brew Bistro Cafe,"Wagle Estate, Thane West, Thane","Cafe, Beverages, Italian, Chinese, Fast Food, ...",₹700 for two,Flat 15% OFF,4.7,21.9 km
496,On The Docks,"Fort, Mumbai","Chinese, Continental, Italian, Pizza, Pasta, N...","₹1,200 for two",Flat 10% OFF,4.2,9.1 km
497,Kissa Stories Over Food,"Andheri Lokhandwala, Andheri West, Mumbai","Asian, Sushi, Italian, Pizza, Modern Indian, B...","₹1,400 for two",Flat 15% OFF,4.3,14.4 km
498,Three 60 Daily Bar & Kitchen,"Versova, Andheri West, Mumbai","Bar Food, North Indian, Mughlai, Biryani, Keba...","₹1,000 for two",Flat 15% OFF,4.4,13 km


In [5]:
# Defining a function to get the information of each restaurant in Chennai

In [9]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import time

def get_res_info_che(df_che, bs_data_che, max_restaurants_che):
    restaurants_che = bs_data_che.find_all("div", attrs={"class": "sc-kDgGX fjYwDL"})
    for each_res_che in restaurants_che:
        if len(df_che) >= max_restaurants_che:
            break  # Stop if we've reached the desired number of restaurants
        
        try:
            name_che = each_res_che.find("h4", attrs={"class": "sc-1hp8d8a-0 sc-bpubUI eKeNGz"}).text
            location_che = each_res_che.find("p", attrs={"class": "sc-1hez2tp-0 sc-jkPxnQ jKgMiA"}).text
            cuisine_che = each_res_che.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr gVSfmH"}).text
            price_for_2_che = each_res_che.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr jIrGje"}).text
            walk_in_offer_che = each_res_che.find("div", attrs={"class": "walkin-offer-value"}).text
            rating_che = each_res_che.find("div", attrs={"class": "sc-1q7bklc-1 cILgox"}).text
            average_distance_che = each_res_che.find("div", attrs={"class": "min-basic-info-right"}).text

        except AttributeError:
            continue

        data_dict_che = {"Name": name_che,
                         "Location": location_che,
                         "Cuisine": cuisine_che,
                         "Price for 2": price_for_2_che,
                         "Walk in Offer": walk_in_offer_che,
                         "Rating": rating_che,
                         "Average distance": average_distance_che}

        each_df_che = pd.DataFrame(data_dict_che, index=[0])
        df_che = pd.concat((df_che, each_df_che), ignore_index=True)
    return df_che

# Setup WebDriver
edge_options = Options()
edge_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
service = Service('msedgedriver.exe')
driver = webdriver.Edge(service=service, options=edge_options)

page_link_che = r"https://www.zomato.com/chennai"
driver.get(page_link_che)

df_che = pd.DataFrame()

# Desired number of restaurants to scrape
max_restaurants_che = 500

# Scroll and load more content
last_height = driver.execute_script("return document.body.scrollHeight")
while len(df_che) < max_restaurants_che:
    # Scroll down to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for new content to load

    # Get new page source and parse with BeautifulSoup
    bs_data_che = BeautifulSoup(driver.page_source, 'html.parser')
    df_che = get_res_info_che(df_che, bs_data_che, max_restaurants_che)

    # Check if more content was loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Total restaurants scraped: {len(df_che)}")

driver.quit()

Total restaurants scraped: 500


In [10]:
df_che

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Sterling By Madras Square,"Nungambakkam, Chennai","Continental, Italian, Chinese, Pasta, Asian, P...","₹1,000 for two",Flat 10% OFF,New,3.6 km
1,Dil Se Rasoi,"Kilpauk, Chennai","Chinese, North Indian, Street Food, Sandwich, ...","₹1,000 for two",Flat 10% OFF,3.7,2.8 km
2,Dhaba - Estd 1986 Delhi,"Ispahani Centre, Nungambakkam, Chennai","North Indian, Mughlai, Biryani, Kebab, Desserts","₹1,500 for two",Flat 10% OFF,4.4,3.9 km
3,Marina Sands,"Ramada, Egmore, Chennai","Seafood, North Indian, Chinese, Desserts","₹1,000 for two",Flat 20% OFF,3.6,983 m
4,Gujraj Veg Restaurant,"Park Town, Chennai","South Indian, Chinese, North Indian, Thai, Ice...","₹1,000 for two",Flat 10% OFF,3.8,365 m
...,...,...,...,...,...,...,...
495,AsianKatha,"Nungambakkam, Chennai","Japanese, Singaporean, Chinese, Asian, Indones...","₹1,500 for two",Flat 10% OFF,New,3.6 km
496,The Mayflower,"Anna Nagar East, Chennai",Continental,"₹2,000 for two",Flat 10% OFF,New,5.6 km
497,Trella,"Jaag Hotels, T.Nagar, Chennai","Continental, Fast Food, Asian, Italian, Chines...","₹1,800 for two",Flat 15% OFF,4.2,5.1 km
498,Surf Turf,"Kovalam, Chennai","Continental, Chinese, Italian, Seafood, Desser...",₹700 for two,Flat 10% OFF,4.1,33.1 km


In [6]:
# Defining a function to get the information of each restaurant in Delhi

In [11]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import time

def get_res_info_del(df_del, bs_data_del, max_restaurants_del):
    restaurants_del = bs_data_del.find_all("div", attrs={"class": "sc-kDgGX fjYwDL"})
    for each_res_del in restaurants_del:
        if len(df_del) >= max_restaurants_del:
            break  # Stop if we've reached the desired number of restaurants
        
        try:
            name_del = each_res_del.find("h4", attrs={"class": "sc-1hp8d8a-0 sc-bpubUI eKeNGz"}).text
            location_del = each_res_del.find("p", attrs={"class": "sc-1hez2tp-0 sc-jkPxnQ jKgMiA"}).text
            cuisine_del = each_res_del.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr gVSfmH"}).text
            price_for_2_del = each_res_del.find("p", attrs={"class": "sc-1hez2tp-0 sc-sVRsr jIrGje"}).text
            walk_in_offer_del = each_res_del.find("div", attrs={"class": "walkin-offer-value"}).text
            rating_del = each_res_del.find("div", attrs={"class": "sc-1q7bklc-1 cILgox"}).text
            average_distance_del = each_res_del.find("div", attrs={"class": "min-basic-info-right"}).text

        except AttributeError:
            walk_in_offer_del = np.nan
            continue

        data_dict_del = {"Name": name_del,
                         "Location": location_del,
                         "Cuisine": cuisine_del,
                         "Price for 2": price_for_2_del,
                         "Walk in Offer": walk_in_offer_del,
                         "Rating": rating_del,
                         "Average distance": average_distance_del}

        each_df_del = pd.DataFrame(data_dict_del, index=[0])
        df_del = pd.concat((df_del, each_df_del), ignore_index=True)
    return df_del

# Setup WebDriver
edge_options = Options()
edge_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
service = Service('msedgedriver.exe')
driver = webdriver.Edge(service=service, options=edge_options)

page_link_del = r"https://www.zomato.com/ncr"
driver.get(page_link_del)

df_del = pd.DataFrame()

# Desired number of restaurants to scrape
max_restaurants_del = 500

# Scroll and load more content
last_height = driver.execute_script("return document.body.scrollHeight")
while len(df_del) < max_restaurants_del:
    # Scroll down to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for new content to load

    # Get new page source and parse with BeautifulSoup
    bs_data_del = BeautifulSoup(driver.page_source, 'html.parser')
    df_del = get_res_info_del(df_del, bs_data_del, max_restaurants_del)

    # Check if more content was loaded
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Total restaurants scraped: {len(df_del)}")

driver.quit()

Total restaurants scraped: 500


In [12]:
df_del

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Cé La Vie Kitchen & Bar,"Middle Circle, Connaught Place, New Delhi","North Indian, Mediterranean, Continental, Sush...","₹2,200 for two",Flat 20% OFF,4.3,1.4 km
1,Out Of The Box Courtyard,"Middle Circle, Connaught Place, New Delhi","Continental, North Indian, Italian, Pizza, Chi...","₹2,200 for two",Flat 20% OFF,4.1,1.2 km
2,Lord Of The Drinks,"Outer Circle, Connaught Place, New Delhi","Oriental, North Indian, Continental, Italian, ...","₹2,400 for two",Flat 15% OFF,4.3,937 m
3,The Darzi Bar & Kitchen,"Middle Circle, Connaught Place, New Delhi","North Indian, Chinese, Asian, Italian, Mughlai...","₹2,800 for two",Flat 25% OFF,4.2,1.4 km
4,Mount Fuji Japanese Pan Asian Restaurant & Bar,"Middle Circle, Connaught Place, New Delhi","Japanese, Asian, Chinese, Thai, Sushi, Seafood...","₹2,400 for two",Flat 25% OFF,4.6,1.4 km
...,...,...,...,...,...,...,...
495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts","₹2,000 for two",Flat 10% OFF,4.3,18.8 km
496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental","₹3,500 for two",Flat 20% OFF,4.6,13.4 km
497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...","₹1,900 for two",Flat 15% OFF,4.6,19.1 km
498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...","₹1,600 for two",Flat 15% OFF,4.2,1.5 km


In [13]:
df = pd.DataFrame()

In [14]:
# Adding all the dataframes to get the final dataframe with restaurants from all cities

In [15]:
df = pd.concat((df, df_hyd, df_blr, df_mum, df_che, df_del))

In [16]:
df

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...","₹2,000 for two",Flat 10% OFF,New,5.8 km
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",₹950 for two,Flat 10% OFF,4.2,6.5 km
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...","₹1,200 for two",Flat 10% OFF,3.7,6.5 km
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian","₹1,000 for two",Flat 15% OFF,4.3,8.6 km
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...","₹2,400 for two",Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...
495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts","₹2,000 for two",Flat 10% OFF,4.3,18.8 km
496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental","₹3,500 for two",Flat 20% OFF,4.6,13.4 km
497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...","₹1,900 for two",Flat 15% OFF,4.6,19.1 km
498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...","₹1,600 for two",Flat 15% OFF,4.2,1.5 km


In [17]:
df.to_csv("Zomato_Restaurants_raw - Web Scraping Project-2.csv")

### Step-2 Data Preparation

In [18]:
import re
import numpy as np
import pandas as pd

In [19]:
dataset = pd.read_csv("Zomato_Restaurants_raw - Web Scraping Project-2.csv")

In [20]:
dataset

Unnamed: 0.1,Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...","₹2,000 for two",Flat 10% OFF,New,5.8 km
1,1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",₹950 for two,Flat 10% OFF,4.2,6.5 km
2,2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...","₹1,200 for two",Flat 10% OFF,3.7,6.5 km
3,3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian","₹1,000 for two",Flat 15% OFF,4.3,8.6 km
4,4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...","₹2,400 for two",Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...,...
2495,495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts","₹2,000 for two",Flat 10% OFF,4.3,18.8 km
2496,496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental","₹3,500 for two",Flat 20% OFF,4.6,13.4 km
2497,497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...","₹1,900 for two",Flat 15% OFF,4.6,19.1 km
2498,498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...","₹1,600 for two",Flat 15% OFF,4.2,1.5 km


In [21]:
dataset.drop("Unnamed: 0", axis = 1, inplace = True)

In [22]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...","₹2,000 for two",Flat 10% OFF,New,5.8 km
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",₹950 for two,Flat 10% OFF,4.2,6.5 km
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...","₹1,200 for two",Flat 10% OFF,3.7,6.5 km
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian","₹1,000 for two",Flat 15% OFF,4.3,8.6 km
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...","₹2,400 for two",Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts","₹2,000 for two",Flat 10% OFF,4.3,18.8 km
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental","₹3,500 for two",Flat 20% OFF,4.6,13.4 km
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...","₹1,900 for two",Flat 15% OFF,4.6,19.1 km
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...","₹1,600 for two",Flat 15% OFF,4.2,1.5 km


In [23]:
# The Rating column does not have any NaN values, but all the data in the column is not numeric

In [24]:
dataset.isnull().any()

Name                False
Location            False
Cuisine             False
Price for 2         False
Walk in Offer       False
Rating              False
Average distance    False
dtype: bool

In [25]:
# Converting all the values to numeric, and force converting the non-numeric values to NaN

In [26]:
dataset["Rating"] = pd.to_numeric(dataset["Rating"], errors = 'coerce')

In [27]:
dataset.isnull().any()

Name                False
Location            False
Cuisine             False
Price for 2         False
Walk in Offer       False
Rating               True
Average distance    False
dtype: bool

In [30]:
# 165 restaurants have no ratings

In [31]:
dataset.isnull().sum()

Name                  0
Location              0
Cuisine               0
Price for 2           0
Walk in Offer         0
Rating              165
Average distance      0
dtype: int64

In [32]:
rating_data = dataset[["Name", "Price for 2", "Rating"]]

In [33]:
rating_data[rating_data.isnull().any(axis = 1)]

Unnamed: 0,Name,Price for 2,Rating
0,Tashi Cafe,"₹2,000 for two",
10,Masterpiece,"₹3,200 for two",
24,Zindagi Sky Bar & Kitchen,"₹3,100 for two",
30,Firestone Grill,"₹1,600 for two",
31,Frat House,"₹2,500 for two",
...,...,...,...
2352,Blanca,"₹1,800 for two",
2356,Sufiaana By Cherish,"₹2,000 for two",
2423,Ten Twenty Two - 1022,"₹1,600 for two",
2455,Litchi Bistro,"₹1,600 for two",


In [34]:
dataset.dtypes

Name                 object
Location             object
Cuisine              object
Price for 2          object
Walk in Offer        object
Rating              float64
Average distance     object
dtype: object

In [35]:
# Filling the NaN values with the median of ratings, since the rating are slighlty skewed and concentrated between 4.2 to 4.5

In [36]:
dataset['Rating'] = dataset['Rating'].fillna(dataset['Rating'].median())

In [37]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...","₹2,000 for two",Flat 10% OFF,4.3,5.8 km
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",₹950 for two,Flat 10% OFF,4.2,6.5 km
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...","₹1,200 for two",Flat 10% OFF,3.7,6.5 km
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian","₹1,000 for two",Flat 15% OFF,4.3,8.6 km
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...","₹2,400 for two",Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts","₹2,000 for two",Flat 10% OFF,4.3,18.8 km
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental","₹3,500 for two",Flat 20% OFF,4.6,13.4 km
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...","₹1,900 for two",Flat 15% OFF,4.6,19.1 km
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...","₹1,600 for two",Flat 15% OFF,4.2,1.5 km


In [38]:
dataset["Price for 2"] = dataset["Price for 2"].apply(lambda amount : re.sub(r"[^0-9.]", "", amount))

In [39]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...",2000,Flat 10% OFF,4.3,5.8 km
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",950,Flat 10% OFF,4.2,6.5 km
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200,Flat 10% OFF,3.7,6.5 km
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian",1000,Flat 15% OFF,4.3,8.6 km
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...",2400,Flat 10% OFF,4.5,12.7 km
...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts",2000,Flat 10% OFF,4.3,18.8 km
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental",3500,Flat 20% OFF,4.6,13.4 km
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...",1900,Flat 15% OFF,4.6,19.1 km
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...",1600,Flat 15% OFF,4.2,1.5 km


In [40]:
dataset["Price for 2"] = dataset["Price for 2"].astype("float")

In [41]:
dataset.dtypes

Name                 object
Location             object
Cuisine              object
Price for 2         float64
Walk in Offer        object
Rating              float64
Average distance     object
dtype: object

In [42]:
dataset["Walk in Offer"] = dataset["Walk in Offer"].apply(lambda amount : re.sub(r"[^0-9.]", "", amount))

In [43]:
dataset["Walk in Offer"] = dataset["Walk in Offer"].astype("float")

In [44]:
dataset.dtypes

Name                 object
Location             object
Cuisine              object
Price for 2         float64
Walk in Offer       float64
Rating              float64
Average distance     object
dtype: object

In [45]:
dataset["Average distance"] = dataset["Average distance"].apply(lambda amount : re.sub(r"[^0-9.]", "", amount))

In [46]:
dataset["Average distance"] = dataset["Average distance"].astype("float")

In [47]:
def convert_to_km(value):
    if value >= 100:  # assuming all values >= 100 are in meters
        value = value / 1000
    return round(value, 1)

In [48]:
dataset['Average distance'] = dataset['Average distance'].apply(convert_to_km)

In [49]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian",1000.0,15.0,4.3,8.6
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7
...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5


In [50]:
dataset.dtypes

Name                 object
Location             object
Cuisine              object
Price for 2         float64
Walk in Offer       float64
Rating              float64
Average distance    float64
dtype: object

In [51]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian",1000.0,15.0,4.3,8.6
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7
...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5


In [52]:
dataset[['Area', 'City']] = dataset['Location'].str.rsplit(',', n = 1, expand = True)

In [53]:
dataset

Unnamed: 0,Name,Location,Cuisine,Price for 2,Walk in Offer,Rating,Average distance,Area,City
0,Tashi Cafe,"Banjara Hills, Hyderabad","North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,Banjara Hills,Hyderabad
1,Coffee Cup,"Somajiguda, Hyderabad","Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,Somajiguda,Hyderabad
2,Ofen,"Banjara Hills, Hyderabad","Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,Banjara Hills,Hyderabad
3,Shanghai - Flavours of China Town,"S P Road, Secunderabad","Chinese, Asian",1000.0,15.0,4.3,8.6,S P Road,Secunderabad
4,Red Rhino,"Hitech City, Hyderabad","Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,Hitech City,Hyderabad
...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,"Cyber Hub, DLF Cyber City",Gurgaon
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad, G...","North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City, Gurgaon","Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,"Cyber Hub, DLF Cyber City",Gurgaon
2498,Cafe Hawkers,"Connaught Place, New Delhi","North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,Connaught Place,New Delhi


In [54]:
dataset = dataset[["Name", "Area", "City", "Cuisine", "Price for 2", "Walk in Offer", "Rating", "Average distance", "Location"]]

In [55]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance,Location
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,"Banjara Hills, Hyderabad"
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,"Somajiguda, Hyderabad"
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,"Banjara Hills, Hyderabad"
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6,"S P Road, Secunderabad"
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,"Hitech City, Hyderabad"
...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,"Cyber Hub, DLF Cyber City, Gurgaon"
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,"Country Inn & Suites by Carlson, Sahibabad, G..."
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,"Cyber Hub, DLF Cyber City, Gurgaon"
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,"Connaught Place, New Delhi"


In [56]:
dataset.drop("Location", axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset.drop("Location", axis = 1, inplace = True)


In [57]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7
...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5


In [58]:
dataset['Restaurant type'] = dataset['Cuisine'].str.split(',').str[0]

In [59]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance,Restaurant type
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,North Indian
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,Cafe
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,Cafe
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6,Chinese
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,Asian
...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,North Indian
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,North Indian
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,Asian
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,North Indian


In [60]:
dataset["Restaurant type"].value_counts()

Restaurant type
North Indian      736
Continental       320
Asian             209
Cafe              143
Chinese           135
South Indian      126
Bar Food          112
Italian            96
Finger Food        59
Pizza              50
European           48
Modern Indian      47
BBQ                37
American           36
Andhra             35
Japanese           28
Mediterranean      23
Sushi              22
Seafood            18
Biryani            18
Korean             17
Mexican            14
Goan               12
Coffee             11
Salad              11
Oriental           10
Middle Eastern      8
Fast Food           8
Thai                8
Bakery              8
Arabian             8
Turkish             7
North Eastern       7
Tibetan             6
Desserts            6
Hyderabadi          6
Burger              6
French              5
Sandwich            4
Vietnamese          4
Mughlai             4
Greek               4
Moroccan            3
Street Food         3
Bengali         

In [61]:
# Define a mapping of restaurant types to dining categories
dining_categories = {'Casual Dining': ['Cafe', 'Pizza', 'Bakery', 'Sandwich', 'Fast Food', 'Coffee'],
                     'Fine Dining': ['Italian', 'French', 'Modern Indian', 'European', 'Japanese','Greek', 'Mediterranean', 'Lebanese', 'Middle Eastern', 'Moroccan'],
                     'Regional Dining': ['North Indian', 'South Indian', 'Andhra', 'Goan', 'Mughlai','Biryani', 'Tamil', 'North Eastern', 'Tibetan'],
                     'Specialty Dining': ['BBQ', 'Sushi', 'Seafood', 'Korean', 'Thai', 'Vietnamese','Mexican', 'Oriental'],
                     'Thematic Dining': ['Continental', 'Asian', 'Chinese', 'Bar Food', 'American','Finger Food', 'Salad']}

# Create a reverse mapping for quick lookup
type_to_category = {type_: category for category, types in dining_categories.items() for type_ in types}

# Apply the lambda function to create a new column for dining type
dataset['Type of Dining'] = dataset['Restaurant type'].apply(lambda x: type_to_category.get(x, 'Other'))

In [62]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance,Restaurant type,Type of Dining
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,North Indian,Regional Dining
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,Cafe,Casual Dining
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,Cafe,Casual Dining
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6,Chinese,Thematic Dining
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,Asian,Thematic Dining
...,...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,North Indian,Regional Dining
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,North Indian,Regional Dining
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,Asian,Thematic Dining
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,North Indian,Regional Dining


In [63]:
dataset.drop("Restaurant type", axis = 1, inplace = True)

In [64]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance,Type of Dining
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,Regional Dining
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,Casual Dining
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,Casual Dining
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6,Thematic Dining
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,Thematic Dining
...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,Regional Dining
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,Regional Dining
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,Thematic Dining
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,Regional Dining


In [65]:
dataset.rename(columns = {'Average distance':'Average distance from City-Center'}, inplace = True)

In [66]:
dataset

Unnamed: 0,Name,Area,City,Cuisine,Price for 2,Walk in Offer,Rating,Average distance from City-Center,Type of Dining
0,Tashi Cafe,Banjara Hills,Hyderabad,"North Indian, Pizza, Pasta, Italian, Chinese, ...",2000.0,10.0,4.3,5.8,Regional Dining
1,Coffee Cup,Somajiguda,Hyderabad,"Cafe, Tea, Beverages, American, Pizza, Burger,...",950.0,10.0,4.2,6.5,Casual Dining
2,Ofen,Banjara Hills,Hyderabad,"Cafe, Coffee, Sandwich, Italian, Mexican, Past...",1200.0,10.0,3.7,6.5,Casual Dining
3,Shanghai - Flavours of China Town,S P Road,Secunderabad,"Chinese, Asian",1000.0,15.0,4.3,8.6,Thematic Dining
4,Red Rhino,Hitech City,Hyderabad,"Asian, North Indian, Continental, European, Bu...",2400.0,10.0,4.5,12.7,Thematic Dining
...,...,...,...,...,...,...,...,...,...
2495,Farzi Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"North Indian, Fast Food, Pizza, Desserts",2000.0,10.0,4.3,18.8,Regional Dining
2496,64/6 - Country Inn & Suites By Radisson,"Country Inn & Suites by Carlson, Sahibabad",Ghaziabad,"North Indian, Chinese, Thai, Continental",3500.0,20.0,4.6,13.4,Regional Dining
2497,Laidback Cafe,"Cyber Hub, DLF Cyber City",Gurgaon,"Asian, Oriental, Continental, Italian, Middle ...",1900.0,15.0,4.6,19.1,Thematic Dining
2498,Cafe Hawkers,Connaught Place,New Delhi,"North Indian, Chinese, Mexican, Fast Food, Des...",1600.0,15.0,4.2,1.5,Regional Dining


In [67]:
dataset.to_csv("30-08-2024-Zomato-cleaned.csv")