In [1]:
import pandas as pd
df = pd.read_csv('data/zomato.csv', encoding = 'latin1')
country = pd.read_excel('data/Country-Code.xlsx')

In [2]:
#1)	Which restaurant has the largest number of votes from the customer
# DONE
# Pull up the columns Restaurant Name and Votes. Sort values by order then get first value

votes = df.loc[:, ['Restaurant Name', 'Votes']].sort_values(by = 'Votes', ascending=False)
votes.iloc[0]

Restaurant Name     Toit
Votes              10934
Name: 728, dtype: object

In [3]:
#2)	Which city has more number of poor and not rated rating than very good rating in each country?
# DONE

# Combining the zomato with country code for easier access
country = country.rename(columns = {'Country Code': 'cc'})
combine = df.merge(country, on = "cc", how="inner")

# Locating the unique countries and cities
cities = combine['City'].unique()
countries = country['Country'].unique()

# For every city, go through each restaurant.
# Sum up the Not Rated, Poor, Very Good ratings.
# If Not Rated + Poor > Very Good, append to the list
bad_cities = []
for x in cities:
    one = df[ df['City'] == str(x)]
    very_good = len(one[(df['Rating text'] == "Very Good")])
    poor = len(one[(df['Rating text'] == "Poor")]) 
    not_rated = len (one[(df['Rating text'] == "Not rated")])
    if very_good < (poor + not_rated):
        bad_cities.append(str(x))

bad_cities



['Mc Millan',
 'Montville',
 'Faridabad',
 'Ghaziabad',
 'Gurgaon',
 'New Delhi',
 'Noida']

In [4]:
#3) Which city is costliest in each country? ( Assume all the currency are of same value)
# Go through each unique country and each unique city.
# Average the costs for the entire city. The highest average cost
# city will be selected for each country.

costcity = combine[['City', 'Average Cost for two', 'Country']]
costiest_city = []

for x in countries:
    one = costcity[ costcity['Country'] == str(x) ]
    name = ''
    number = 0
    for y in cities: 
        two = one[ one['City'] == str(y)]
        if( two['Average Cost for two'].mean() > number):
            name = y
            number = two['Average Cost for two'].mean()
    costiest_city.append(name)

costiest_city 

['Panchkula',
 'Paynesville',
 'Sí£o Paulo',
 'Vineland Station',
 'Jakarta',
 'Wellington City',
 'Pasay City',
 'Doha',
 'Singapore',
 'Inner City',
 'Colombo',
 'Ankara',
 'Dubai',
 'London',
 'Princeton']

In [5]:
#4) In india how many restaurants are present in each locality?
# DONE
# Filter by country India. For each locality, sum up the number of restaurants
locality = combine [ combine['Country'] == 'India']
locality['Locality'].value_counts()

Connaught Place                          122
Rajouri Garden                            99
Shahdara                                  87
Defence Colony                            86
Pitampura                                 85
Malviya Nagar                             85
Mayur Vihar Phase 1                       84
Rajinder Nagar                            81
Safdarjung                                80
Satyaniketan                              79
Krishna Nagar                             77
Sector 62                                 76
Karol Bagh                                76
Sector 18                                 75
Hauz Khas                                 74
Kalkaji                                   73
Sector 15                                 71
DLF Phase 3                               69
Mahipalpur                                69
Mukherjee Nagar                           68
Shalimar Bagh                             67
Chandni Chowk                             67
Greater Ka

In [6]:
#5) Which city has the most number of restaurants in each country
mostcity = combine[['City', 'Country']]
most_number = []

# For each country, go through its cities
# In each city, find the number of restaurants
# Select only the highest value for each city
# Append to list
for x in countries:
    one = combine[ combine['Country'] == str(x) ]
    name = ''
    number = 0
    for y in cities: 
        two = one[ one['City'] == str(y)]
        if( len(two) > number):
            name = y
            number = len(two)
    most_number.append(name)

most_number

['New Delhi',
 'Hepburn Springs',
 'Brasí_lia',
 'Chatham-Kent',
 'Jakarta',
 'Auckland',
 'Mandaluyong City',
 'Doha',
 'Singapore',
 'Cape Town',
 'Colombo',
 'Ankara',
 'Abu Dhabi',
 'Birmingham',
 'Albany']

In [7]:
#6) Which franchise has the highest number of Restaurants?
# DONE
# Sum up the number of Restaurant Names
franchise = df['Restaurant Name'].value_counts()
franchise.head(1)

Cafe Coffee Day    83
Name: Restaurant Name, dtype: int64

In [8]:
#7) How many Restaurants are accepting online orders?
# DONE
# Pull up the Online delivery column and sum up the Yes
len( df[ df["Has Online delivery"] == "Yes" ])

2451

In [9]:
#8) How many have a book table facility?
# DONE
# Pull up the Table booking column and sum up the Yes
len( df[ df["Has Table booking"] == "Yes" ])

1158

In [10]:
#9) Which location has the highest number of Restaurants?
# DONE
# Pull up the Location column and sum up each unique value
locations = df['City'].value_counts()
locations.head(1)

New Delhi    5473
Name: City, dtype: int64

In [11]:
#10) How many types of Restaurant types are there?
# DONE
# Go through the Cuisine column. Separate the individual cuisines.
# Append unique values to the list. Sum the list.
restaurants = []

for x in df['Cuisines']:
    cuisine = str(x).split(', ')
    for y in cuisine:
        if( y not in restaurants):
            restaurants.append(y)

len(restaurants)

146

In [12]:
#11) What is the most liked Restaurant?
# DONE
# Pull up the Restaurant Name and Votes, get the highest value votes
df[['Restaurant Name', 'Votes']].max()

Restaurant Name    íukuraÛôa SofrasÛ±
Votes                            10934
dtype: object

In [13]:
#12) What is the Average cost for 2 persons?
# DONE
# Sum up all the Average Cost for two then divide by total number of restaurant
df['Average Cost for two'].sum() / len(df['Average Cost for two'])

1199.2107632708617