# **Zomato Data Set Analysis and Visualization**


## Importing Libraries

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Reading CSV

In [3]:
# Load the Zomato Bangalore restaurants CSV into the working DataFrame
# and preview its first rows.
df = pd.read_csv('../input/zomato-bangalore-restaurants/zomato.csv')
df.head()

In [6]:
# Dimensions of the raw dataset as loaded.
df.shape

In [7]:
# Column labels available before any column is dropped or renamed.
df.columns

In [8]:
# Remove six columns from the working frame, then preview the result.
df = df.drop(
    columns=['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list'],
)
df.head()

In [9]:
# Inspect the frame's summary after the column drop.
df.info()

## Dropping Duplicates

In [10]:
# Keep a single copy of every fully identical row, then report the new size.
df = df.drop_duplicates()
df.shape

## Cleaning Rate Column

In [None]:
# Inspect the distinct raw values of the rate column before cleaning it.
df['rate'].unique()

## Removing "NEW", "-" and "/5" from Rate Column

In [11]:
def modifyrate(value):
    """Convert a raw rating entry such as ``'4.1/5'`` into a float.

    Args:
        value: Raw cell from the ``rate`` column. It is converted with
            ``str()`` first, so strings and floats (including NaN) are
            both accepted.

    Returns:
        The number in front of the first ``'/'`` as a float, or ``np.nan``
        for the placeholders ``'NEW'`` and ``'-'``, for empty strings and
        for missing values.

    Raises:
        ValueError: If the text in front of ``'/'`` is not a number, so that
            unexpected data surfaces instead of being silently discarded.
    """
    # Normalise surrounding whitespace so ' NEW' or '- ' are still treated
    # as placeholders instead of crashing in float().
    text = str(value).strip()
    if text in ('NEW', '-', ''):
        return np.nan
    # 'x/5' -> 'x'; float() tolerates the inner padding in '3.9 /5'.
    return float(text.split('/')[0])
    
# Replace every raw rating with the value modifyrate returns for it,
# then preview the converted column.
df['rate'] = df['rate'].apply(modifyrate)
df['rate'].head()

## Filling Null Values in Rate Column with Mean

In [12]:
# Impute missing ratings with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on
# df['rate']: that form mutates a column selection, which under pandas
# Copy-on-Write is a temporary object, leaving df itself unchanged.
df['rate'] = df['rate'].fillna(df['rate'].mean())
# Should now be 0 if the imputation took effect.
df['rate'].isnull().sum()

In [13]:
# Re-check the frame's summary after filling the rate column.
df.info()

## Dropping Null Values

In [14]:
# Discard every row that still has a missing value in any column.
df = df.dropna()
df.head()

In [15]:
# Give the two awkwardly named columns shorter labels.
df = df.rename(
    columns={
        'approx_cost(for two people)': 'Cost2plates',
        'listed_in(type)': 'Type',
    },
)
df.head()

In [16]:
# Distinct values of the location column.
df['location'].unique()

In [17]:
# Distinct values of the listed_in(city) column, for comparison with location.
df['listed_in(city)'].unique()

## Both listed_in(city) and location are present; let's keep only one.

In [None]:
# Drop listed_in(city); the location column is the one kept.
df = df.drop(columns=['listed_in(city)'])

In [21]:
# Preview the frame after the column changes above.
df.head()

In [20]:
# Inspect the distinct raw cost values before converting them to numbers.
df['Cost2plates'].unique()

## Removing "," from Cost2plates Column

In [22]:
def handlecomma(value):
    """Return *value* as a float after removing any commas from its text.

    The value is converted with ``str()`` first, so both strings such as
    ``'1,200'`` and plain numbers are accepted.
    """
    without_commas = str(value).replace(',', '')
    return float(without_commas)
    
# Convert every cost entry to a float via handlecomma, then list the
# distinct converted values.
df['Cost2plates'] = df['Cost2plates'].apply(handlecomma)
df['Cost2plates'].unique()
        

In [23]:
# Preview the frame after the cost conversion.
df.head()

## Cleaning Rest Type Column

In [24]:
# Frequency of each rest_type value.
df['rest_type'].value_counts()

In [26]:
# Frequency table of rest_type, most frequent first; kept for the
# thresholding done in the following cells.
rest_types = df['rest_type'].value_counts(ascending  = False)
rest_types

In [27]:
# Subset of the frequency table: rest types that occur fewer than 1000 times.
rest_types_lessthan1000 = rest_types[rest_types<1000]
rest_types_lessthan1000

## Grouping Rest Types with Frequency Below 1000 as "others"

In [28]:
def handle_rest_type(value):
    """Return ``'others'`` for a rare rest type, otherwise *value* unchanged.

    A rest type counts as rare when it appears in the module-level
    ``rest_types_lessthan1000`` Series.
    """
    # `in` on a pandas Series tests the index labels, which for a
    # value_counts() result are the category names themselves.
    is_rare = value in rest_types_lessthan1000
    return 'others' if is_rare else value
        
# Relabel rare rest types as 'others' and show the resulting frequencies.
df['rest_type'] = df['rest_type'].apply(handle_rest_type)
df['rest_type'].value_counts()
        

## Cleaning Location Column

In [29]:
# Preview the frame before cleaning the location column.
df.head()

In [30]:
# Frequency table of location, most frequent first.
location = df['location'].value_counts(ascending  = False)

# Locations that occur fewer than 300 times.
location_lessthan300 = location[location<300]



def handle_location(value):
    """Return ``'others'`` for a rare location, otherwise *value* unchanged.

    A location counts as rare when it appears in the module-level
    ``location_lessthan300`` Series.
    """
    # `in` on a pandas Series tests the index labels, which for a
    # value_counts() result are the category names themselves.
    is_rare = value in location_lessthan300
    return 'others' if is_rare else value
        
# Relabel rare locations as 'others' and show the resulting frequencies.
df['location'] = df['location'].apply(handle_location)
df['location'].value_counts()

## Cleaning Cuisines Column

In [31]:
# Frequency table of the cuisines column, most frequent first.
cuisines = df['cuisines'].value_counts(ascending  = False)


# Cuisine entries that occur fewer than 100 times.
cuisines_lessthan100 = cuisines[cuisines<100]



def handle_cuisines(value):
    """Return ``'others'`` for a rare cuisine entry, otherwise *value* unchanged.

    An entry counts as rare when it appears in the module-level
    ``cuisines_lessthan100`` Series.
    """
    # `in` on a pandas Series tests the index labels, which for a
    # value_counts() result are the category names themselves.
    is_rare = value in cuisines_lessthan100
    return 'others' if is_rare else value
        
# Relabel rare cuisine entries as 'others' and show the resulting frequencies.
df['cuisines'] = df['cuisines'].apply(handle_cuisines)
df['cuisines'].value_counts()


In [32]:
# Preview the cleaned frame.
df.head()

## **Data is Clean, Let's Jump to Visualization**

## Count Plot of Various Locations

In [33]:
# Number of listings per location.
plt.figure(figsize=(16, 10))
# Name the column via x= / data=: seaborn >= 0.12 accepts only `data`
# positionally, so passing the Series as the first argument no longer
# means "x".
ax = sns.countplot(x='location', data=df)
# Rotate the tick labels so the location names stay readable.
plt.xticks(rotation=90)

## Visualizing Online Order

In [34]:
# Number of listings with and without online ordering.
plt.figure(figsize=(6, 6))
# Name the column via x= / data=: seaborn >= 0.12 accepts only `data`
# positionally.
sns.countplot(x='online_order', data=df, palette='inferno')

## Visualizing Book Table

In [38]:
# Number of listings with and without table booking.
plt.figure(figsize=(6, 6))
# Name the column via x= / data=: seaborn >= 0.12 accepts only `data`
# positionally.
sns.countplot(x='book_table', data=df, palette='rainbow')

## Visualizing Online Order vs Rate

In [35]:
# Rating distribution, split by the online_order column.
plt.figure(figsize=(6, 6))
sns.boxplot(data=df, x='online_order', y='rate')

## Visualizing Book Table vs Rate

In [36]:
# Rating distribution, split by the book_table column.
plt.figure(figsize=(6, 6))
sns.boxplot(data=df, x='book_table', y='rate')

## Visualizing Online Order Facility, Location Wise

In [37]:
# Count listings for every (location, online_order) combination.
df1 = df.groupby(['location', 'online_order'])['name'].count()
# Keep exporting the counts to CSV as before.
df1.to_csv('location_online.csv')
# Flatten the grouped Series in memory instead of reading the CSV back;
# this avoids the file round trip and read_csv's re-parsing of labels.
df1 = df1.reset_index()
# One row per location, one column per online_order value.
# aggfunc='sum' replaces np.sum, which pandas >= 2.1 deprecates for this use.
df1 = pd.pivot_table(df1, values=None, index=['location'], columns=['online_order'], fill_value=0, aggfunc='sum')
df1

In [None]:
# Bar chart of the per-location table built in the previous cell.
df1.plot(figsize=(15, 8), kind='bar')

## Visualizing Book Table Facility, Location Wise

In [39]:
# Count listings for every (location, book_table) combination.
df2 = df.groupby(['location', 'book_table'])['name'].count()
# Keep exporting the counts to CSV as before.
df2.to_csv('location_booktable.csv')
# Flatten the grouped Series in memory instead of reading the CSV back;
# this avoids the file round trip and read_csv's re-parsing of labels.
df2 = df2.reset_index()
# One row per location, one column per book_table value.
# aggfunc='sum' replaces np.sum, which pandas >= 2.1 deprecates for this use.
df2 = pd.pivot_table(df2, values=None, index=['location'], columns=['book_table'], fill_value=0, aggfunc='sum')
df2

In [40]:
# Bar chart of the per-location table built in the previous cell.
df2.plot(figsize=(15, 8), kind='bar')

## Visualizing Types of Restaurants vs Rate

In [41]:
# Rating distribution, split by the Type column.
plt.figure(figsize=(14, 8))
sns.boxplot(data=df, x='Type', y='rate', palette='inferno')

## Grouping Types of Restaurants, Location Wise

In [42]:
# Count listings for every (location, Type) combination.
df3 = df.groupby(['location', 'Type'])['name'].count()
# Keep exporting the counts to CSV as before.
df3.to_csv('location_Type.csv')
# Flatten the grouped Series in memory instead of reading the CSV back;
# this avoids the file round trip and read_csv's re-parsing of labels.
df3 = df3.reset_index()
# One row per location, one column per Type value.
# aggfunc='sum' replaces np.sum, which pandas >= 2.1 deprecates for this use.
df3 = pd.pivot_table(df3, values=None, index=['location'], columns=['Type'], fill_value=0, aggfunc='sum')
df3

In [47]:
# Bar chart of the per-location table built in the previous cell.
df3.plot(figsize=(36, 8), kind='bar')

## No. of Votes, Location Wise

In [43]:
# Total votes per location, largest total first.
df4 = df[['location', 'votes']]
# NOTE(review): a bare `df4.drop_duplicates()` used to sit here, but its
# result was never assigned, so it had no effect and has been removed.
# Totals therefore still include every row; if repeated listings should
# be counted once, deduplicate explicitly. Confirm the intent.
df5 = df4.groupby(['location'])['votes'].sum()
df5 = df5.to_frame()
df5 = df5.sort_values('votes', ascending=False)
df5.head()

In [48]:
# Bar chart of total votes per location.
plt.figure(figsize=(15, 8))
# x and y must be keywords: seaborn >= 0.12 rejects them as positional
# arguments with a TypeError.
sns.barplot(x=df5.index, y=df5['votes'])
# Rotate the tick labels so the location names stay readable.
plt.xticks(rotation=90)

In [49]:
# Preview the frame again before the cuisine analysis.
df.head()

## Visualizing Top Cuisines

In [44]:
# Total votes per cuisines value, largest total first.
df6 = df[['cuisines', 'votes']]
# NOTE(review): a bare `df6.drop_duplicates()` used to sit here, but its
# result was never assigned, so it had no effect and has been removed.
# Totals therefore still include every row; if repeated listings should
# be counted once, deduplicate explicitly. Confirm the intent.
df7 = df6.groupby(['cuisines'])['votes'].sum()
df7 = df7.to_frame()
df7 = df7.sort_values('votes', ascending=False)
df7.head()

In [45]:
# Skip the first row, i.e. the entry with the highest vote total.
# NOTE(review): presumably this is the catch-all 'others' bucket; confirm,
# since the slice is positional and does not check the label.
df7 = df7.iloc[1:]
df7.head()

In [46]:
# Bar chart of total votes per cuisines value.
plt.figure(figsize=(15, 8))
# x and y must be keywords: seaborn >= 0.12 rejects them as positional
# arguments with a TypeError.
sns.barplot(x=df7.index, y=df7['votes'])
# Rotate the tick labels so the cuisine names stay readable.
plt.xticks(rotation=90)