# LEVEL - 2 TASK - 1
 __Task: Table Booking and Online Delivery__

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Loaing the dataset

data = pd.read_csv("cognify.csv")

In [3]:
# Exploring the dataset

print("Number of rows and columns:")
print(data.shape)

Number of rows and columns:
(9551, 21)


In [4]:
# Checking for missing values

print("\nMissing values in each column:")
print(data.isnull().sum())


Missing values in each column:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64


In [7]:
print(data.columns)

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')


In [10]:
#Determining the percentage of restaurants that offer table booking and online delivery

data['Has Table booking'] = data['Has Table booking'].map({'Yes': 1, 'No': 0})
data['Has Online delivery'] = data['Has Online delivery'].map({'Yes': 1, 'No': 0})

total_restaurants = len(data)
table_booking_percentage = (data['Has Table booking'].sum() / total_restaurants) * 100
online_delivery_percentage = (data['Has Online delivery'].sum() / total_restaurants) * 100

print("Percentage of restaurants offering table booking:", table_booking_percentage)
print("Percentage of restaurants offering online delivery:", online_delivery_percentage)


Percentage of restaurants offering table booking: 12.124384881164275
Percentage of restaurants offering online delivery: 25.662234321013504


In [12]:
#Comparing the average ratings of restaurants with and without table booking.

avg_rating_with_table_booking = data[data['Has Table booking'] == 'Yes']['Aggregate rating'].mean()
avg_rating_without_table_booking = data[data['Has Table booking'] == 'No']['Aggregate rating'].mean()

print("Average rating of restaurants with table booking:", avg_rating_with_table_booking)
print("Average rating of restaurants without table booking:", avg_rating_without_table_booking)

Average rating of restaurants with table booking: nan
Average rating of restaurants without table booking: nan


In [17]:
#Analyzing the availability of online delivery among restaurants with different price ranges.

price_ranges = data['Price range'].unique()
for price_range in price_ranges:
    online_delivery_percentage = (data[data['Price range'] == price_range]['Has Online delivery'].sum() / len(data[data['Price range'] == price_range])) * 100
    print("Percentage of restaurants with online delivery in price range", price_range, ":", online_delivery_percentage)

Percentage of restaurants with online delivery in price range 3 : 29.19034090909091
Percentage of restaurants with online delivery in price range 4 : 9.044368600682594
Percentage of restaurants with online delivery in price range 2 : 41.310632830067455
Percentage of restaurants with online delivery in price range 1 : 15.774077407740775


# LEVEL - 2 TASK - 2
__Task: Price Range Analysis__

In [14]:
#Determining the most common price range

most_common_price_range = data['Price range'].mode()[0]

In [15]:
#Calculating the average rating for each price range

average_rating_by_price_range = data.groupby('Price range')['Aggregate rating'].mean()

In [16]:
#Identifying the color that represents the highest average rating among different price ranges

highest_avg_rating_price_range = average_rating_by_price_range.idxmax()
color_for_highest_avg_rating = data[data['Price range'] == highest_avg_rating_price_range]['Rating color'].iloc[0]

In [18]:
# Calculating the number of restaurants in each price range

restaurant_count_by_price_range = data['Price range'].value_counts()

In [19]:
# Finding the price range with the highest number of restaurants

most_restaurants_price_range = restaurant_count_by_price_range.idxmax()

In [20]:
# Calculating the median rating for each price range

median_rating_by_price_range = data.groupby('Price range')['Aggregate rating'].median()

In [21]:
# Displaying results

print("Most common price range among all restaurants:", most_common_price_range)
print("\nAverage rating for each price range:")
print(average_rating_by_price_range)
print("\nMedian rating for each price range:")
print(median_rating_by_price_range)
print("\nColor representing the highest average rating among different price ranges:", color_for_highest_avg_rating)

print("\nNumber of restaurants in each price range:")
print(restaurant_count_by_price_range)
print("\nPrice range with the highest number of restaurants:", most_restaurants_price_range)

Most common price range among all restaurants: 1

Average rating for each price range:
Price range
1    1.999887
2    2.941054
3    3.683381
4    3.817918
Name: Aggregate rating, dtype: float64

Median rating for each price range:
Price range
1    2.9
2    3.3
3    3.8
4    3.9
Name: Aggregate rating, dtype: float64

Color representing the highest average rating among different price ranges: Green

Number of restaurants in each price range:
Price range
1    4444
2    3113
3    1408
4     586
Name: count, dtype: int64

Price range with the highest number of restaurants: 1


# LEVEL - 2 TASK - 3
__Task: Feature Engineering__

In [22]:
#Extracting Additional Features

data['Restaurant Name Length'] = data['Restaurant Name'].apply(len)
data['Address Length'] = data['Address'].apply(len)

In [23]:
#Creating New Features

data['Has Table Booking Binary'] = data['Has Table booking'].map({'Yes': 1, 'No': 0})
data['Has Online Delivery Binary'] = data['Has Online delivery'].map({'Yes': 1, 'No': 0})

In [24]:
#Encoding Categorical Variables

data = pd.get_dummies(data, columns=['City', 'Cuisines'])

In [25]:
#Normalizing Numerical Features

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data[['Average Cost for two', 'Votes']] = scaler.fit_transform(data[['Average Cost for two', 'Votes']])

In [26]:
#Deriving Statistical Features
# Calculating statistical features like mean, median, and standard deviation for numerical columns

numerical_columns = ['Average Cost for two', 'Votes']
data['Mean Cost'] = data[numerical_columns].mean(axis=1)
data['Median Votes'] = data[numerical_columns].median(axis=1)
data['Std Dev Cost'] = data[numerical_columns].std(axis=1)

In [27]:
# Displaying the updated dataset

print(data.head())

   Restaurant ID         Restaurant Name  Country Code  \
0        6317637        Le Petit Souffle           162   
1        6304287        Izakaya Kikufuji           162   
2        6300002  Heat - Edsa Shangri-La           162   
3        6318506                    Ooma           162   
4        6314302             Sambo Kojin           162   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM Megamall, Ortigas, Mandaluyong City   
4      SM Megamall, Ortigas, Mandaluyong City   

                   

In [None]:
*