<div style="background-color: #F4A460; padding: 20px; border-radius: 10px; box-shadow: 5px 5px 10px #888888;">
  <h1 style="color: white; font-size: 30px; font-weight: bold; text-align: center; text-shadow: 2px 2px 4px #000000;">Swiggy Restaurant Analysis Notebook</h1>
</div>


In [57]:
# %% 
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

In [58]:
# Read the Swiggy restaurant listing from the CSV in the working directory
df = pd.read_csv(filepath_or_buffer="swiggy.csv")

In [59]:
# %% 
# Display basic information: df.info() prints the index range, every column's
# non-null count and dtype, and the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8680 entries, 0 to 8679
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ID             8680 non-null   int64  
 1   Area           8680 non-null   object 
 2   City           8680 non-null   object 
 3   Restaurant     8680 non-null   object 
 4   Price          8680 non-null   float64
 5   Avg ratings    8680 non-null   float64
 6   Total ratings  8680 non-null   int64  
 7   Food type      8680 non-null   object 
 8   Address        8680 non-null   object 
 9   Delivery time  8680 non-null   int64  
dtypes: float64(2), int64(3), object(5)
memory usage: 678.3+ KB


In [60]:
# Tally nulls per column, then show only the columns that actually contain any
# (an empty Series means no column has missing data)
missing_values = df.isna().sum()
missing_values.loc[missing_values.gt(0)]

Series([], dtype: int64)

In [61]:
# Count rows that exactly repeat an earlier row (the first occurrence is not counted)
duplicates = df.duplicated(keep='first').sum()
print(f"Number of duplicate rows: {duplicates}")

Number of duplicate rows: 0


<div style="background-color: #F4A460; padding: 20px; border-radius: 10px; box-shadow: 5px 5px 10px #888888;">
  <h1 style="color: white; font-size: 30px; font-weight: bold; text-align: center; text-shadow: 2px 2px 4px #000000;">Data Overview</h1>
</div>


In [62]:
# Preview the first five rows to sanity-check column contents
df.head(n=5)


Unnamed: 0,ID,Area,City,Restaurant,Price,Avg ratings,Total ratings,Food type,Address,Delivery time
0,211,Koramangala,Bangalore,Tandoor Hut,300.0,4.4,100,"Biryani,Chinese,North Indian,South Indian",5Th Block,59
1,221,Koramangala,Bangalore,Tunday Kababi,300.0,4.1,100,"Mughlai,Lucknowi",5Th Block,56
2,246,Jogupalya,Bangalore,Kim Lee,650.0,4.4,100,Chinese,Double Road,50
3,248,Indiranagar,Bangalore,New Punjabi Hotel,250.0,3.9,500,"North Indian,Punjabi,Tandoor,Chinese",80 Feet Road,57
4,249,Indiranagar,Bangalore,Nh8,350.0,4.0,50,"Rajasthani,Gujarati,North Indian,Snacks,Desser...",80 Feet Road,63


In [63]:
# Per-column null counts, listing every column (including those with zero nulls).
# NOTE(review): the visible cells above this one perform no cleaning step,
# so this is a full-width view of the raw data's null counts, not a post-cleaning check.
df.isnull().sum()


ID               0
Area             0
City             0
Restaurant       0
Price            0
Avg ratings      0
Total ratings    0
Food type        0
Address          0
Delivery time    0
dtype: int64

In [64]:
# Dataset dimensions: df.shape is (rows, columns), so unpack both at once
num_records, num_columns = df.shape
print(f"Number of records: {num_records}")
print(f"Number of columns: {num_columns}")

Number of records: 8680
Number of columns: 10


In [65]:
# Visualizing price distribution: share of restaurants at each listed price.
# value_counts() returns the prices as the index and the number of restaurants
# at each price as the values, sorted from most to least common.
price_range_counts = df['Price'].value_counts()
price_pie = px.pie(
    price_range_counts,
    names=price_range_counts.index,
    values=price_range_counts.values,
    title='Share of Restaurants by Price',
)
# The figure was previously built but never rendered; show it so the cell
# actually visualizes what its heading promises.
price_pie.show()

# Tabular view of the five most common prices
price_range_counts.head()


Price
300.0    1776
200.0    1774
250.0     968
400.0     838
500.0     605
Name: count, dtype: int64

<div style="background-color: #F4A460; padding: 20px; border-radius: 10px; box-shadow: 5px 5px 10px #888888;">
  <h1 style="color: white; font-size: 30px; font-weight: bold; text-align: center; text-shadow: 2px 2px 4px #000000;">Data Analysis</h1>
</div>


In [66]:
# Bar chart of the ten areas that list the most restaurants.
# value_counts() is already sorted descending, so the first ten rows are the top ten.
top_10_areas = df['Area'].value_counts().iloc[:10]
fig = px.bar(
    top_10_areas,
    x=top_10_areas.index,
    y=top_10_areas.values,
    title='Top 10 Areas with Most Restaurants',
)

# Axis captions, in white so they stay readable on the dark background
fig.update_xaxes(title=dict(text='Area', font=dict(color='white')))
fig.update_yaxes(title=dict(text='Number of Restaurants', font=dict(color='white')))

# Dark theme: black canvas and plot area with white text
fig.update_layout(font_color='white', plot_bgcolor='black', paper_bgcolor='black')

fig.show()


In [67]:
# Most common 'Food type' value in each city.
# Steps: count restaurants per (City, Food type) pair, order each city's pairs
# from most to least frequent, then keep only the first (top) row per city.
# After the count, the 'ID' column holds the number of restaurants, not an identifier.
# NOTE(review): 'Food type' is grouped as-is; the preview rows show comma-separated
# lists (e.g. "Mughlai,Lucknowi"), so each distinct combination is counted separately.
popular_food_types = (
    df.groupby(['City', 'Food type'])['ID']
    .count()
    .reset_index()
    .sort_values(by=['City', 'ID'], ascending=[True, False])
    .drop_duplicates(subset='City')
)

fig = px.bar(
    popular_food_types,
    x='City',
    y='ID',
    color='Food type',
    title='Most Popular Food Types Served by Swiggy Restaurants in Each City',
)

# White axis captions for the dark theme
fig.update_xaxes(title_text='City', title_font_color='white')
fig.update_yaxes(title_text='Number of Restaurants', title_font_color='white')

# Dark theme; the title text is restated so its font colour can be set alongside it
fig.update_layout(
    title_text='Most Popular Food Types Served by Swiggy Restaurants in Each City',
    title_font_color='white',
    font_color='white',
    plot_bgcolor='black',
    paper_bgcolor='black',
)

fig.show()

In [68]:
# Donut chart: share of restaurants rated above 4.5 versus 4.5 and below
above_45_ratings = df.loc[df['Avg ratings'].gt(4.5)]
below_45_ratings = df.loc[df['Avg ratings'].le(4.5)]

# Share above the threshold; the remainder is taken as the complement to 100
percentage_above_45 = (above_45_ratings.shape[0] / df.shape[0]) * 100
percentage_below_45 = 100 - percentage_above_45

rating_df = pd.DataFrame(
    [
        ('Above 4.5', percentage_above_45),
        ('4.5 and Below', percentage_below_45),
    ],
    columns=['Rating', 'Percentage'],
)

fig = px.pie(
    rating_df,
    names='Rating',
    values='Percentage',
    title='Top Rated Swiggy Restaurants in Percentage ',
    hole=0.2,
)

# Pull the first slice ('Above 4.5') away from the centre to highlight it
fig.update_traces(pull=[0.3, 0])
fig.show()

In [69]:
# Histogram of listed restaurant prices
fig = px.histogram(
    df,
    x='Price',
    title='Distribution of Restaurant Prices on Swiggy',
)

# Red bars on a black canvas with white text
fig.update_traces(marker=dict(color='red'))
fig.update_layout(font_color='white', paper_bgcolor='black', plot_bgcolor='black')

fig.show()

In [70]:
# Correlation heatmap of the numeric columns.
# DataFrame.corr() needs numeric input, so text columns are filtered out first.
numeric_df = df.select_dtypes(include=[np.number])  # Only numeric columns

# 'ID' appears to be a restaurant identifier rather than a measurement, so its
# correlations carry no meaning and only clutter the heatmap; leave it out.
# errors='ignore' keeps the cell working if the column is absent.
correlation_matrix = numeric_df.drop(columns=['ID'], errors='ignore').corr()

fig = go.Figure(
    data=go.Heatmap(
        z=correlation_matrix.values,
        x=correlation_matrix.columns,
        y=correlation_matrix.index,
        # Pin the colour scale to the full correlation range so colours are
        # comparable across runs instead of being stretched to the observed min/max.
        zmin=-1,
        zmax=1,
    )
)
fig.update_layout(title='Correlation Heatmap of Factors Affecting Average Rating')
fig.show()


In [71]:
# Price against average rating, one red point per restaurant
fig = px.scatter(
    df,
    x='Price',
    y='Avg ratings',
    title='Correlation Between Restaurant Price and Average Rating',
    color_discrete_sequence=['red'],
)

# Apply the built-in dark template after the figure is created
fig.update_layout(dict(template="plotly_dark"))

fig.show()