# Blinkit Analysis in Python

### Importing libraries

In [2]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
import plotly.express as px
from plotly.express import colors
import plotly .graph_objects as go
import plotly .io as pio
import plotly.colors as cloors
# Make "plotly_white" the default template for the Plotly figures in this notebook.
pio.templates.default = "plotly_white"

### Loading data

In [4]:
# Location of the Blinkit sales CSV.
# Set the BLINKIT_DATA_PATH environment variable to point at your own copy of
# the file; when it is not set, the original author's local path is used so the
# notebook keeps working unchanged on that machine.
DATA_PATH = Path(
    os.environ.get(
        "BLINKIT_DATA_PATH",
        r"C:\Users\massm\OneDrive\New folder\python_in_VS\blinkit_data.csv",
    )
)

# Load the raw data and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Item Fat Content,Item Identifier,Item Type,Outlet Establishment Year,Outlet Identifier,Outlet Location Type,Outlet Size,Outlet Type,Item Visibility,Item Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2010,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0
3,Regular,FDL50,Canned,2000,OUT013,Tier 3,High,Supermarket Type1,0.042278,12.15,126.5046,5.0
4,Low Fat,DRI25,Soft Drinks,2015,OUT045,Tier 2,Small,Supermarket Type1,0.03397,19.6,55.1614,5.0


### Understanding the data

In [5]:
# Report the dimensions of the loaded frame as a (rows, columns) tuple.
frame_dimensions = df.shape
print("size of the data :", frame_dimensions)

size of the data : (8523, 12)


In [6]:
# List the column labels available for the analysis.
df.columns

Index(['Item Fat Content', 'Item Identifier', 'Item Type',
       'Outlet Establishment Year', 'Outlet Identifier',
       'Outlet Location Type', 'Outlet Size', 'Outlet Type', 'Item Visibility',
       'Item Weight', 'Sales', 'Rating'],
      dtype='object')

In [7]:
# Check the dtype pandas inferred for each column.
df.dtypes

Item Fat Content              object
Item Identifier               object
Item Type                     object
Outlet Establishment Year      int64
Outlet Identifier             object
Outlet Location Type          object
Outlet Size                   object
Outlet Type                   object
Item Visibility              float64
Item Weight                  float64
Sales                        float64
Rating                       float64
dtype: object

### Data Cleaning

In [8]:
# Show the distinct labels used in "Item Fat Content" to spot inconsistent spellings.
print(df["Item Fat Content"].unique())

['Regular' 'Low Fat' 'low fat' 'LF' 'reg']


In [9]:
# Map every alternate spelling of the fat-content label onto one of the two
# canonical categories, then print the distinct labels to confirm the cleanup.
fat_label_fixes = {
    "LF": "Low Fat",
    "low fat": "Low Fat",
    "reg": "Regular",
}
df["Item Fat Content"] = df["Item Fat Content"].replace(fat_label_fixes)
print(df["Item Fat Content"].unique())

['Regular' 'Low Fat']


## Business Requirements

### KPI Requirements

In [10]:
# Headline KPIs, each computed over the whole frame.

# Total sales: sum of the "Sales" column.
total_sales = df["Sales"].sum()

# Average sales: mean of the "Sales" column.
Average_sales = df["Sales"].mean()

# Number of items sold: count of non-null "Sales" entries.
no_of_items_sold = df["Sales"].count()

# Average rating: mean of the "Rating" column.
Average_rating = df["Rating"].mean()

# Display.
# The rating is printed with one decimal place: formatting a mean rating with
# zero decimals rounds it to a whole number and hides the actual average.
print(f"Total sales : ${total_sales:,.0f}")
print(f"Average sales : {Average_sales:,.1f}")
print(f"no of items sold : {no_of_items_sold:,.0f}")
print(f"average rating : {Average_rating:,.1f}")

Total sales : $1,201,681
Average sales : 141.0
no of items sold : 8,523
average rating :4


## Charts Requirements

### 1. Sales by fat content

In [11]:
# Preview the last rows of the frame (complements the earlier head() preview).
df.tail()

Unnamed: 0,Item Fat Content,Item Identifier,Item Type,Outlet Establishment Year,Outlet Identifier,Outlet Location Type,Outlet Size,Outlet Type,Item Visibility,Item Weight,Sales,Rating
8518,Low Fat,NCT53,Health and Hygiene,1998,OUT027,Tier 3,Medium,Supermarket Type3,0.0,,164.5526,4.0
8519,Low Fat,FDN09,Snack Foods,1998,OUT027,Tier 3,Medium,Supermarket Type3,0.034706,,241.6828,4.0
8520,Low Fat,DRE13,Soft Drinks,1998,OUT027,Tier 3,Medium,Supermarket Type3,0.027571,,86.6198,4.0
8521,Regular,FDT50,Dairy,1998,OUT027,Tier 3,Medium,Supermarket Type3,0.107715,,97.8752,4.0
8522,Regular,FDM58,Snack Foods,1998,OUT027,Tier 3,Medium,Supermarket Type3,0.0,,112.2544,4.0


In [12]:
# Donut chart: share of total sales contributed by each fat-content category.
sales_by_fat = df.groupby("Item Fat Content")["Sales"].sum().reset_index()

fig = px.pie(
    sales_by_fat,
    names="Item Fat Content",
    values="Sales",
    hole=0.5,  # a non-zero hole turns the pie into a donut
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
# Put the percentage and category label inside each slice.
fig.update_traces(textinfo="percent+label", textposition="inside")
fig.update_layout(
    title_text="sales analysis by fat content",
    title_font={"size": 24},
)
fig.show()

### 2. Total sales by item type

In [13]:
# Total sales per item type, ordered from the best-selling type downwards.
item_type_totals = df.groupby("Item Type")["Sales"].sum().reset_index()
sales_by_items = item_type_totals.sort_values(by="Sales", ascending=False)

fig = px.bar(
    sales_by_items,
    x="Item Type",
    y="Sales",
    title="Total sales by items Types",
)
# Turn the x-axis tick labels to 90 degrees so the item-type names do not overlap.
fig.update_layout(xaxis_tickangle=90)
fig.show()

### 3. Fat content by outlet for total sales

In [14]:
# Re-display the first rows as a reminder of the available columns before grouping.
df.head()

Unnamed: 0,Item Fat Content,Item Identifier,Item Type,Outlet Establishment Year,Outlet Identifier,Outlet Location Type,Outlet Size,Outlet Type,Item Visibility,Item Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2010,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0
3,Regular,FDL50,Canned,2000,OUT013,Tier 3,High,Supermarket Type1,0.042278,12.15,126.5046,5.0
4,Low Fat,DRI25,Soft Drinks,2015,OUT045,Tier 2,Small,Supermarket Type1,0.03397,19.6,55.1614,5.0


In [15]:
# Total sales for every (outlet location tier, fat content) combination,
# returned as a flat frame with the two grouping keys as ordinary columns.
location_fat_keys = ["Outlet Location Type", "Item Fat Content"]
fat_content_outlet = df.groupby(location_fat_keys)["Sales"].sum().reset_index()


In [16]:
# Reshape to wide form: one row per outlet location tier, one column per
# fat-content category, cells holding the summed sales.
fat_content_outlet_pivot = fat_content_outlet.pivot(
    values="Sales", index="Outlet Location Type", columns="Item Fat Content"
)
fat_content_outlet_pivot

Item Fat Content,Low Fat,Regular
Outlet Location Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Tier 1,215047.9126,121349.8994
Tier 2,254464.7734,138685.8682
Tier 3,306806.9924,165326.0348


In [17]:
# Bar chart of total sales per outlet location tier, coloured by fat content.
# barmode="group" draws the categories side by side; use "stack" to stack them.
fig = px.bar(
    fat_content_outlet,
    title="Total Sales by Fat Content across Outlet Locations",
    x="Outlet Location Type",
    y="Sales",
    color="Item Fat Content",
    barmode="group",
)
fig.show()


### 4. Total sales by outlet establishment year

In [18]:
# Total sales per outlet establishment year, in chronological order so the
# line chart reads left to right through time.
yearly_sales_totals = df.groupby("Outlet Establishment Year", as_index=False)["Sales"].sum()
Sales_by_Outlet_Establishment = yearly_sales_totals.sort_values("Outlet Establishment Year")

fig = px.line(
    Sales_by_Outlet_Establishment,
    title="Total Sales by Outlet Establishment Year",
    x="Outlet Establishment Year",
    y="Sales",
    markers=True,  # draw a marker at each year's data point
)
fig.show()


### 5. Sales by outlet size

In [19]:
# Donut chart: share of total sales contributed by each outlet size.
Sales_by_Outlet_size = df.groupby("Outlet Size")["Sales"].sum().reset_index()

fig = px.pie(
    Sales_by_Outlet_size,
    names="Outlet Size",
    values="Sales",
    hole=0.5,  # a non-zero hole turns the pie into a donut
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
# Put the percentage and category label inside each slice.
fig.update_traces(textinfo="percent+label", textposition="inside")
fig.update_layout(
    title_text="Sales_by_Outlet_size",
    title_font={"size": 24},
)
fig.show()

### 6. Sales by outlet location

**Vertical bar chart**

In [21]:
# Total sales per outlet location tier, drawn as vertical bars with the
# value printed on each bar.
location_sales = df.groupby("Outlet Location Type")["Sales"].sum()
Sales_by_Outlet_location = location_sales.reset_index()

fig = px.bar(
    Sales_by_Outlet_location,
    title="Outlet_Location_Type",
    x="Outlet Location Type",
    y="Sales",
    text_auto=True,
)
fig.show()


**Horizontal bar chart**

In [None]:
# Same totals as a horizontal bar chart. The frame is sorted by ascending
# sales, the ordering the original author chose for the horizontal layout.
location_sales_totals = df.groupby("Outlet Location Type", as_index=False)["Sales"].sum()
Sales_by_Outlet_location = location_sales_totals.sort_values("Sales", ascending=True)

fig = px.bar(
    Sales_by_Outlet_location,
    title="Sales by Outlet Location Type",
    x="Sales",
    y="Outlet Location Type",
    orientation="h",  # horizontal bars
    text_auto=True,
)
fig.show()
