In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Location of the sales CSV. The default is the original author's local path;
# set the SALES_CSV environment variable to point the notebook at a copy of the
# file on another machine without editing this cell.
SALES_CSV_PATH = os.environ.get(
    "SALES_CSV",
    r"C:\Users\hp\Desktop\Machine Learning\Projects\23. Supermarket Sales Data Analysis\sales.csv",
)

# Load the raw sales records into the frame used by every cell below.
df = pd.read_csv(SALES_CSV_PATH)

In [3]:
# Peek at the first five rows to check that the CSV parsed into the expected columns.
df.head(n=5)

Unnamed: 0,sale_id,branch,city,customer_type,gender,product_name,product_category,unit_price,quantity,tax,total_price,reward_points
0,1,A,New York,Member,Male,Shampoo,Personal Care,5.5,3,1.16,17.66,1
1,2,B,Los Angeles,Normal,Female,Notebook,Stationery,2.75,10,1.93,29.43,0
2,3,A,New York,Member,Female,Apple,Fruits,1.2,15,1.26,19.26,1
3,4,A,Chicago,Normal,Male,Detergent,Household,7.8,5,2.73,41.73,0
4,5,B,Los Angeles,Member,Female,Orange Juice,Beverages,3.5,7,1.72,26.22,2


In [4]:
# Look at the last five rows as well, to see the end of the file.
df.tail(n=5)

Unnamed: 0,sale_id,branch,city,customer_type,gender,product_name,product_category,unit_price,quantity,tax,total_price,reward_points
995,996,A,New York,Member,Female,Shampoo,Stationery,1.55,11,1.19,18.24,1
996,997,A,New York,Member,Male,Detergent,Personal Care,2.44,7,1.2,18.28,1
997,998,A,New York,Member,Female,Shampoo,Stationery,17.92,2,2.51,38.35,3
998,999,A,New York,Member,Female,Shampoo,Beverages,17.41,4,4.87,74.51,7
999,1000,A,New York,Normal,Male,Orange Juice,Stationery,4.11,4,1.15,17.59,0


In [5]:
# Summarise the frame's structure: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   sale_id           1000 non-null   int64  
 1   branch            1000 non-null   object 
 2   city              1000 non-null   object 
 3   customer_type     1000 non-null   object 
 4   gender            1000 non-null   object 
 5   product_name      1000 non-null   object 
 6   product_category  1000 non-null   object 
 7   unit_price        1000 non-null   float64
 8   quantity          1000 non-null   int64  
 9   tax               1000 non-null   float64
 10  total_price       1000 non-null   float64
 11  reward_points     1000 non-null   int64  
dtypes: float64(3), int64(3), object(6)
memory usage: 93.9+ KB


In [6]:
# Total revenue for every (branch, city) pair; reset_index() turns the group
# keys back into ordinary columns so plotly express can reference them by name.
branch_city_sales = df.groupby(["branch", "city"])["total_price"].sum().reset_index()

# One bar segment per (branch, city) pair, coloured by city and labelled with
# its total.
fig1 = px.bar(
    branch_city_sales,
    x="branch",
    y="total_price",
    color="city",
    title="Sales Distribution by Branch and City",
    text="total_price",
    color_discrete_sequence=px.colors.qualitative.Bold
)
# The labels are summed floats; without a format they are drawn with whatever
# precision the sum happens to have (including floating-point noise), so pin
# them to two decimals.
fig1.update_traces(texttemplate="%{text:.2f}")
fig1.update_layout(
    template="plotly_dark",
    xaxis_title="Branch",
    yaxis_title="Total Sales",
    font=dict(color="white"),
    plot_bgcolor="black",
    paper_bgcolor="black"
)
fig1.show()

In [7]:
# Total revenue per city, with the city restored as a regular column for plotting.
city_sales = df.groupby("city")["total_price"].sum().reset_index()

# One bar per city; the bar colour is driven by the same total that sets its
# height, using a continuous red scale.
fig2 = px.bar(
    city_sales,
    x="city",
    y="total_price",
    title="Total Sales by City",
    text="total_price",
    color="total_price",
    color_continuous_scale="reds"
)
# Show the summed totals with two decimals instead of the raw float repr.
fig2.update_traces(texttemplate="%{text:.2f}")
fig2.update_layout(
    template="plotly_dark",
    xaxis_title="City",
    yaxis_title="Total Sales",
    font=dict(color="white"),
    plot_bgcolor="black",
    paper_bgcolor="black"
)
fig2.show()

In [8]:
# Mean unit price and mean total price for every (branch, city) pair.
# Only unit_price is plotted in this cell; total_price is computed alongside it.
branch_city_avg = df.groupby(["branch", "city"])[["unit_price", "total_price"]].mean().reset_index()

# Grouped (side-by-side) bars: one bar per city within each branch.
fig3 = px.bar(
    branch_city_avg,
    x="branch",
    y="unit_price",
    color="city",
    title="Average Unit Price per Branch/City",
    text="unit_price",
    barmode="group",
    color_discrete_sequence=px.colors.qualitative.Dark24
)
# Means carry long decimal tails; format the bar labels to two decimals so
# they stay readable.
fig3.update_traces(texttemplate="%{text:.2f}")
fig3.update_layout(
    template="plotly_dark",
    xaxis_title="Branch",
    yaxis_title="Average Unit Price",
    font=dict(color="white"),
    plot_bgcolor="black",
    paper_bgcolor="black"
)
fig3.show()

## 2. 👥 Customer Analysis

In [18]:
# Revenue contributed by each customer type, as a two-column frame.
customer_sales = (
    df.groupby("customer_type")["total_price"]
    .sum()
    .reset_index()
)

# Pie chart of each customer type's share of total revenue, dark-themed.
# update_layout() returns the figure, so it can be chained onto the constructor.
fig1 = px.pie(
    data_frame=customer_sales,
    names="customer_type",
    values="total_price",
    title="Sales Distribution by Customer Type",
    color_discrete_sequence=px.colors.sequential.RdBu,
).update_layout(
    template="plotly_dark",
    paper_bgcolor="black",
    plot_bgcolor="black",
    font={"color": "white"},
)
fig1.show()

In [20]:
# Number of units bought by each gender, as a two-column frame.
gender_quantity = (
    df.groupby("gender")["quantity"]
    .sum()
    .reset_index()
)

# One bar per gender, labelled with the unit count and coloured by gender.
# update_layout() returns the figure, so it can be chained onto the constructor.
fig2 = px.bar(
    data_frame=gender_quantity,
    x="gender",
    y="quantity",
    color="gender",
    text="quantity",
    title="Total Quantity Purchased by Gender",
    color_discrete_sequence=px.colors.qualitative.Vivid,
).update_layout(
    template="plotly_dark",
    paper_bgcolor="black",
    plot_bgcolor="black",
    font={"color": "white"},
    xaxis_title="Gender",
    yaxis_title="Total Quantity",
)
fig2.show()

#### Average spending by gender

In [23]:
# Mean total_price per sale for each gender, with gender kept as a regular column.
gender_spending = df.groupby("gender")["total_price"].mean().reset_index()

# One bar per gender, coloured by gender and labelled with the mean value.
fig3 = px.bar(
    gender_spending,
    x="gender",
    y="total_price",
    title="Average Spending by Gender",
    text="total_price",
    color="gender",
    color_discrete_sequence=px.colors.qualitative.Pastel
)
# The labels are means with long decimal tails; show them with two decimals.
fig3.update_traces(texttemplate="%{text:.2f}")
fig3.update_layout(
    template="plotly_dark",
    xaxis_title="Gender",
    yaxis_title="Average Spending (Total Price)",
    font=dict(color="white"),
    plot_bgcolor="black",
    paper_bgcolor="black"
)
fig3.show()