<a href="https://colab.research.google.com/github/erena29/Data-Analysis-SQL/blob/main/Supermarket%20Sales/Supermarket_Sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Supermarket Sales Data Analysis with SQL**

## Data Import and Database Setup

In [None]:
import pandas as pd
import sqlite3

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read
# by the loading cell that follows.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the sales CSV from the mounted Drive into the `sales` DataFrame.
# latin-1 decoding is requested explicitly - presumably the file is not valid
# UTF-8; TODO confirm against the source file.
sales = pd.read_csv('/content/drive/MyDrive/Dataset/supermarket/sales.csv', encoding='latin-1')

In [None]:
# Show every row that is an exact duplicate of an earlier row (empty frame = no duplicates).
# `duplicated` must be *called* to obtain the boolean mask; indexing with the bare
# bound method only worked because pandas invokes callable indexers with the frame.
print(sales[sales.duplicated()])

Empty DataFrame
Columns: [Order_ID, Order_Date, Ship_Date, Segment, City, State, Product_ID, Category, Sub_Category, Product_Name, Price, Quantity, Profit, Returns, Payment_Mode]
Index: []


In [None]:
!pip install prettytable==3.6.0

Collecting prettytable==3.6.0
  Downloading prettytable-3.6.0-py3-none-any.whl.metadata (25 kB)
Downloading prettytable-3.6.0-py3-none-any.whl (27 kB)
Installing collected packages: prettytable
  Attempting uninstall: prettytable
    Found existing installation: prettytable 3.12.0
    Uninstalling prettytable-3.12.0:
      Successfully uninstalled prettytable-3.12.0
Successfully installed prettytable-3.6.0


In [None]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in the cells below
%load_ext sql

# Connect to SQLite. The URL carries no file path, which presumably means an
# in-memory database that lives only for this kernel session - TODO confirm
%sql sqlite://

In [None]:
# Drop any existing `sales` table first - presumably so this cell can be re-run safely
%sql drop table if exists sales;
# Persist the `sales` DataFrame as a table named `sales` in the SQLite database
# (the DataFrame index is written too, as the `index` column visible in the sample output)
%sql --persist sales

 * sqlite://
Done.
 * sqlite://


'Persisted sales'

## Sample Data

In [None]:
%%sql
SELECT *
FROM sales
-- Preview only. The first five rows are enough to show every column.
LIMIT 5

 * sqlite://
Done.


index,Order_ID,Order_Date,Ship_Date,Segment,City,State,Product_ID,Category,Sub_Category,Product_Name,Price,Quantity,Profit,Returns,Payment_Mode
0,CA-2019-160304,2019-01-01,2019-01-07,Corporate,Gaithersburg,Maryland,FUR-BO-10004709,Furniture,Bookcases,"Bush Westfield Collection Bookcases, Medium Cherry Finish",73.94,1,28.2668,No,Online
1,CA-2019-160304,2019-01-02,2019-01-07,Corporate,Gaithersburg,Maryland,FUR-BO-10004709,Furniture,Bookcases,"Bush Westfield Collection Bookcases, Medium Cherry Finish",173.94,3,38.2668,No,Online
2,CA-2019-160304,2019-01-02,2019-01-07,Corporate,Gaithersburg,Maryland,TEC-PH-10000455,Technology,Phones,GE 30522EE2,231.98,2,67.2742,No,Cards
3,CA-2019-125206,2019-01-03,2019-01-05,Consumer,Los Angeles,California,OFF-ST-10003692,Office Supplies,Storage,Recycled Steel Personal File for Hanging File Folders,114.46,2,28.615,No,Online
4,US-2019-116365,2019-01-03,2019-01-08,Corporate,San Antonio,Texas,TEC-AC-10002217,Technology,Accessories,Imation Clip USB flash drive - 8 GB,30.08,2,-5.264,No,Online


## SQL Analysis

### **Total Transactions Each Month**

In [None]:
%%sql
WITH distinct_orders AS (
  -- One row per distinct (Order_ID, Order_Date) pair, so an order that spans
  -- several product rows is counted once rather than once per row.
  -- NOTE(review) an Order_ID that appears with two different dates is counted
  -- once per date. Confirm that this is the intended definition of a transaction.
  SELECT DISTINCT
    Order_ID,
    Order_Date
  FROM sales
),
orders_per_month AS (
  -- Transaction count for every (year, month) combination.
  SELECT
    strftime('%Y', Order_Date) AS Year,
    strftime('%m', Order_Date) AS Month,
    COUNT(*) AS Total_Transaction
  FROM distinct_orders
  GROUP BY Year, Month
)
-- Pivot the two years into side-by-side columns, one row per calendar month.
SELECT
  Month,
  SUM(CASE WHEN Year = '2019' THEN Total_Transaction ELSE 0 END) AS Transaction_2019,
  SUM(CASE WHEN Year = '2020' THEN Total_Transaction ELSE 0 END) AS Transaction_2020
FROM orders_per_month
GROUP BY Month
ORDER BY Month

 * sqlite://
Done.


Month,Transaction_2019,Transaction_2020
1,49,69
2,45,53
3,86,118
4,89,116
5,108,118
6,97,133
7,96,111
8,90,111
9,192,226
10,105,147


### **Top 5 Transactions by Total Price**

In [None]:
%%sql
WITH order_totals AS (
  -- Total of the Price column per order, rounded to two decimals.
  -- NOTE(review) Price is summed as-is, without multiplying by Quantity.
  -- Confirm that Price already holds the line total.
  SELECT
    Order_ID,
    ROUND(SUM(Price), 2) AS Total_Price
  FROM sales
  GROUP BY Order_ID
)
SELECT
  Order_ID,
  Total_Price
FROM order_totals
ORDER BY Total_Price DESC
LIMIT 5

 * sqlite://
Done.


Order_ID,Total_Price
US-2019-107440,9135.19
CA-2020-100111,8262.92
US-2019-143819,6125.82
CA-2020-138289,5963.7
CA-2020-143112,5594.14


### **Top 5 Sub-Categories by Units Sold Among the Top-10-Ranked Products**

In [None]:
%%sql
WITH Product_Totals AS (
  -- Units sold per product (Quantity summed over all rows of that product).
  SELECT
    Product_Name,
    Sub_Category,
    Category,
    SUM(Quantity) AS Total_Product
  FROM sales
  GROUP BY Product_Name, Sub_Category, Category
),
Rank_Product AS (
  -- DENSE_RANK keeps tied products on the same rank, so ranks 1 to 10 can
  -- cover more than ten products. No ORDER BY here because the ordering of a
  -- CTE is not carried into the queries that read from it.
  SELECT
    *,
    DENSE_RANK() OVER (ORDER BY Total_Product DESC) AS Product_Rank
  FROM Product_Totals
),
Rank_Sub_Category AS (
  -- Units sold per sub-category, counting only products ranked 1 to 10.
  -- Sub_Category and Category act as tie-breakers so equal totals always
  -- receive the same row numbers from run to run.
  SELECT
    Sub_Category,
    Category,
    SUM(Total_Product) AS Total,
    ROW_NUMBER() OVER (
      ORDER BY SUM(Total_Product) DESC, Sub_Category, Category
    ) AS Sub_Category_Rank
  FROM Rank_Product
  WHERE Product_Rank <= 10
  GROUP BY Sub_Category, Category
)
SELECT
  Sub_Category_Rank,
  Sub_Category
FROM Rank_Sub_Category
WHERE Sub_Category_Rank <= 5
ORDER BY Sub_Category_Rank


 * sqlite://
Done.


Sub_Category_Rank,Sub_Category
1,Binders
2,Fasteners
3,Paper
4,Envelopes
5,Art


### **Top Selling Product for Each Sub-Category**

In [None]:
%%sql
WITH Product_Sales AS (
  -- Units sold per product within each sub-category.
  SELECT
    Sub_Category,
    Product_Name,
    SUM(Quantity) AS Total_Sales
  FROM sales
  GROUP BY Sub_Category, Product_Name
),
Rank_Product AS (
  -- DENSE_RANK keeps every product that ties for first place in its sub-category,
  -- so a sub-category can appear on more than one result row.
  SELECT
    *,
    DENSE_RANK() OVER (PARTITION BY Sub_Category ORDER BY Total_Sales DESC) AS Product_Rank
  FROM Product_Sales
)
SELECT
  Sub_Category,
  Product_Name
FROM Rank_Product
WHERE Product_Rank = 1
-- Explicit ordering. Without it the row order is whatever the engine happens to emit.
ORDER BY Sub_Category, Product_Name

 * sqlite://
Done.


Sub_Category,Product_Name
Accessories,Memorex Mini Travel Drive 16 GB USB 2.0 Flash Drive
Appliances,Staple holder
Art,Staples in misc. colors
Binders,Storex Dura Pro Binders
Bookcases,"Bush Westfield Collection Bookcases, Medium Cherry Finish"
Bookcases,O'Sullivan 4-Shelf Bookcase in Odessa Pine
Chairs,"Situations Contoured Folding Chairs, 4/Set"
Copiers,Hewlett Packard LaserJet 3310 Copier
Envelopes,Staple envelope
Fasteners,Staples


### **Top 3 Products by Average Order Quantity per Transaction**

In [None]:
%%sql
WITH Order_Product_Qty AS (
  -- Quantity of each product inside each order.
  SELECT
    Order_ID,
    Product_Name,
    SUM(Quantity) AS Total_Order
  FROM sales
  GROUP BY Order_ID, Product_Name
),
Rank_Product AS (
  -- Average per-order quantity, rounded up to a whole unit, then ranked.
  -- Ranking uses the rounded value, so products with equal rounded averages
  -- share a rank and the top 3 ranks can hold more than three products.
  SELECT
    Product_Name,
    CEIL(AVG(Total_Order)) AS Average_Total_Order,
    DENSE_RANK() OVER (ORDER BY CEIL(AVG(Total_Order)) DESC) AS Product_Rank
  FROM Order_Product_Qty
  GROUP BY Product_Name
)
SELECT
  Product_Rank,
  Product_Name,
  Average_Total_Order
FROM Rank_Product
WHERE Product_Rank <= 3
ORDER BY Product_Rank;

 * sqlite://
Done.


Product_Rank,Product_Name,Average_Total_Order
1,Xerox 1964,12.0
2,Sauder Camden County Collection Library,10.0
2,"Sanford Uni-Blazer View Highlighters, Chisel Tip, Yellow",10.0
2,Panasonic KX MB2061 Multifunction Printer,10.0
2,"Acco PRESSTEX Data Binder with Storage Hooks, Dark Blue, 14 7/8"" X 11""",10.0
3,Xerox 197,9.0
3,"Tennsco Lockers, Sand",9.0
3,Southworth 100% Cotton The Best Paper,9.0
3,"Redi-Strip #10 Envelopes, 4 1/8 x 9 1/2",9.0
3,Luxo Adjustable Task Clamp Lamp,9.0


### **Top 5 Sub-Categories by Profit for Each Year**

In [None]:
%%sql
WITH Yearly_Profit AS (
  -- Profit per sub-category and order year, rounded to two decimals.
  SELECT
    strftime('%Y', Order_Date) AS Year,
    Sub_Category,
    ROUND(SUM(Profit), 2) AS Total_Profit
  FROM sales
  GROUP BY Year, Sub_Category
),
Rank_Sub_Category AS (
  -- Position of each sub-category within its year, most profitable first.
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY Year ORDER BY Total_Profit DESC) AS Sub_Category_Rank
  FROM Yearly_Profit
)
-- Pivot so each rank shows the 2019 and 2020 sub-category side by side.
-- MAX simply picks the single non-NULL name produced by each CASE.
SELECT
  Sub_Category_Rank,
  MAX(CASE WHEN Year = '2019' THEN Sub_Category END) AS Sub_Category_2019,
  MAX(CASE WHEN Year = '2020' THEN Sub_Category END) AS Sub_Category_2020
FROM Rank_Sub_Category
WHERE Sub_Category_Rank <= 5
GROUP BY Sub_Category_Rank
ORDER BY Sub_Category_Rank

 * sqlite://
Done.


Sub_Category_Rank,Sub_Category_2019,Sub_Category_2020
1,Copiers,Copiers
2,Binders,Accessories
3,Accessories,Phones
4,Phones,Paper
5,Paper,Appliances


### **Sub-Categories with Negative Profit Each Year**

In [None]:
%%sql
SELECT
  strftime('%Y', Order_Date) AS Year,
  Sub_Category,
  ROUND(SUM(Profit), 2) AS Total_Profit
FROM sales
GROUP BY Year, Sub_Category
-- The filter uses the unrounded sum. Only the displayed value is rounded.
HAVING SUM(Profit) < 0
-- Sub_Category is a secondary sort key so rows within a year have a defined order.
ORDER BY Year, Sub_Category;

 * sqlite://
Done.


Year,Sub_Category,Total_Profit
2019,Supplies,-698.96
2019,Tables,-2950.94
2020,Bookcases,-583.63
2020,Machines,-2869.22
2020,Supplies,-955.31
2020,Tables,-8140.69


### **States with Reduced Profitability in 2020**

In [None]:
%%sql
SELECT
  State
FROM sales
-- Keep the states whose summed 2019 profit exceeds their summed 2020 profit.
-- Rows from another year add 0 to both sides, so a state with no rows in one
-- of the two years is compared against 0 for that year.
GROUP BY State
HAVING
  SUM(CASE WHEN strftime('%Y', Order_Date) = '2019' THEN Profit ELSE 0 END)
  > SUM(CASE WHEN strftime('%Y', Order_Date) = '2020' THEN Profit ELSE 0 END)
-- Explicit ordering so the list does not depend on how the engine groups rows.
ORDER BY State

 * sqlite://
Done.


State
Alabama
Colorado
District of Columbia
Idaho
Illinois
Indiana
Iowa
Maine
Michigan
Mississippi


### **Return Rate by Payment Mode**

In [None]:
%%sql
SELECT
  Payment_Mode,
  ROUND(100.0 * SUM(CASE WHEN Returns = 'Yes' THEN 1 ELSE 0 END) / COUNT(*), 2) AS Return_Rate
  -- Share of rows flagged Returns = Yes among all rows of the payment mode,
  -- expressed as a percentage. 100.0 forces floating-point division.
FROM sales
GROUP BY Payment_Mode
ORDER BY Return_Rate DESC

 * sqlite://
Done.


Payment_Mode,Return_Rate
Online,5.55
COD,4.69
Cards,4.05


### **Order Processing Time: Days to Ship**

In [None]:
%%sql
WITH ship_lag AS (
  -- Whole days between order and shipment for every row of sales.
  SELECT
    CAST(julianday(Ship_Date) - julianday(Order_Date) AS INT) AS Days_to_Ship
  FROM sales
)
-- Number of rows for each shipping delay.
SELECT
  Days_to_Ship,
  COUNT(*) AS Total
FROM ship_lag
GROUP BY Days_to_Ship
ORDER BY Days_to_Ship

 * sqlite://
Done.


Days_to_Ship,Total
0,326
1,265
2,783
3,624
4,1552
5,1217
6,763
7,369
8,2
