This notebook will define the data model for super marker data, and then analyse this data. 
Finally, visual diagrams will be used to show the data inseights

In [0]:
-- Step 1: Create catalog if it does not exist
CREATE CATALOG IF NOT EXISTS supermarket_sales COMMENT 'This is super market sales catalog';

-- Step 2: Create a schema/database for bronze layer. The raw data will be stored in this layer.
CREATE SCHEMA IF NOT EXISTS `supermarket_sales`.`sales_bronze`
COMMENT 'This is a schema for bronze layer. The raw incoming data will be stored in this layer';

-- Step 3: Create a schema/database for silver layer. The cleansed data will be stored in this layer.
CREATE SCHEMA IF NOT EXISTS `supermarket_sales`.`sales_silver`
COMMENT 'This is a schema for silver layer. The cleansed data will be stored in this layer';



In [0]:
-- Step 4: Create a volume for incoming sales data (CSV files)
CREATE VOLUME IF NOT EXISTS `supermarket_sales`.`sales_bronze`.`raw_sales_data`
COMMENT 'This is a volume for incoming sales data'

As Step 5, upload the Sales.csv file to the newly created volume using Catalog exlorer. This file is available at 'dataset/super-market/sales.csv'

![create_volume.png](./create_volume.png "create_volume.png")

In [0]:
%sql
-- Step 6: Create a table in the bronze layer to store the incoming sales data using the CSV file based on the Volumne you have created earlier
-- CREATE TABLE IF NOT EXISTS `supermarket_sales`.`sales_bronze`.`incoming_sales_data` USING CSV
-- LOCATION 's3:/Volumes/supermarket_sales/sales_bronze/incoming_sales_data/sales.csv'

-- As its a test project, no cloud provider used but a local file is used for testing purpose. Please create table manually from Catalog by uploading the CSV file
DESCRIBE TABLE `supermarket_sales`.`sales_bronze`.`raw_sales_data`;

-- Describe the schema
-- DESCRIBE DATABASE EXTENDED `supermarket_sales`.`sales_bronze`;

col_name,data_type,comment
Invoice_ID,string,
Branch,string,
City,string,
Customer_Type,string,
Gender,string,
Product_Line,string,
Unit_Price,double,
Quantity,bigint,
Tax_5%,double,
Total,double,


In [0]:
SHOW DATABASES FROM supermarket_sales;

databaseName
default
information_schema
sales_bronze
sales_silver


In [0]:
-- SHOW VOLUMES from random.sales_bronze; 
DESCRIBE VOLUME supermarket_sales.sales_bronze.raw_sales_data;

name,catalog,database,owner,storage_location,volume_type,comment,securable_type,securable_kind
incoming_sales_data,supermarket_sales,sales_bronze,amit.dixit@impetus.com,,MANAGED,This is a volume for incoming sales data,VOLUME,VOLUME_DB_STORAGE


In [0]:
%sql
-- Step 7: Select 10 records
SELECT * FROM `supermarket_sales`.`sales_bronze`.`raw_sales_data` LIMIT 10

Invoice_ID,Branch,City,Customer_Type,Gender,Product_Line,Unit_Price,Quantity,Tax_5%,Total,Date,Time,Payment,cogs,gross Margin_Percentage,Gross_Income,Rating
750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,2019-01-05,2025-08-02T13:08:00.000Z,Ewallet,522.83,4.761904762,26.1415,9.1
226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,2019-03-08,2025-08-02T10:29:00.000Z,Cash,76.4,4.761904762,3.82,9.6
631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,2019-03-03,2025-08-02T13:23:00.000Z,Credit card,324.31,4.761904762,16.2155,7.4
123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,2019-01-27,2025-08-02T20:33:00.000Z,Ewallet,465.76,4.761904762,23.288,8.4
373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2019-02-08,2025-08-02T10:37:00.000Z,Ewallet,604.17,4.761904762,30.2085,5.3
699-14-3026,C,Naypyitaw,Normal,Male,Electronic accessories,85.39,7,29.8865,627.6165,2019-03-25,2025-08-02T18:30:00.000Z,Ewallet,597.73,4.761904762,29.8865,4.1
355-53-5943,A,Yangon,Member,Female,Electronic accessories,68.84,6,20.652,433.692,2019-02-25,2025-08-02T14:36:00.000Z,Ewallet,413.04,4.761904762,20.652,5.8
315-22-5665,C,Naypyitaw,Normal,Female,Home and lifestyle,73.56,10,36.78,772.38,2019-02-24,2025-08-02T11:38:00.000Z,Ewallet,735.6,4.761904762,36.78,8.0
665-32-9167,A,Yangon,Member,Female,Health and beauty,36.26,2,3.626,76.146,2019-01-10,2025-08-02T17:15:00.000Z,Credit card,72.52,4.761904762,3.626,7.2
692-92-5582,B,Mandalay,Member,Female,Food and beverages,54.84,3,8.226,172.746,2019-02-20,2025-08-02T13:27:00.000Z,Credit card,164.52,4.761904762,8.226,5.9


In [0]:
%sql
-- Step 8: Find the sum of gross income
SELECT SUM(Gross_Income) AS Average_Gross_Income FROM `supermarket_sales`.`sales_bronze`.`raw_sales_data`

Average_Gross_Income
15379.369000000002


In [0]:
%sql
-- Step 9: Select the average gross income by product line
SELECT City, Product_Line, AVG(Gross_Income) AS Average_Gross_Income
FROM `supermarket_sales`.`sales_bronze`.`raw_sales_data`
GROUP BY City, Product_Line;

City,Product_Line,Average_Gross_Income
Naypyitaw,Fashion accessories,15.794923076923078
Yangon,Sports and travel,15.635754237288136
Naypyitaw,Food and beverages,17.147803030303027
Naypyitaw,Electronic accessories,16.423354545454544
Mandalay,Health and beauty,17.952075471698112
Naypyitaw,Sports and travel,16.679288888888895
Naypyitaw,Home and lifestyle,14.704288888888891
Yangon,Food and beverages,14.09121551724138
Mandalay,Fashion accessories,12.60623387096774
Naypyitaw,Health and beauty,15.2155


In [0]:
SELECT * FROM `supermarket_sales`.`sales_bronze`.`raw_sales_data` LIMIT 10;

Invoice_ID,Branch,City,Customer_Type,Gender,Product_Line,Unit_Price,Quantity,Tax_5_Percentage,Total,Date,Time,Payment,COGS,Gross_Margin_Percentage,Gross_Income,Rating
750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,2019-01-05,2025-08-06T13:08:00.000Z,Ewallet,522.83,4.761904762,26.1415,9.1
226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,2019-03-08,2025-08-06T10:29:00.000Z,Cash,76.4,4.761904762,3.82,9.6
631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,2019-03-03,2025-08-06T13:23:00.000Z,Credit card,324.31,4.761904762,16.2155,7.4
123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,2019-01-27,2025-08-06T20:33:00.000Z,Ewallet,465.76,4.761904762,23.288,8.4
373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2019-02-08,2025-08-06T10:37:00.000Z,Ewallet,604.17,4.761904762,30.2085,5.3
699-14-3026,C,Naypyitaw,Normal,Male,Electronic accessories,85.39,7,29.8865,627.6165,2019-03-25,2025-08-06T18:30:00.000Z,Ewallet,597.73,4.761904762,29.8865,4.1
355-53-5943,A,Yangon,Member,Female,Electronic accessories,68.84,6,20.652,433.692,2019-02-25,2025-08-06T14:36:00.000Z,Ewallet,413.04,4.761904762,20.652,5.8
315-22-5665,C,Naypyitaw,Normal,Female,Home and lifestyle,73.56,10,36.78,772.38,2019-02-24,2025-08-06T11:38:00.000Z,Ewallet,735.6,4.761904762,36.78,8.0
665-32-9167,A,Yangon,Member,Female,Health and beauty,36.26,2,3.626,76.146,2019-01-10,2025-08-06T17:15:00.000Z,Credit card,72.52,4.761904762,3.626,7.2
692-92-5582,B,Mandalay,Member,Female,Food and beverages,54.84,3,8.226,172.746,2019-02-20,2025-08-06T13:27:00.000Z,Credit card,164.52,4.761904762,8.226,5.9


Databricks visualization. Run in Databricks to view.