In [0]:
%sql

-- Gross margin percentage per city: total profit divided by total amount, times 100,
-- highest margin first.
SELECT
    h.city,
    -- NULLIF turns a zero total amount into NULL so the division yields NULL
    -- instead of raising a divide-by-zero error when ANSI mode is enabled.
    SUM(profit) / NULLIF(SUM(amount), 0) * 100 AS gross_margin_percent
FROM sales_order_detail AS d
INNER JOIN sales_order_header AS h
    ON d.order_id = h.order_id
GROUP BY h.city
-- Cities whose margin cannot be computed (NULL) are listed after all real values.
ORDER BY gross_margin_percent DESC NULLS LAST

city,gross_margin_percent
Surat,19.698301113063856
Allahabad,18.27727353621641
Udaipur,18.15226225955026
Kolkata,17.748118699417862
Delhi,14.021343778728165
Thiruvananthapuram,13.90147856452931
Pune,13.55694274364565
Amritsar,12.07011315731085
Gangtok,7.600454890068234
Simla,7.569813062543272


In [0]:
%sql

-- Total profit for every (category, sub_category) pair, largest total first.
SELECT
    category,
    sub_category,
    SUM(profit) AS profit
FROM sales_order_detail
GROUP BY
    category,
    sub_category
ORDER BY profit DESC

category,sub_category,profit
Electronics,Printers,5964.0
Furniture,Bookcases,4888.0
Electronics,Accessories,3559.0
Clothing,Trousers,2847.0
Clothing,Stole,2559.0
Electronics,Phones,2207.0
Clothing,Hankerchief,2098.0
Clothing,T-shirt,1500.0
Clothing,Shirt,1131.0
Furniture,Furnishings,844.0


In [0]:
%sql

-- Profit and quantity per calendar month and category, lowest profit
-- (largest loss) first.
-- NOTE(review): the month bucket ignores the year, so the same month of two
-- different years would be merged - confirm the data spans at most 12 months.
SELECT
    MONTH(order_date) AS month,
    category,
    SUM(profit) AS profit,
    SUM(quantity) AS quantity
FROM sales_order_detail AS d
INNER JOIN sales_order_header AS h
    ON d.order_id = h.order_id
GROUP BY
    MONTH(order_date),
    category
ORDER BY
    profit,
    category,
    month

month,category,profit,quantity
4,Furniture,-3425.0,63
5,Electronics,-2523.0,116
9,Furniture,-2468.0,83
6,Electronics,-2078.0,82
6,Clothing,-2036.0,235
7,Electronics,-1633.0,40
9,Clothing,-1585.0,192
8,Electronics,-1548.0,98
10,Furniture,-1316.0,50
8,Clothing,-1075.0,276


## Query performance
pre-joined 'denormalized' table; rank by partition
## SQL API 
partition, rank
-- https://spark.apache.org/docs/latest/api/sql/index.html#dense_rank

In [0]:
%sql

-- Quantity sold per city for the 'Clothing' category in month 1, read from the
-- denormalized sales table.
-- The window SUM repeats each (category, city, month) total on every detail row;
-- DISTINCT then collapses those repeats into one row per city.
SELECT DISTINCT
    category,
    MONTH(order_date) AS month,
    city,
    SUM(quantity) OVER (
        PARTITION BY category, city, MONTH(order_date)
    ) AS quantity
    -- , *  -- add to check the detail rows behind each total
FROM sales_denorm
WHERE category = 'Clothing'
  AND MONTH(order_date) = 1
-- city is the last sort key so that cities with equal totals come back in a
-- stable order instead of an arbitrary one.
ORDER BY
    category,
    month,
    quantity DESC,
    city

-- select * from sales_denorm where category='Clothing' and month(order_date)=1 order by quantity desc -- 110 rows

category,month,city,quantity
Clothing,1,Indore,117
Clothing,1,Ahmedabad,48
Clothing,1,Delhi,48
Clothing,1,Allahabad,41
Clothing,1,Mumbai,39
Clothing,1,Kolkata,31
Clothing,1,Pune,26
Clothing,1,Chandigarh,23
Clothing,1,Hyderabad,19
Clothing,1,Surat,14


In [0]:
%sql
-- Compare each monthly sales target with the amount actually sold, at category level.
-- Insight: shows whether the target was met, as a basis for judging what may
-- happen in the near future.
-- Every target row is kept (LEFT JOIN); a target with no matching sales has a
-- NULL total_sold_amount and is compared as if 0 had been sold.
WITH actual_sales AS (
    -- Amount sold per category, year and month.
    SELECT
        category,
        YEAR(order_date) AS year,
        MONTH(order_date) AS month,
        SUM(amount) AS total_sold_amount
    FROM sales_denorm
    GROUP BY
        category,
        YEAR(order_date),
        MONTH(order_date)
)
SELECT
    t.category,
    t.year,
    t.month,
    t.target,
    a.total_sold_amount,
    CASE
        -- Without COALESCE a NULL total makes the comparison UNKNOWN, which
        -- would fall through to ELSE and wrongly report 'achieved'.
        WHEN t.target > COALESCE(a.total_sold_amount, 0) THEN 'not achieved'
        ELSE 'achieved'
    END AS performance
FROM sales_target AS t
LEFT JOIN actual_sales AS a
    ON t.month = a.month
    AND t.year = a.year
    AND t.category = a.category
-- WHERE t.category = 'Furniture' AND t.year = 2018 AND t.month = 4

category,year,month,target,total_sold_amount,performance
Furniture,2018,4,10400.0,8121.0,not achieved
Furniture,2018,5,10500.0,6220.0,not achieved
Furniture,2018,6,10600.0,5532.0,not achieved
Furniture,2018,7,10800.0,3483.0,not achieved
Furniture,2018,8,10900.0,9538.0,not achieved
Furniture,2018,9,11000.0,8704.0,not achieved
Furniture,2018,10,11100.0,6766.0,not achieved
Furniture,2018,11,11300.0,15165.0,achieved
Furniture,2018,12,11400.0,9474.0,not achieved
Furniture,2019,1,11500.0,21257.0,achieved
