## Grouping Sets and Pivoting Data

***

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

import pyodbc

%matplotlib inline

pd.set_option('display.max_columns',None)
#pd.set_option('display.max_rows',None)
pd.set_option('display.width', 1000)
pd.option_context('float_format','{:.2f}'.format)

random.seed(0)
np.random.seed(0)
np.set_printoptions(suppress=True)

## Load Data from SQL database

### MSSQL

In [2]:
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=DESKTOP-ARQLULI\SQLEXPRESS2019;'
                      'Database=AdventureWorksLT2012;'
                      'Trusted_Connection=yes;')

## Challenge 1: Retrieve Regional Sales Totals

In [3]:
pd.read_sql_query("SELECT a.CountryRegion, a.StateProvince, SUM(soh.TotalDue) AS Revenue \
                   FROM SalesLT.Address AS a \
                   JOIN SalesLT.CustomerAddress AS ca \
                   ON a.AddressID = ca.AddressID \
                   JOIN SalesLT.Customer AS c \
                   ON ca.CustomerID = c.CustomerID \
                   JOIN SalesLT.SalesOrderHeader as soh \
                   ON c.CustomerID = soh.CustomerID \
                   GROUP BY ROLLUP(a.CountryRegion, a.StateProvince) \
                   ORDER BY a.CountryRegion, a.StateProvince;", conn)

Unnamed: 0,CountryRegion,StateProvince,Revenue
0,,,956303.5949
1,United Kingdom,,572496.5594
2,United Kingdom,England,572496.5594
3,United States,,383807.0355
4,United States,California,346517.6072
5,United States,Colorado,14017.9083
6,United States,Nevada,7330.8972
7,United States,New Mexico,15275.1977
8,United States,Utah,665.4251


In [4]:
pd.read_sql_query("SELECT a.CountryRegion, a.StateProvince,\
                   IIF(GROUPING_ID(a.CountryRegion) = 1 AND GROUPING_ID(a.StateProvince) = 1, 'Total', IIF(GROUPING_ID(a.StateProvince) = 1, a.CountryRegion + ' Subtotal', a.StateProvince + ' Subtotal')) AS Level,\
                   SUM(soh.TotalDue) AS Revenue \
                   FROM SalesLT.Address AS a \
                   JOIN SalesLT.CustomerAddress AS ca \
                   ON a.AddressID = ca.AddressID \
                   JOIN SalesLT.Customer AS c \
                   ON ca.CustomerID = c.CustomerID \
                   JOIN SalesLT.SalesOrderHeader as soh \
                   ON c.CustomerID = soh.CustomerID \
                   GROUP BY ROLLUP(a.CountryRegion, a.StateProvince) \
                   ORDER BY a.CountryRegion, a.StateProvince;", conn)

Unnamed: 0,CountryRegion,StateProvince,Level,Revenue
0,,,Total,956303.5949
1,United Kingdom,,United Kingdom Subtotal,572496.5594
2,United Kingdom,England,England Subtotal,572496.5594
3,United States,,United States Subtotal,383807.0355
4,United States,California,California Subtotal,346517.6072
5,United States,Colorado,Colorado Subtotal,14017.9083
6,United States,Nevada,Nevada Subtotal,7330.8972
7,United States,New Mexico,New Mexico Subtotal,15275.1977
8,United States,Utah,Utah Subtotal,665.4251


In [5]:
pd.read_sql_query("SELECT a.CountryRegion, a.StateProvince, a.City,\
                   CHOOSE (1 + GROUPING_ID(a.CountryRegion) + GROUPING_ID(a.StateProvince) + GROUPING_ID(a.City),\
                           a.City + ' Subtotal', a.StateProvince + ' Subtotal',\
                           a.CountryRegion + ' Subtotal', 'Total') AS Level,\
                   SUM(soh.TotalDue) AS Revenue\
                   FROM SalesLT.Address AS a\
                   JOIN SalesLT.CustomerAddress AS ca\
                   ON a.AddressID = ca.AddressID\
                   JOIN SalesLT.Customer AS c\
                   ON ca.CustomerID = c.CustomerID\
                   JOIN SalesLT.SalesOrderHeader as soh\
                   ON c.CustomerID = soh.CustomerID\
                   GROUP BY ROLLUP(a.CountryRegion, a.StateProvince, a.City)\
                   ORDER BY a.CountryRegion, a.StateProvince, a.City;", conn)

Unnamed: 0,CountryRegion,StateProvince,City,Level,Revenue
0,,,,Total,956303.5949
1,United Kingdom,,,United Kingdom Subtotal,572496.5594
2,United Kingdom,England,,England Subtotal,572496.5594
3,United Kingdom,England,Abingdon,Abingdon Subtotal,45.1995
4,United Kingdom,England,Cambridge,Cambridge Subtotal,2711.4098
5,United Kingdom,England,Gloucestershire,Gloucestershire Subtotal,70698.9922
6,United Kingdom,England,High Wycombe,High Wycombe Subtotal,608.1766
7,United Kingdom,England,Liverpool,Liverpool Subtotal,86222.8072
8,United Kingdom,England,London,London Subtotal,206736.1667
9,United Kingdom,England,Maidenhead,Maidenhead Subtotal,43.0437


## Challenge 2: Retrieve Customer Sales Revenue by Category

In [6]:
pd.read_sql_query("SELECT * FROM \
                   (SELECT cat.ParentProductCategoryName, cust.CompanyName, sod.LineTotal\
                    FROM SalesLT.SalesOrderDetail AS sod \
                    JOIN SalesLT.SalesOrderHeader AS soh ON sod.SalesOrderID = soh.SalesOrderID \
                    JOIN SalesLT.Customer AS cust ON soh.CustomerID = cust.CustomerID \
                    JOIN SalesLT.Product AS prod ON sod.ProductID = prod.ProductID \
                    JOIN SalesLT.vGetAllCategories AS cat ON prod.ProductcategoryID = cat.ProductCategoryID) AS catsales\
                   PIVOT (SUM(LineTotal) FOR ParentProductCategoryName \
                   IN ([Accessories], [Bikes], [Clothing], [Components])) AS pivotedsales \
                   ORDER BY CompanyName;",conn)

Unnamed: 0,CompanyName,Accessories,Bikes,Clothing,Components
0,Action Bicycle Specialists,1299.885268,76613.651796,2461.65725,9494.082
1,Aerobic Exercise Company,,,,1732.89
2,Bulk Discount Store,730.464,70597.284,851.562,1980.918
3,Central Bicycle Specialists,,,,31.584
4,Channel Outlet,216.0,,308.664,
5,Closest Bicycle Store,,20389.668,559.164108,8001.846
6,Coalition Bike Company,,529.4928,124.776,1201.938
7,Discount Tours,72.0,2041.188,341.058,72.882
8,Eastside Department Store,1220.235601,51096.054808,2772.229577,10594.848
9,Engineered Bike Systems,,2604.762,178.746,63.9


In [7]:
conn.close()

#### Python code done by Dennis Lam