# Northwind Traders Analytics

- Connect to PostgreSQL database and load SQL extension
- Perform data exploration to view all base tables
- Create tables with common joins for quering throughout the rest of the project

In [4]:
%load_ext sql
%sql postgresql://postgres@localhost:5432/northwind

In [5]:
%%sql

SELECT table_name, table_type 
  FROM information_schema.tables 
 WHERE table_schema = 'public';

 * postgresql://postgres@localhost:5432/northwind
17 rows affected.


table_name,table_type
territories,BASE TABLE
order_details,BASE TABLE
employee_territories,BASE TABLE
us_states,BASE TABLE
customers,BASE TABLE
orders,BASE TABLE
employees,BASE TABLE
shippers,BASE TABLE
products,BASE TABLE
categories,BASE TABLE


In [6]:
%%sql 

CREATE OR REPLACE VIEW customer_orders AS 
SELECT c.*, o.order_id, o.employee_id, o.order_date, o.required_date, o.shipped_date, o.ship_via, o.freight, o.ship_name, o.ship_address, o.ship_city, o.ship_region, o.ship_postal_code, o.ship_country
  FROM customers AS c
  JOIN orders AS o 
    ON c.customer_id = o.customer_id;

CREATE OR REPLACE VIEW product_quantity_orders AS
SELECT p.*, od.quantity, od.discount, o.*
  FROM products AS p
  JOIN order_details AS od ON p.product_id = od.product_id
  JOIN orders AS o ON od.order_id = o.order_id
 ORDER BY p.product_id;

CREATE OR REPLACE VIEW employee_orders AS 
SELECT e.last_name, e.first_name, e.title, e.title_of_courtesy, e.birth_date, e.hire_date, address, city, region, postal_code, country, home_phone, extension, notes, reports_to, photo_path, o.*
  FROM employees AS e
  JOIN orders AS o ON e.employee_id = o.employee_id
 ORDER BY o.employee_id;

 * postgresql://postgres@localhost:5432/northwind
Done.
Done.
Done.


[]

## Employees Sales Performance

- Rank employees based on their total sales amount

In [21]:
%%sql

WITH aggregate_sales AS(
SELECT e.employee_id, SUM(od.unit_price * od.quantity * (1 - od.discount)) AS total_sales
  FROM employees AS e
  JOIN orders AS o ON e.employee_id = o.employee_id
  JOIN order_details AS od ON o.order_id = od.order_id
 GROUP BY e.employee_id)

SELECT employee_id, ROUND(total_sales::numeric,2) AS total_sales, RANK() OVER(ORDER BY total_sales DESC) AS employee_rank
  FROM aggregate_sales;

 * postgresql://postgres@localhost:5432/northwind
9 rows affected.


employee_id,total_sales,employee_rank
4,232890.85,1
3,202812.84,2
1,192107.6,3
2,166537.76,4
8,126862.28,5
7,124568.23,6
9,77308.07,7
6,73913.13,8
5,68792.28,9


- Calculated the total sales for each employee by multiplying unit price * quantity and subtracting any discounts at the order level
- Utilized ranking window function to identify high and low performers
- High performers are employees 4,3,1 and low performers are employees 9,6,5

## Monthly Sales Trends

- Visualize the progress of the sales and identify trends that might shape the company's future strategies
- Aggregate sales data at a monthly level and calculating a running total of sales by month
- Analyze the month-over-month sales growth rate

In [43]:
%%sql

WITH monthly_sales AS (
SELECT DATE_TRUNC('month', order_date) AS order_month, SUM(unit_price * quantity * (1 - discount)) AS monthly_sales
  FROM product_quantity_orders
 GROUP BY order_month
 ORDER BY order_month ASC)

SELECT order_month, ROUND(monthly_sales::numeric,2) AS monthly_sales, 
       ROUND(LAG(monthly_sales) OVER w::numeric,2) AS previous_month_sales,
       ROUND((monthly_sales::numeric -  LAG(monthly_sales) OVER w)::numeric / LAG(monthly_sales) OVER w::numeric * 100,2) AS MoM_growth_rate,
       ROUND(SUM(monthly_sales) OVER w::numeric,2) AS running_total
  FROM monthly_sales
WINDOW w AS (ORDER BY order_month);

 * postgresql://postgres@localhost:5432/northwind
23 rows affected.


order_month,monthly_sales,previous_month_sales,mom_growth_rate,running_total
1996-07-01 00:00:00-04:00,34863.66,,,34863.66
1996-08-01 00:00:00-04:00,31828.9,34863.66,-8.7,66692.56
1996-09-01 00:00:00-04:00,32996.87,31828.9,3.67,99689.43
1996-10-01 00:00:00-04:00,46904.67,32996.87,42.15,146594.11
1996-11-01 00:00:00-05:00,57033.17,46904.67,21.59,203627.27
1996-12-01 00:00:00-05:00,56575.6,57033.17,-0.8,260202.87
1997-01-01 00:00:00-05:00,76604.76,56575.6,35.4,336807.63
1997-02-01 00:00:00-05:00,48154.58,76604.76,-37.14,384962.21
1997-03-01 00:00:00-05:00,48200.63,48154.58,0.1,433162.84
1997-04-01 00:00:00-05:00,54061.93,48200.63,12.16,487224.77


## High Value Customers
- Identify customers with above-average order values
- Rank customers based on the quantity of above-average orders and highlight the top 5 customers

In [63]:
%%sql

WITH customer_order_totals AS (
SELECT c.customer_id, c.order_id, SUM(o.unit_price * o.quantity * (1 - o.discount)) AS order_total
  FROM customer_orders AS c
  JOIN order_details AS o ON c.order_id = o.order_id
 GROUP BY c.customer_id, c.order_id
 ORDER BY c.customer_id, c.order_id),

order_categorization AS (
SELECT customer_id, order_id, order_total,
       CASE
        WHEN order_total > AVG(order_total) OVER w THEN 'Above Average'
        WHEN order_total <= AVG(order_total) OVER w THEN 'Average/Below Average'
       END AS order_category
  FROM customer_order_totals
WINDOW w AS (ORDER BY order_total RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)),

customer_ranking AS (
SELECT customer_id, COUNT(DISTINCT order_id) AS above_avg_order_count,
       ROW_NUMBER() OVER(ORDER BY COUNT(DISTINCT order_id) DESC) AS customer_rnk
  FROM order_categorization
 WHERE order_category = 'Above Average'
 GROUP BY customer_id)

SELECT customer_id, above_avg_order_count
  FROM customer_ranking
 WHERE customer_rnk <=5;

 * postgresql://postgres@localhost:5432/northwind
5 rows affected.


customer_id,above_avg_order_count
ERNSH,26
SAVEA,26
QUICK,22
HUNGO,11
RATTC,10


The top 5 customers with the most above-average orders are ERNSH, SAVEA, QUICK, HUNGO & RATTC.