# Lecture 13 (Postgres Docs)

In [1]:
# Load (or reload) the `sql` IPython extension that provides the %sql / %%sql magics used below.
%reload_ext sql

There's a new jupysql version available (0.10.14), you're running 0.10.0. To upgrade: pip install jupysql --upgrade


In [2]:
!psql -h localhost -c 'DROP DATABASE IF EXISTS lec13'
!psql -h localhost -c 'CREATE DATABASE lec13' 

NOTICE:  database "lec13" does not exist, skipping
DROP DATABASE
CREATE DATABASE


In [3]:
# Point the sql magic at the lec13 database on the local Postgres server (port 5432).
%sql postgresql://127.0.0.1:5432/lec13

# Window Functions

First, set up the `empsalary` table used by the window-function examples below.

In [5]:
%%sql
/* Rebuild the demo table from scratch so this cell can be re-run safely. */
DROP TABLE IF EXISTS empsalary;

CREATE TABLE empsalary (
    depname TEXT,
    empno   INTEGER,
    salary  INTEGER
);

/* Ten employees spread over three departments. */
INSERT INTO empsalary (depname, empno, salary)
VALUES
    ('develop',   11, 5200),
    ('develop',    7, 4200),
    ('develop',    9, 4500),
    ('develop',    8, 6000),
    ('develop',   10, 5200),
    ('personnel',  5, 3500),
    ('personnel',  2, 3900),
    ('sales',      3, 4800),
    ('sales',      1, 5000),
    ('sales',      4, 4800);

/* Display the freshly loaded rows. */
SELECT depname, empno, salary
FROM empsalary;

depname,empno,salary
develop,11,5200
develop,7,4200
develop,9,4500
develop,8,6000
develop,10,5200
personnel,5,3500
personnel,2,3900
sales,3,4800
sales,1,5000
sales,4,4800


In [21]:
%%sql
/* In the sample data every empno is distinct, so grouping on
   (depname, empno) yields one-row groups and AVG(salary) simply
   echoes each salary. A plain GROUP BY cannot keep per-row detail
   and a wider aggregate at the same time. */
SELECT
    depname,
    empno,
    AVG(salary)
FROM empsalary
GROUP BY
    depname,
    empno;

depname,empno,avg
sales,4,4800.0
develop,11,5200.0
develop,7,4200.0
sales,1,5000.0
develop,8,6000.0
personnel,5,3500.0
personnel,2,3900.0
sales,3,4800.0
develop,9,4500.0
develop,10,5200.0


In [24]:
%%sql
/* Running total over salaries in ascending order. Rows with equal
   salaries are peers and therefore share one cumulative sum. */
SELECT
    depname,
    empno,
    salary,
    SUM(salary) OVER (ORDER BY salary)
FROM empsalary;

depname,empno,salary,sum
personnel,5,3500,3500
personnel,2,3900,7400
develop,7,4200,11600
develop,9,4500,16100
sales,4,4800,25700
sales,3,4800,25700
sales,1,5000,30700
develop,11,5200,41100
develop,10,5200,41100
develop,8,6000,47100


## Window frames

In [None]:
%%sql
/* Empty OVER () means the window is the whole result set, so every
   row carries the grand total. */
SELECT
    depname,
    salary,
    SUM(salary) OVER ()
FROM empsalary;

In [None]:
%%sql
/* With ORDER BY and no frame clause, the default frame runs from the
   first row through the current row and its peers, which gives a
   running total. */
SELECT
    depname,
    salary,
    SUM(salary) OVER (ORDER BY salary)
FROM empsalary;

In [None]:
%%sql
/* Same running total, with the default frame spelled out explicitly. */
SELECT
    depname,
    salary,
    SUM(salary) OVER (
        ORDER BY salary
        RANGE BETWEEN UNBOUNDED PRECEDING
                  AND CURRENT ROW
    )
FROM empsalary;

In [None]:
%%sql
/* In RANGE mode the offset applies to the ORDER BY value, so the
   frame ends at rows whose salary is at most (current salary - 1).
   The current row and its peers are left out, and rows with nothing
   below them get NULL. RANGE with an offset needs PostgreSQL 11+. */
SELECT
    depname,
    salary,
    SUM(salary) OVER (
        ORDER BY salary
        RANGE BETWEEN UNBOUNDED PRECEDING
                  AND 1 PRECEDING
    )
FROM empsalary;

In [None]:
%%sql
/* PARTITION BY restarts the running total for each department. */
SELECT
    depname,
    salary,
    SUM(salary) OVER (
        PARTITION BY depname
        ORDER BY salary
    )
FROM empsalary;

### For slide display purposes

In [None]:
%%sql
/* Three framings of one aggregate, kept narrow
   enough to fit on a slide.
     a - grand total over every row
     b - running total by salary, same as writing
         RANGE BETWEEN UNBOUNDED PRECEDING
         AND CURRENT ROW explicitly
     c - running total restarted per department */
SELECT
    depname,
    salary,
    SUM(salary) OVER ()
        AS a,
    SUM(salary) OVER (
        ORDER BY salary
    )   AS b,
    SUM(salary) OVER (
        PARTITION BY depname
        ORDER BY salary
    )   AS c
FROM empsalary;

# Recursive Queries

In [None]:
%%sql
/* Count down from 10. The seed row is 10 and each pass emits n - 1
   while n is still positive, so the last value produced is 0. */
WITH RECURSIVE t (n) AS (
    SELECT 10
    UNION ALL
    SELECT n - 1
    FROM t
    WHERE n > 0
)
SELECT n
FROM t;

In [None]:
%%sql
/* Count up from 1. The recursive term stops producing rows once n
   reaches 10, which ends the recursion. */
WITH RECURSIVE t (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM t
    WHERE n < 10
)
SELECT n
FROM t;

In [27]:
%%sql
/* Rebuild the bill-of-materials table. Each row says that one unit
   of part contains quantity units of sub_part. */
DROP TABLE IF EXISTS parts;

CREATE TABLE parts (
    part     TEXT,
    sub_part TEXT,
    quantity INTEGER
);

INSERT INTO parts (part, sub_part, quantity)
VALUES
    ('A',       'A.1',        1),
    ('A',       'A.2',        2),
    ('A',       'A.3',        3),
    ('B',       'B.1',        1),
    ('B',       'B.2',        2),
    ('B.2',     'B.2.i',     10),
    ('C',       'C.1',        1),
    ('C.1',     'C.1.i',     10),
    ('C.1.i',   'C.1.i.a',   10),
    ('C.1.i',   'C.1.i.b',   20),
    ('C.1.i.a', 'C.1.i.a.1', 30);

In [34]:
# Set the sql magic's displaylimit option (rows rendered per result) to 15.
%config SqlMagic.displaylimit=15

In [35]:
%%sql
/* Inspect the bill-of-materials rows loaded above. */
SELECT part, sub_part, quantity
FROM parts;

part,sub_part,quantity
A,A.1,1
A,A.2,2
A,A.3,3
B,B.1,1
B,B.2,2
B.2,B.2.i,10
C,C.1,1
C.1,C.1.i,10
C.1.i,C.1.i.a,10
C.1.i,C.1.i.b,20


In [36]:
%%sql
/* Expand part C into all of its nested sub-parts. The seed term takes
   the direct children of C at depth 0. Each recursive pass joins the
   rows found so far to their own children, multiplying quantities
   along the path and adding one to the depth. */
WITH RECURSIVE included_parts (sub_part, part, quantity, depth) AS (
    SELECT sub_part, part, quantity, 0
    FROM parts
    WHERE part = 'C'
    UNION ALL
    SELECT
        comp.sub_part,
        comp.part,
        comp.quantity * incl.quantity,
        incl.depth + 1
    FROM included_parts AS incl
    JOIN parts AS comp
      ON comp.part = incl.sub_part
)
SELECT
    sub_part,
    depth,
    SUM(quantity) AS total_quantity
FROM included_parts
GROUP BY
    sub_part,
    depth
ORDER BY depth;

sub_part,depth,total_quantity
C.1,0,1
C.1.i,1,10
C.1.i.b,2,200
C.1.i.a,2,100
C.1.i.a.1,3,3000


# CTEs

In [None]:
# Set the sql magic's displaylimit option (rows rendered per result) to 30.
%config SqlMagic.displaylimit=30

In [None]:
%%sql
/* Rebuild the orders table used by the CTE example. One row per
   order line, giving the region, the product, the units sold and the
   sale amount. */
DROP TABLE IF EXISTS orders;

/* amount is an exact NUMERIC(10, 2) rather than FLOAT. Binary floating
   point cannot represent most decimal fractions, so SUM(amount) would
   pick up rounding noise in the aggregated totals. */
CREATE TABLE orders (
    region   TEXT,
    product  TEXT,
    quantity INTEGER,
    amount   NUMERIC(10, 2)
);

INSERT INTO orders (region, product, quantity, amount)
VALUES
    ('US',        'apple',     100,  175.40),
    ('US',        'apple',      80,   99.70),
    ('US',        'banana',     50,   80.60),
    ('US',        'cheerios', 1000, 2500.95),
    ('Canada',    'apple',     500,  198.00),
    ('Canada',    'banana',     20,   45.50),
    ('Mexico',    'apple',    1000, 1100.39),
    ('Mexico',    'cheerios',  200,  500.50),
    ('Mexico',    'cheerios',  400,  430.65),
    ('Australia', 'banana',     40,   80.00),
    ('Australia', 'banana',     50,   99.45),
    ('Australia', 'banana',   2000, 2100.60),
    ('Australia', 'banana',     60,   75.00),
    ('Australia', 'cheerios',  500,  760.94);

/* Display the freshly loaded rows. */
SELECT region, product, quantity, amount
FROM orders;

In [None]:
%%sql
/* Per-product totals, restricted to the top regions.
   regional_sales sums the sales amount per region.
   top_regions keeps the regions whose total exceeds one tenth of
   the total across all regions. */
WITH regional_sales AS (
    SELECT
        region,
        SUM(amount) AS total_sales
    FROM orders
    GROUP BY region
),
top_regions AS (
    SELECT region
    FROM regional_sales
    WHERE total_sales > (
        SELECT SUM(total_sales) / 10
        FROM regional_sales
    )
)
SELECT
    region,
    product,
    SUM(quantity) AS product_units,
    SUM(amount)   AS product_sales
FROM orders
WHERE region IN (
    SELECT region
    FROM top_regions
)
GROUP BY
    region,
    product;