In [1]:
import os
from urllib.parse import quote

import psycopg2
%load_ext sql

### Connect to the local database

In [2]:
# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be saved in the notebook; the
# fallbacks are the local development defaults.
DB_ENDPOINT = os.environ.get("PAGILA_DB_ENDPOINT", "127.0.0.1")
DB = os.environ.get("PAGILA_DB_NAME", 'pagila')
DB_USER = os.environ.get("PAGILA_DB_USER", 'postgres')
DB_PASSWORD = os.environ.get("PAGILA_DB_PASSWORD", 'password')
DB_PORT = os.environ.get("PAGILA_DB_PORT", '5432')

# postgresql://username:password@host:port/database
# User and password are percent-encoded so that characters such as '@', ':'
# or '/' inside them cannot be mistaken for URL delimiters.
conn_string = "postgresql://{}:{}@{}:{}/{}".format(
    quote(DB_USER, safe=""),
    quote(DB_PASSWORD, safe=""),
    DB_ENDPOINT,
    DB_PORT,
    DB,
)

# NOTE: this echoes the password in clear text. Clear this cell's output
# before sharing the notebook if a real secret was supplied.
print(conn_string)

postgresql://postgres:password@127.0.0.1:5432/pagila


In [3]:
# Hand the connection URL built above to the sql extension; the %%sql cells
# below then run against this connection.
%sql $conn_string

### ERD for the Star Schema

![Dimensional Model](Dimension-Model-Schema.jpg)

### Grouping Sets

It often happens that, given 3 dimensions, you want to aggregate a fact at every possible combination of them:

- by nothing (total)
- then by the 1st dimension
- then by the 2nd
- then by the 3rd
- then by the 1st and 2nd
- then by the 2nd and 3rd
- then by the 1st and 3rd
- then by the 1st and 2nd and 3rd

#### Total Revenue

In [4]:
%%sql
SELECT SUM(sales.sales_amount) AS revenue
FROM factsales AS sales  -- no GROUP BY, so the result is a single row summing sales_amount over the whole table

 * postgresql://postgres:***@127.0.0.1:5432/pagila
1 rows affected.


revenue
67416.51


#### Revenue by Country

In [5]:
%%sql
SELECT store.country,
       SUM(sales.sales_amount) AS revenue
FROM factsales AS sales
INNER JOIN dimstore AS store
        ON sales.store_key = store.store_key
GROUP BY store.country   -- one output row per store country
ORDER BY revenue DESC    -- largest revenue first
LIMIT 20

 * postgresql://postgres:***@127.0.0.1:5432/pagila
2 rows affected.


country,revenue
Australia,33726.77
Canada,33689.74


#### Total Revenue, Revenue by Month, by Country, and by Month & Country, All in One Shot

In [6]:
%%sql
SELECT t.month, s.country, sum(f.sales_amount) AS revenue
FROM factsales f
JOIN dimdate t ON (f.date_key = t.date_key)
JOIN dimstore s ON (f.store_key = s.store_key)
-- Each grouping set is aggregated separately and the results are combined.
-- A column that is not part of a given set comes back as NULL in its rows.
GROUP BY GROUPING SETS (
    (),                     -- grand total
    t.month,                -- subtotal per month
    s.country,              -- subtotal per country
    (t.month, s.country)    -- one row per month and country pair
)
-- Without ORDER BY, the row order (and which rows LIMIT keeps) is unspecified.
-- PostgreSQL sorts NULLs last in ascending order, so each subtotal row follows
-- the detail rows it summarises.
ORDER BY t.month, s.country
LIMIT 20;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
18 rows affected.


month,country,revenue
1.0,Australia,2364.19
1.0,Canada,2460.24
1.0,,4824.43
2.0,Australia,4895.1
2.0,Canada,4736.78
2.0,,9631.88
3.0,Australia,12060.33
3.0,Canada,11826.23
3.0,,23886.56
4.0,Australia,14136.07
