# Intermediate Non-Event Models

In [None]:
import os
from pathlib import Path

from dotenv import load_dotenv
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL

In [None]:
# Project root sits three directory levels above this notebook's working
# directory's parent (i.e. `parents[3]` of the resolved current directory)
PROJ_ROOT = Path().resolve().parents[3]

# Path to the `.env` file that holds the connection settings
env_file_dir = PROJ_ROOT.joinpath(".env")

# Assign the boolean result to `_` so that it is not echoed as cell output
_ = load_dotenv(dotenv_path=env_file_dir, verbose=True)

## About

### Objective

Develop an **intermediate** model that includes logic to combine the following (non-event related) staging models

1. `stg_postgres_orders`
2. `stg_postgres_order_items`
3. `stg_postgres_promos`

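**at the order level**, enriched with the state of each user's address (via `stg_postgres_users` and `stg_postgres_addresses`). The model should be named `int_orders_joined_to_addresses_promos`.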

### Use-Case

To get this **at the order level**, `order_items` must be aggregated from the product granularity to the order level by `order_id` before joining with the two other models. This `order_items` aggregation will be performed here so that it does not need to be repeated by the following marts models

1. `fct_user_orders`
2. `fct_promos`
3. `fct_orders`

all of which will use this intermediate model.

### Notes

1. This notebook supports <kbd>Run</kbd> > <kbd>Run All Cells</kbd>.

## User Inputs

In [None]:
# No user inputs are defined here; the connection settings used below are read
# from environment variables

In [None]:
# Build the Snowflake connection URL. Every credential and setting is read
# from an environment variable, so no secrets are stored in the notebook. A
# variable that is not set resolves to None.
# NOTE(review): drivername="driver" looks like a placeholder value - confirm
# that it is required by the URL helper.
snowflake_url = URL(
    drivername="driver",
    account=os.environ.get("UPLIMIT_SNOWFLAKE_ACCOUNT"),
    user=os.environ.get("UPLIMIT_SNOWFLAKE_USER"),
    password=os.environ.get("UPLIMIT_SNOWFLAKE_PASS"),
    warehouse=os.environ.get("UPLIMIT_SNOWFLAKE_WAREHOUSE"),
    role=os.environ.get("UPLIMIT_SNOWFLAKE_ROLE"),
    database=os.environ.get("UPLIMIT_SNOWFLAKE_DB_NAME"),
    schema=os.environ.get("UPLIMIT_SNOWFLAKE_SCHEMA"),
)

# SQLAlchemy engine that is handed to the SQL magic further down
engine = create_engine(snowflake_url)

## Connect

Load Jupyter SQL extension

In [None]:
# load the `sql` extension; the %sql and %%sql magics are used by the cells below
%load_ext sql

Connect to Snowflake database

In [None]:
# hand the SQLAlchemy engine to the SQL magic under the alias "connection"
# (the same alias is used to close the connection at the end of the notebook)
%sql engine --alias connection

## Exploratory Data Analysis

Show the different types of order statuses

In [None]:
%%sql
-- one row per distinct order status found in the staged orders
SELECT DISTINCT
    status AS status
FROM stg_postgres_orders

Show the different address states

In [None]:
%%sql
-- one row per distinct state found in the staged addresses
SELECT DISTINCT
    state AS state_name
FROM stg_postgres_addresses

Show the first few rows of the `addresses` table

In [None]:
%%sql
-- preview four rows of the staged addresses (no ORDER BY, so the rows shown
-- are not guaranteed to be the same on every run)
SELECT addr.*
FROM stg_postgres_addresses AS addr
LIMIT 4

Show the first few rows of the `users` table

In [None]:
%%sql
-- preview four rows of the staged users (no ORDER BY, so the rows shown are
-- not guaranteed to be the same on every run)
SELECT usr.*
FROM stg_postgres_users AS usr
LIMIT 4

Count the number of users in all the models with `user_id` in them

1. `stg_postgres_users`
2. `stg_postgres_orders`
3. `stg_postgres_events`

In [None]:
%%sql
-- one output row per model, giving the number of distinct users it contains
SELECT
    COUNT(DISTINCT user_id) AS num_users,
    'stg_postgres_users' AS model_name
FROM stg_postgres_users
UNION ALL
SELECT
    COUNT(DISTINCT user_id) AS num_users,
    'stg_postgres_orders' AS model_name
FROM stg_postgres_orders
UNION ALL
SELECT
    COUNT(DISTINCT user_id) AS num_users,
    'stg_postgres_events' AS model_name
FROM stg_postgres_events

Show the first few rows of the `orders` table

In [None]:
%%sql
-- preview four rows of the staged orders (no ORDER BY, so the rows shown are
-- not guaranteed to be the same on every run)
SELECT ord.*
FROM stg_postgres_orders AS ord
LIMIT 4

Show the first few rows of the `order_items` table

In [None]:
%%sql
-- preview eight rows of the staged order items (no ORDER BY, so the rows shown
-- are not guaranteed to be the same on every run)
SELECT itm.*
FROM stg_postgres_order_items AS itm
LIMIT 8

Show the number of

1. orders and products
2. distinct orders and distinct products

In [None]:
%%sql
-- compare distinct counts against plain counts; COUNT(column) counts every
-- row with a non-NULL value, so it counts order-item rows rather than orders
SELECT
    COUNT(DISTINCT order_id) AS num_distinct_orders,
    COUNT(order_id) AS num_orders,
    COUNT(DISTINCT product_id) AS num_distinct_products,
    COUNT(product_id) AS num_products
FROM stg_postgres_order_items

Show number of orders that meet any of the following criteria

1. `order_cost` = 0
1. `order_total` = 0

In [None]:
%%sql
-- count orders for which either the order cost or the order total is zero
SELECT
    COUNT(*) AS num_zero_cost_orders
FROM stg_postgres_orders
WHERE (order_cost = 0)
   OR (order_total = 0)

Get the first and last order date from the `orders` table

In [None]:
%%sql
/* find the earliest and latest order creation timestamps */
WITH order_timestamp_range AS (
    SELECT MIN(created_at) AS first_order_ts,
           MAX(created_at) AS last_order_ts
    FROM stg_postgres_orders
)
/* report both ends of the range as dates */
SELECT TO_DATE(first_order_ts) AS first_order_date,
       TO_DATE(last_order_ts) AS last_order_date
FROM order_timestamp_range

## Models

### `int_orders_joined_to_addresses_promos`

In [None]:
%%sql
/*
Prototype of the int_orders_joined_to_addresses_promos model.

Starts from every user, attaches their orders (if any), the per-order item
summary, the promotion discount and the state of the user's address, and then
flags whether each delivered order arrived on time.
*/
/* get orders */
WITH orders AS (
    SELECT *
    FROM stg_postgres_orders
),
/* get users */
users AS (
    SELECT user_id,
           address_id
    FROM stg_postgres_users
),
/* get order item summary per order */
order_items AS (
    SELECT order_id,
           -- get number of unique greenery products included in an order
           COUNT(DISTINCT product_id) AS num_unique_products,
           -- get total quantity of products included in an order
           SUM(quantity) AS total_order_size
    FROM stg_postgres_order_items
    GROUP BY order_id
),
/* get promotion discount in dollars */
promos AS (
    SELECT promo_id,
           discount
    FROM stg_postgres_promos
),
/* get state in which user's address is located */
addresses AS (
    SELECT address_id,
           state AS state_name
    FROM stg_postgres_addresses
),
/* create order profile from combination of orders, order items and promotion
discount */
order_summary AS (
    SELECT
           -- take the order ID from the orders model (not from the item
           -- summary) so that it is populated even if an order has no
           -- itemized rows
           o.order_id,
           o.created_at,
           u.user_id,
           u.address_id,
           o.order_cost,
           o.shipping_cost,
           p.promo_id,
           -- if no discount is offered then the discount value should be zero
           ZEROIFNULL(p.discount) AS discount,
           o.order_total,
           oi.total_order_size,
           oi.num_unique_products,
           o.status,
           o.delivered_at,
           o.estimated_delivery_at
    FROM users u
    -- use LEFT JOIN to capture all available users, including those that have
    -- not yet placed orders (all order columns are NULL for such users)
    LEFT JOIN orders o USING (user_id)
    -- use LEFT JOIN so that rows without an item summary (e.g. users without
    -- orders) are retained
    LEFT JOIN order_items oi USING (order_id)
    -- use LEFT JOIN to capture orders that do not include products that are
    -- offered as part of a promotion
    LEFT JOIN promos p USING (promo_id)
),
/* append state name to combined order profile */
order_summary_with_state AS (
    SELECT os.order_id,
           os.created_at,
           os.user_id,
           a.state_name,
           os.order_cost,
           os.shipping_cost,
           os.discount,
           os.order_total,
           os.total_order_size,
           os.num_unique_products,
           os.status,
           os.delivered_at,
           os.estimated_delivery_at,
           -- append column to indicate if delivery timestamp occurred no later
           -- than the estimated delivery timestamp. Only delivered orders can
           -- be judged: any other row (order not yet delivered, user without
           -- an order, or a missing timestamp) is NULL rather than True
           (
               CASE
                   WHEN
                       os.status = 'delivered'
                       AND os.delivered_at > os.estimated_delivery_at
                   THEN False
                   WHEN
                       os.status = 'delivered'
                       AND os.delivered_at <= os.estimated_delivery_at
                   THEN True
                   ELSE NULL
               END
           ) AS is_on_time_delivery
    FROM order_summary os
    -- use INNER JOIN to only capture orders from known addresses
    -- (the state is a requirement for this model but cannot be determined if
    -- the address is missing, so exclude rows without an address)
    INNER JOIN addresses a USING (address_id)
)
SELECT *
FROM order_summary_with_state

**Notes**

1. Using
   ```sql
   SELECT COUNT(DISTINCT(user_id))
   FROM order_summary_with_state
   ```
   returns the same number of users as the `stg_postgres_users` table, i.e. no users are dropped by the joins.

## Disconnect

Close connection

In [None]:
# close the connection that was registered under the alias "connection"
%sql --close connection