# Core (Facts) Models

In [None]:
import os
from pathlib import Path

from dotenv import load_dotenv
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL

In [None]:
# Project root is taken to be five directory levels above the current
# working directory (parents[0] is the immediate parent, so parents[4] is
# the fifth ancestor) -- NOTE(review): this depends on where the kernel is
# started; confirm the notebook's location in the repository
PROJ_ROOT = Path.cwd().resolve().parents[4]

# Full path to the `.env` file holding the Snowflake settings read below
env_file_dir = PROJ_ROOT.joinpath('.env')

# Load the variables into the process environment; the return value is
# assigned to `_` so the cell does not display it
_ = load_dotenv(dotenv_path=env_file_dir, verbose=True)

## About

Develop queries for the **marts (core)** models without using dbt's `intermediate` models; the intermediate logic is inlined into each query as CTEs instead.

The model used in this notebook was developed in `06_intermediate_non_event_models.ipynb`.

### Notes

1. This notebook supports <kbd>Run</kbd> > <kbd>Run All Cells</kbd>.

## User Inputs

In [None]:
#

In [None]:
# Map each Snowflake connection parameter to the environment variable that
# supplies its value (presumably populated from the project's `.env` file)
SNOWFLAKE_ENV_VARS = {
    "account": "UPLIMIT_SNOWFLAKE_ACCOUNT",
    "user": "UPLIMIT_SNOWFLAKE_USER",
    "password": "UPLIMIT_SNOWFLAKE_PASS",
    "warehouse": "UPLIMIT_SNOWFLAKE_WAREHOUSE",
    "role": "UPLIMIT_SNOWFLAKE_ROLE",
    "database": "UPLIMIT_SNOWFLAKE_DB_NAME",
    "schema": "UPLIMIT_SNOWFLAKE_SCHEMA",
}
connection_settings = {
    param: os.getenv(env_var) for param, env_var in SNOWFLAKE_ENV_VARS.items()
}

# `os.getenv` returns None for unset variables; passing None on to `URL`
# produces a hard-to-diagnose failure later, so stop here and name exactly
# which variables are missing or empty
missing_env_vars = [
    SNOWFLAKE_ENV_VARS[param]
    for param, value in connection_settings.items()
    if not value
]
if missing_env_vars:
    raise RuntimeError(
        "Missing Snowflake environment variable(s): "
        + ", ".join(missing_env_vars)
    )

# Build the Snowflake connection URL and create the SQLAlchemy engine that
# the SQL magic connects with further below
engine = create_engine(URL(drivername="driver", **connection_settings))

## Connect

Load Jupyter SQL extension

In [None]:
# Register the `%sql` / `%%sql` magics that the following cells rely on
%load_ext sql

Connect to database

In [None]:
# Pass the SQLAlchemy `engine` to the SQL magic and register it under the
# alias `connection`, the same alias used to close it at the end
%sql engine --alias connection

## Models

### `marts/core/fct_orders`

In [None]:
%%sql
-- Builds the order facts: order costs, promo discount, itemized order size,
-- the state of the user's address, and delivery timing metrics
-- ####### START INTERMEDIATE MODEL #######
WITH orders AS (
    SELECT *
    FROM stg_postgres_orders
),
users AS (
    SELECT user_id,
           address_id
    FROM stg_postgres_users
),
/* get order item summary per order */
order_items AS (
    SELECT order_id,
           -- get number of unique greenery products included in an order
           COUNT(DISTINCT product_id) AS num_unique_products,
           -- get total quantity of products included in an order
           SUM(quantity) AS total_order_size
    FROM stg_postgres_order_items
    GROUP BY order_id
),
promos AS (
    SELECT promo_id,
           discount
    FROM stg_postgres_promos
),
addresses AS (
    SELECT address_id,
           state AS state_name
    FROM stg_postgres_addresses
),
order_summary AS (
    SELECT
           -- take the order ID from orders rather than from order_items, so
           -- an order without itemized rows keeps its ID instead of getting
           -- a NULL from the optional side of the LEFT JOIN
           o.order_id,
           o.created_at,
           u.user_id,
           u.address_id,
           o.order_cost,
           o.shipping_cost,
           p.promo_id,
           -- if no discount is offered then the discount value should be zero
           IFNULL(p.discount, 0) AS discount,
           o.order_total,
           oi.total_order_size,
           oi.num_unique_products,
           o.status,
           o.delivered_at,
           o.estimated_delivery_at
    FROM users u
    -- use LEFT JOIN to capture all available users, including those that have
    -- not yet placed orders and so do not yet have any itemized orders
    LEFT JOIN orders o USING (user_id)
    -- use LEFT JOIN to keep rows that have no itemized order summary
    LEFT JOIN order_items oi USING (order_id)
    -- use LEFT JOIN to capture orders that do not include products that are
    -- offered as part of a promotion
    LEFT JOIN promos p USING (promo_id)
),
order_summary_with_state AS (
    SELECT os.order_id,
           os.created_at,
           os.user_id,
           a.state_name,
           os.order_cost,
           os.shipping_cost,
           os.promo_id,
           os.discount,
           os.order_total,
           os.total_order_size,
           os.num_unique_products,
           os.estimated_delivery_at,
           os.delivered_at,
           os.status
           -- TODO: derive is_on_time_delivery (False when a delivered order
           -- arrived after its estimate, NULL while shipped, True otherwise)
    FROM order_summary os
    -- use INNER JOIN to only capture orders from known addresses
    -- (the state is a requirement for this model but cannot be determined if
    -- the address is missing, so exclude orders without an address; note
    -- that the address used here is the one on the user's record)
    INNER JOIN addresses a USING (address_id)
),
-- ####### END INTERMEDIATE MODEL #######
orders_with_delivery_details AS (
    SELECT *,
           -- promised delivery duration, measured from order creation
           DATEDIFF(
               second, created_at, estimated_delivery_at
           ) AS estimated_delivery_time_seconds,
           -- actual delivery duration, measured from order creation
           DATEDIFF(second, created_at, delivered_at) AS delivery_time_seconds,
           -- seconds by which a late delivery exceeded its estimate; NULL
           -- when the order was not late or either timestamp is missing
           (
               CASE
                   WHEN delivered_at > estimated_delivery_at
                   THEN DATEDIFF(second, estimated_delivery_at, delivered_at)
               END
           ) AS delivery_delay_seconds
    FROM order_summary_with_state
)
SELECT *
FROM orders_with_delivery_details

## Disconnect

Close connection

In [None]:
# Close the database connection that was registered under the alias
# `connection`
%sql --close connection