# Inspect DBT Snapshots

In [None]:
import os
from pathlib import Path

from dotenv import find_dotenv, load_dotenv
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL

In [None]:
# Treat the fourth ancestor of the working directory as the project root,
# then load the environment variables defined in its .env file.
PROJ_ROOT = Path().resolve().parents[3]
env_file_dir = PROJ_ROOT.joinpath('.env')
_ = load_dotenv(dotenv_path=env_file_dir, verbose=True)

## About

Identify products with weekly and overall changes using DBT snapshots.

### Assumptions

1. Since the week 1 assignment was submitted late, the snapshots in my personal schema should be for weeks 2, 3 and 4. <span style="color:green"><b>This assumption is enforced here</b></span>.

### Notes

1. This notebook supports <kbd>Run</kbd> > <kbd>Run All Cells</kbd>.

## User Inputs

In [None]:
#

In [None]:
# Environment variable that supplies each Snowflake connection setting.
snowflake_env_vars = {
    "account": "UPLIMIT_SNOWFLAKE_ACCOUNT",
    "user": "UPLIMIT_SNOWFLAKE_USER",
    "password": "UPLIMIT_SNOWFLAKE_PASS",
    "warehouse": "UPLIMIT_SNOWFLAKE_WAREHOUSE",
    "role": "UPLIMIT_SNOWFLAKE_ROLE",
    "database": "UPLIMIT_SNOWFLAKE_DB_NAME",
    "schema": "UPLIMIT_SNOWFLAKE_SCHEMA",
}

# Build the SQLAlchemy engine from the settings read out of the environment.
engine = create_engine(
    URL(
        drivername="driver",
        **{
            setting: os.getenv(env_var)
            for setting, env_var in snowflake_env_vars.items()
        },
    )
)

## Connect

Load Jupyter SQL extension

In [None]:
# Register the %sql / %%sql magics used by the cells below.
%load_ext sql

Set the maximum number of rows to be displayed to `None` (shows all rows)

In [None]:
# Lift the cap on displayed result rows (None shows all rows).
%config SqlMagic.displaylimit = None

Connect to the Snowflake database

In [None]:
# Connect through the SQLAlchemy `engine` object and name the connection `connection`.
%sql engine --alias connection

## Queries

### Changes

#### Changes from Week 2 to 3

Show changes in this snapshot table from week 2 to week 3

In [None]:
%%sql
WITH snapshot_timestamps AS (
    -- one row per distinct snapshot timestamp
    SELECT DISTINCT dbt_updated_at
    FROM inventory_snapshot
),
numbered_snapshots AS (
    -- number the timestamps chronologically; the earliest one is week 2
    SELECT dbt_updated_at,
           ROW_NUMBER() OVER (ORDER BY dbt_updated_at) + 1 AS week_number
    FROM snapshot_timestamps
),
snapshot_with_week AS (
    -- tag every snapshot row with the week number of its timestamp
    SELECT * EXCLUDE (price, dbt_scd_id)
    FROM inventory_snapshot
    LEFT JOIN numbered_snapshots USING (dbt_updated_at)
),
week_over_week AS (
    -- compare each product's inventory with its previous row in weeks 2-3
    SELECT *,
           LAG(inventory) OVER (
               PARTITION BY product_id ORDER BY week_number
           ) AS inventory_previous,
           inventory - inventory_previous AS inventory_changed
    FROM snapshot_with_week
    WHERE week_number IN (2, 3)
)
-- report only the week 3 rows
SELECT dbt_updated_at,
       product_id,
       name,
       inventory,
       dbt_valid_from,
       dbt_valid_to,
       week_number,
       inventory_changed
FROM week_over_week
WHERE week_number = 3

#### Changes from Week 3 to 4

Show changes in this snapshot table from week 3 to week 4

In [None]:
%%sql
WITH snapshot_updated_at AS (
    -- one row per distinct snapshot timestamp
    SELECT DISTINCT dbt_updated_at
    FROM inventory_snapshot
),
snapshot_runtimes AS (
    -- number the timestamps chronologically; the earliest one is week 2
    SELECT *,
           ROW_NUMBER() OVER (ORDER BY dbt_updated_at) + 1 AS week_number
    FROM snapshot_updated_at
),
inventory_snapshot_metadata AS (
    -- tag every snapshot row with the week number of its timestamp
    SELECT * EXCLUDE (price, dbt_scd_id)
    FROM inventory_snapshot
    LEFT JOIN snapshot_runtimes USING (dbt_updated_at)
),
inventory_changes AS (
    SELECT *,
           LAG(inventory) OVER (
               PARTITION BY product_id ORDER BY week_number
           ) AS inventory_previous,
           (inventory - inventory_previous) AS inventory_changed
    FROM inventory_snapshot_metadata
    -- A snapshot only gains a row when a tracked value changes, so a product
    -- that changed in week 4 but not in week 3 has no week 3 row. Keep every
    -- earlier week in the window so LAG picks up the inventory that was still
    -- current during week 3 instead of returning NULL for such products.
    WHERE week_number IN (2, 3, 4)
),
inventory_changes_week_four AS (
    -- report only the week 4 rows
    SELECT dbt_updated_at,
           product_id,
           name,
           inventory,
           dbt_valid_from,
           dbt_valid_to,
           week_number,
           inventory_changed
    FROM inventory_changes
    WHERE week_number = 4
)
SELECT *
FROM inventory_changes_week_four

### From All Weeks of Snapshot Data, Products with Most Fluctuations in Inventory

In [None]:
%%sql
WITH snapshot_updated_at AS (
    -- one row per distinct snapshot timestamp
    SELECT DISTINCT dbt_updated_at
    FROM inventory_snapshot
),
snapshot_runtimes AS (
    -- number the timestamps chronologically; the earliest one is week 2
    SELECT *,
           ROW_NUMBER() OVER (ORDER BY dbt_updated_at) + 1 AS week_number
    FROM snapshot_updated_at
),
inventory_snapshot_metadata AS (
    -- tag every snapshot row with the week number of its timestamp
    SELECT * EXCLUDE (price, dbt_scd_id)
    FROM inventory_snapshot
    LEFT JOIN snapshot_runtimes USING (dbt_updated_at)
),
inventory_changes AS (
    SELECT *,
           LAG(inventory) OVER (
               PARTITION BY product_id ORDER BY week_number
           ) AS inventory_previous,
           (inventory - inventory_previous) AS inventory_changed
    FROM inventory_snapshot_metadata
    -- capture weeks in which week-to-week fluctuations can be calculated
    -- (i.e. ignore week 1)
    WHERE week_number IN (2, 3, 4)
),
products_with_inventory_changes AS (
    -- products with at least one computable week-to-week change
    SELECT DISTINCT product_id,
           True AS is_changed
    FROM inventory_changes
    WHERE inventory_changed IS NOT NULL
),
inventory_changes_overall AS (
    SELECT product_id,
           name,
           -- order the aggregated values by week so the lists read
           -- chronologically; without WITHIN GROUP the order is unspecified
           LISTAGG(inventory, ', ')
               WITHIN GROUP (ORDER BY week_number) AS inventory_list,
           ARRAY_TO_STRING(
               ARRAY_AGG(inventory) WITHIN GROUP (ORDER BY week_number),
               ', '
           ) AS inventory_array_string,
           SUM(ABS(inventory_changed)) AS overall_fluctuation
    FROM inventory_changes
    LEFT JOIN products_with_inventory_changes USING (product_id)
    -- drops products without any change (is_changed is NULL for them)
    WHERE is_changed = True
    GROUP BY ALL
)
SELECT *
FROM inventory_changes_overall
-- sort in the outermost query: an ORDER BY inside a CTE is not guaranteed
-- to carry through to the final result
ORDER BY overall_fluctuation DESC

### Items that Went Out of Stock in the Last 3 Weeks

In [None]:
%%sql
WITH snapshot_timestamps AS (
    -- one row per distinct snapshot timestamp
    SELECT DISTINCT dbt_updated_at
    FROM inventory_snapshot
),
numbered_snapshots AS (
    -- number the timestamps chronologically; the earliest one is week 2
    SELECT dbt_updated_at,
           ROW_NUMBER() OVER (ORDER BY dbt_updated_at) + 1 AS week_number
    FROM snapshot_timestamps
),
snapshot_with_week AS (
    -- tag every snapshot row with the week number of its timestamp
    SELECT * EXCLUDE (price, dbt_scd_id)
    FROM inventory_snapshot
    LEFT JOIN numbered_snapshots USING (dbt_updated_at)
),
inventory_changes AS (
    -- compare each product's inventory with its previous row
    SELECT *,
           LAG(inventory) OVER (
               PARTITION BY product_id ORDER BY week_number
           ) AS inventory_previous,
           inventory - inventory_previous AS inventory_changed
    FROM snapshot_with_week
    -- capture last three weeks
    WHERE week_number IN (2, 3, 4)
)
-- rows where a computable change left the product with zero inventory
SELECT *
FROM inventory_changes
WHERE inventory = 0
AND inventory_changed IS NOT NULL

## Disconnect

Close connection

In [None]:
# Close the connection registered under the alias `connection`.
%sql --close connection