# Get Count of Missing Values Per Table

In [None]:
import os
from pathlib import Path

from dotenv import load_dotenv
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL

In [None]:
# The project root is taken to be four directory levels above the current
# working directory (parents[3] of the resolved path).
PROJ_ROOT = Path.cwd().resolve().parents[3]
# Despite its name, this is the path of the `.env` file itself, not a directory.
env_file_dir = PROJ_ROOT.joinpath(".env")
# Load the variables from that file into the process environment. The result is
# bound to `_` so the cell does not display the returned value.
_ = load_dotenv(dotenv_path=env_file_dir, verbose=True)

## About

Show the number of missing (`NULL`) values in each column of every table queried below.

### Notes

1. This notebook supports <kbd>Run</kbd> > <kbd>Run All Cells</kbd>.

## User Inputs

In [None]:
# (No user inputs are currently defined for this notebook.)

In [None]:
# Map each keyword argument passed to the Snowflake URL builder to the
# environment variable that supplies its value. os.getenv yields None for any
# variable that is not set.
snowflake_env_vars = {
    "account": "UPLIMIT_SNOWFLAKE_ACCOUNT",
    "user": "UPLIMIT_SNOWFLAKE_USER",
    "password": "UPLIMIT_SNOWFLAKE_PASS",
    "warehouse": "UPLIMIT_SNOWFLAKE_WAREHOUSE",
    "role": "UPLIMIT_SNOWFLAKE_ROLE",
    "database": "UPLIMIT_SNOWFLAKE_DB_NAME",
    "schema": "UPLIMIT_SNOWFLAKE_SCHEMA",
}
connection_url = URL(
    drivername="driver",
    **{param: os.getenv(env_var) for param, env_var in snowflake_env_vars.items()},
)
# SQLAlchemy engine that is handed to the SQL magic in a later cell.
engine = create_engine(connection_url)

## Connect

Load Jupyter SQL extension

In [None]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in the cells below.
%load_ext sql

Connect to database

In [None]:
# Pass the SQLAlchemy `engine` to the SQL magic and register it under the alias "connection".
%sql engine --alias connection

## Exploratory Data Analysis

### Addresses

In [None]:
%%sql
WITH null_counts AS (
    -- Single-row summary of stg_postgres_addresses. Each aliased column is the
    -- total row count minus the non-NULL count of a source column, which is
    -- the number of NULLs in that column.
    SELECT
        COUNT(*) - COUNT(address_id) AS address_id,
        COUNT(*) - COUNT(address)    AS address,
        COUNT(*) - COUNT(zipcode)    AS zipcode,
        COUNT(*) - COUNT(state)      AS state_name,
        COUNT(*) - COUNT(country)    AS country
    FROM stg_postgres_addresses
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT
    LOWER(column_name) AS column_name,
    nulls_column_count
FROM null_counts
    UNPIVOT (
        nulls_column_count FOR column_name IN (
            address_id,
            address,
            zipcode,
            state_name,
            country
        )
    )
ORDER BY nulls_column_count DESC

### Users

In [None]:
%%sql
WITH cte AS (
  -- Single-row summary of stg_postgres_users. Each aliased column is the total
  -- row count minus the non-NULL count of the source column of the same name,
  -- which is the number of NULLs in that column.
  SELECT
      COUNT(*) AS total_rows,
      total_rows - COUNT(user_id) AS user_id,
      total_rows - COUNT(first_name) AS first_name,
      total_rows - COUNT(last_name) AS last_name,
      total_rows - COUNT(email) AS email,
      total_rows - COUNT(phone_number) AS phone_number,
      total_rows - COUNT(created_at) AS created_at,
      total_rows - COUNT(updated_at) AS updated_at,
      total_rows - COUNT(address_id) AS address_id
  FROM stg_postgres_users
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column. Every NULL count computed in the CTE must be listed here,
-- otherwise it is left out of the report.
SELECT lower(column_name) AS column_name,
       nulls_column_count
FROM cte
UNPIVOT (
    nulls_column_count
    FOR column_name IN (
        user_id,
        first_name,
        last_name,
        email,
        phone_number,
        created_at,
        updated_at,
        address_id
    )
)
ORDER BY nulls_column_count DESC

### Products

In [None]:
%%sql
WITH null_counts AS (
    -- Single-row summary of stg_postgres_products. Each aliased column is the
    -- total row count minus the non-NULL count of the source column of the
    -- same name, which is the number of NULLs in that column.
    SELECT
        COUNT(*) - COUNT(product_id) AS product_id,
        COUNT(*) - COUNT(name)       AS name,
        COUNT(*) - COUNT(price)      AS price,
        COUNT(*) - COUNT(inventory)  AS inventory
    FROM stg_postgres_products
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT
    LOWER(column_name) AS column_name,
    nulls_column_count
FROM null_counts
    UNPIVOT (
        nulls_column_count FOR column_name IN (
            product_id,
            name,
            price,
            inventory
        )
    )
ORDER BY nulls_column_count DESC

### Orders

In [None]:
%%sql
WITH null_counts AS (
    -- Single-row summary of stg_postgres_orders. Each aliased column is the
    -- total row count minus the non-NULL count of the source column of the
    -- same name, which is the number of NULLs in that column.
    SELECT
        COUNT(*) - COUNT(order_id)              AS order_id,
        COUNT(*) - COUNT(user_id)               AS user_id,
        COUNT(*) - COUNT(promo_id)              AS promo_id,
        COUNT(*) - COUNT(address_id)            AS address_id,
        COUNT(*) - COUNT(created_at)            AS created_at,
        COUNT(*) - COUNT(order_cost)            AS order_cost,
        COUNT(*) - COUNT(shipping_cost)         AS shipping_cost,
        COUNT(*) - COUNT(order_total)           AS order_total,
        COUNT(*) - COUNT(tracking_id)           AS tracking_id,
        COUNT(*) - COUNT(shipping_service)      AS shipping_service,
        COUNT(*) - COUNT(estimated_delivery_at) AS estimated_delivery_at,
        COUNT(*) - COUNT(delivered_at)          AS delivered_at,
        COUNT(*) - COUNT(status)                AS status
    FROM stg_postgres_orders
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT
    LOWER(column_name) AS column_name,
    nulls_column_count
FROM null_counts
    UNPIVOT (
        nulls_column_count FOR column_name IN (
            order_id,
            user_id,
            promo_id,
            address_id,
            created_at,
            order_cost,
            shipping_cost,
            order_total,
            tracking_id,
            shipping_service,
            estimated_delivery_at,
            delivered_at,
            status
        )
    )
ORDER BY nulls_column_count DESC

### Order Items

In [None]:
%%sql
WITH null_counts AS (
    -- Single-row summary of stg_postgres_order_items. Each aliased column is
    -- the total row count minus the non-NULL count of the source column of the
    -- same name, which is the number of NULLs in that column.
    SELECT
        COUNT(*) - COUNT(order_id)   AS order_id,
        COUNT(*) - COUNT(product_id) AS product_id,
        COUNT(*) - COUNT(quantity)   AS quantity
    FROM stg_postgres_order_items
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT
    LOWER(column_name) AS column_name,
    nulls_column_count
FROM null_counts
    UNPIVOT (
        nulls_column_count FOR column_name IN (
            order_id,
            product_id,
            quantity
        )
    )
ORDER BY nulls_column_count DESC

### Promos

In [None]:
%%sql
WITH null_counts AS (
    -- Single-row summary of stg_postgres_promos. Each aliased column is the
    -- total row count minus the non-NULL count of the source column of the
    -- same name, which is the number of NULLs in that column.
    SELECT
        COUNT(*) - COUNT(promo_id) AS promo_id,
        COUNT(*) - COUNT(discount) AS discount,
        COUNT(*) - COUNT(status)   AS status
    FROM stg_postgres_promos
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT
    LOWER(column_name) AS column_name,
    nulls_column_count
FROM null_counts
    UNPIVOT (
        nulls_column_count FOR column_name IN (
            promo_id,
            discount,
            status
        )
    )
ORDER BY nulls_column_count DESC

### Events

In [None]:
%%sql
WITH cte AS (
  -- Single-row summary of stg_postgres_events. Each aliased column is the total
  -- row count minus the non-NULL count of the source column of the same name,
  -- which is the number of NULLs in that column.
  SELECT
      COUNT(*) AS total_rows,
      total_rows - COUNT(event_id) AS event_id,
      total_rows - COUNT(session_id) AS session_id,
      total_rows - COUNT(user_id) AS user_id,
      total_rows - COUNT(page_url) AS page_url,
      total_rows - COUNT(created_at) AS created_at,
      total_rows - COUNT(event_type) AS event_type,
      total_rows - COUNT(order_id) AS order_id,
      total_rows - COUNT(product_id) AS product_id
  FROM stg_postgres_events
)
-- Reshape the wide summary row into one (column_name, nulls_column_count) row
-- per column, with the largest NULL counts first.
SELECT lower(column_name) AS column_name,
       nulls_column_count
FROM cte
UNPIVOT (
    nulls_column_count
    FOR column_name IN (
        event_id,
        session_id,
        user_id,
        page_url,
        created_at,
        event_type,
        order_id,
        product_id
    )
)
ORDER BY nulls_column_count DESC

## Disconnect

Close connection

In [None]:
# Close the connection that was registered under the alias "connection".
%sql --close connection