# Conversion Rate Queries

In [1]:
# Standard library
import os
from pathlib import Path

# Third-party: .env loading, SQLAlchemy engine creation, Snowflake URL builder
from dotenv import find_dotenv, load_dotenv
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL


In [2]:
# Project root is taken to be four directory levels above the current working
# directory (parents[3]).
# NOTE(review): confirm this depth matches where the notebook lives in the repo.
PROJ_ROOT = Path.cwd().resolve().parents[3]

# Path of the .env file at the project root (a file path, despite the name)
env_file_dir = PROJ_ROOT.joinpath(".env")

# Load the variables defined in the .env file; the return value is discarded
_ = load_dotenv(env_file_dir, verbose=True)

## About

Develop **conversion rate** queries per product and overall.

### Notes

1. This notebook supports <kbd>Run</kbd> > <kbd>Run All Cells</kbd>.

## User Inputs

In [3]:
#

In [4]:
# Build the SQLAlchemy engine for Snowflake. Every connection setting except
# the driver name and timezone is read from a UPLIMIT_SNOWFLAKE_* environment
# variable; os.getenv() returns None for any variable that is not set.
engine = create_engine(
    URL(
        **{
            "drivername": "driver",
            "account": os.getenv("UPLIMIT_SNOWFLAKE_ACCOUNT"),
            "user": os.getenv("UPLIMIT_SNOWFLAKE_USER"),
            "password": os.getenv("UPLIMIT_SNOWFLAKE_PASS"),
            "warehouse": os.getenv("UPLIMIT_SNOWFLAKE_WAREHOUSE"),
            "role": os.getenv("UPLIMIT_SNOWFLAKE_ROLE"),
            "database": os.getenv("UPLIMIT_SNOWFLAKE_DB_NAME"),
            "schema": os.getenv("UPLIMIT_SNOWFLAKE_SCHEMA"),
            "timezone": "US/Eastern",
        }
    )
)

## Connect

Load Jupyter SQL extension

In [5]:
# Load the `sql` IPython extension that provides the %sql / %%sql magics used below
%load_ext sql

Set the maximum number of rows to be displayed to `None` (shows all rows)

In [6]:
# displaylimit = None removes the cap on the number of result rows displayed
%config SqlMagic.displaylimit = None

Connect to Snowflake database

In [7]:
# Pass the SQLAlchemy `engine` to the %sql magic and register it under the alias "connection"
%sql engine --alias connection

## Exploratory Data Analysis

Show the different types of events

In [8]:
%%sql
-- list each distinct event type recorded in the events table
SELECT DISTINCT event_type
FROM stg_postgres_events

event_type
checkout
package_shipped
add_to_cart
page_view


Show the first few rows of the events table

In [9]:
%%sql
-- preview four rows with every column of the events table; there is no
-- ORDER BY, so which four rows come back is not guaranteed
SELECT *
FROM stg_postgres_events
LIMIT 4

event_id,session_id,user_id,page_url,created_at,event_type,order_id,product_id
5c357e7e-2b8d-4a59-ae41-816f8e5f45d6,a646b9aa-0044-4fbd-8bae-a1018f5d4ace,b3367c91-53bd-4aac-ab6d-0a596fe382c2,https://greenary.com/checkout/8329a65b-7ddf-4250-aeee-bd625f8a401a,2021-02-11 23:30:33,checkout,8329a65b-7ddf-4250-aeee-bd625f8a401a,
3674eaa5-e3a4-4634-aa56-d85162ac36da,a646b9aa-0044-4fbd-8bae-a1018f5d4ace,b3367c91-53bd-4aac-ab6d-0a596fe382c2,https://greenary.com/shipping/8329a65b-7ddf-4250-aeee-bd625f8a401a,2021-02-12 02:30:34,package_shipped,8329a65b-7ddf-4250-aeee-bd625f8a401a,
444e7cb4-cca8-4441-ba7f-9c9a7f7de07d,a646b9aa-0044-4fbd-8bae-a1018f5d4ace,b3367c91-53bd-4aac-ab6d-0a596fe382c2,https://greenary.com/product/fb0e8be7-5ac4-4a76-a1fa-2cc4bf0b2d80,2021-02-11 23:29:53,add_to_cart,,fb0e8be7-5ac4-4a76-a1fa-2cc4bf0b2d80
acf5bc96-84ea-4bdc-b47e-f222f255506c,a646b9aa-0044-4fbd-8bae-a1018f5d4ace,b3367c91-53bd-4aac-ab6d-0a596fe382c2,https://greenary.com/product/fb0e8be7-5ac4-4a76-a1fa-2cc4bf0b2d80,2021-02-11 23:29:17,page_view,,fb0e8be7-5ac4-4a76-a1fa-2cc4bf0b2d80


Show chronologically ordered events in 15 sessions in which there was no checkout or package shipment

In [10]:
%%sql
/* rank the events of each session from most recent (rn = 1) to oldest */
WITH ranked_session_events AS (
    SELECT session_id,
           event_type,
           ROW_NUMBER() OVER(
               PARTITION BY session_id ORDER BY user_id, created_at DESC
           ) AS rn
    FROM stg_postgres_events
),
/* pick 15 session IDs whose last event is not a purchase-related event */
sessions_ending_without_purchase AS (
    SELECT session_id
    FROM ranked_session_events
    -- get sessions in which last event is not shipping or checkout
    WHERE rn = 1
      AND event_type NOT IN ('checkout', 'package_shipped')
    -- ORDER BY together with LIMIT makes the choice of 15 sessions repeatable
    ORDER BY session_id
    LIMIT 15
),
/* get every event (minus the event ID) of the selected sessions */
sessions_without_purchase AS (
    SELECT * EXCLUDE(event_id)
    FROM stg_postgres_events
    -- use INNER JOIN to only get sessions in which order was not placed
    INNER JOIN sessions_ending_without_purchase USING (session_id)
)
SELECT *
FROM sessions_without_purchase
-- sort in the outermost query: an ORDER BY placed inside a CTE is not
-- guaranteed to carry through to the final result
ORDER BY session_id, user_id, created_at

session_id,user_id,page_url,created_at,event_type,order_id,product_id
005c5f16-d0f2-48d8-ac66-af9a9e1f84c5,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/e18f33a6-b89a-4fbc-82ad-ccba5bb261cc,2021-02-11 14:23:56,page_view,,e18f33a6-b89a-4fbc-82ad-ccba5bb261cc
0145bd3e-439d-4c39-8616-ba01e20d2578,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/e8b6528e-a830-4d03-a027-473b411c7f02,2021-02-11 14:23:57,page_view,,e8b6528e-a830-4d03-a027-473b411c7f02
0145bd3e-439d-4c39-8616-ba01e20d2578,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/b86ae24b-6f59-47e8-8adc-b17d88cbd367,2021-02-11 14:25:17,page_view,,b86ae24b-6f59-47e8-8adc-b17d88cbd367
0145bd3e-439d-4c39-8616-ba01e20d2578,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/b86ae24b-6f59-47e8-8adc-b17d88cbd367,2021-02-11 14:25:32,add_to_cart,,b86ae24b-6f59-47e8-8adc-b17d88cbd367
019eb7e1-b46e-4705-bef5-f69ce5e467ee,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/58b575f2-2192-4a53-9d21-df9a0c14fc25,2021-02-11 14:21:17,page_view,,58b575f2-2192-4a53-9d21-df9a0c14fc25
019eb7e1-b46e-4705-bef5-f69ce5e467ee,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/58b575f2-2192-4a53-9d21-df9a0c14fc25,2021-02-11 14:21:23,add_to_cart,,58b575f2-2192-4a53-9d21-df9a0c14fc25
019eb7e1-b46e-4705-bef5-f69ce5e467ee,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/a88a23ef-679c-4743-b151-dc7722040d8c,2021-02-11 14:23:14,page_view,,a88a23ef-679c-4743-b151-dc7722040d8c
019eb7e1-b46e-4705-bef5-f69ce5e467ee,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/74aeb414-e3dd-4e8a-beef-0fa45225214d,2021-02-11 14:25:09,page_view,,74aeb414-e3dd-4e8a-beef-0fa45225214d
027e751e-4d55-4107-bc63-bcbac49fc49d,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/74aeb414-e3dd-4e8a-beef-0fa45225214d,2021-02-11 14:24:27,page_view,,74aeb414-e3dd-4e8a-beef-0fa45225214d
027e751e-4d55-4107-bc63-bcbac49fc49d,d1f08820-32e6-4a31-abba-5aa533bc15a9,https://greenary.com/product/74aeb414-e3dd-4e8a-beef-0fa45225214d,2021-02-11 14:24:36,add_to_cart,,74aeb414-e3dd-4e8a-beef-0fa45225214d


A single session is associated with a single user. There are no multi-user sessions

In [11]:
%%sql
/* sessions whose events are attributed to more than one user */
WITH multi_user_sessions AS (
    SELECT session_id,
           COUNT(DISTINCT user_id) AS num_users
    FROM stg_postgres_events
    GROUP BY session_id
    HAVING COUNT(DISTINCT user_id) > 1
)
-- a count of zero means every session belongs to exactly one user
SELECT COUNT(*) AS num_multi_user_sessions
FROM multi_user_sessions

num_multi_user_sessions
0


A single user can have multiple sessions

In [12]:
%%sql
/* count the distinct sessions recorded for each user */
WITH sessions_per_user AS (
    SELECT user_id,
           COUNT(DISTINCT(session_id)) AS num_sessions
    FROM stg_postgres_events
    GROUP BY ALL
),
/* one row per bucket: users with exactly one session vs. more than one */
single_multi_session_users AS (
    SELECT *
    FROM (
        -- bucket 1: users with exactly one session
        -- NOTE(review): the label alias num_sessions shares its name with the
        -- sessions_per_user column used in WHERE below -- confirm the filter
        -- binds to the column and not to the '1' label
        SELECT '1' AS num_sessions,
               COUNT(*) AS num_users,
               False AS is_multi_session_user
        FROM sessions_per_user
        WHERE num_sessions = 1
        -- this branch is grouped (by its constant columns) while the second
        -- branch below is a plain aggregate without GROUP BY
        GROUP BY ALL
    )
    UNION ALL
    -- bucket 2: users with more than one session (labelled '1+')
    SELECT '1+' AS num_sessions,
           COUNT(*) AS num_users,
           True AS is_multi_session_user
    FROM sessions_per_user
    WHERE num_sessions > 1
)
SELECT *
FROM single_multi_session_users

num_sessions,num_users,is_multi_session_user
1,25,False
1+,99,True


As seen from the total of the `num_users` column in the above query result, not all users in the `users` table have an entry in the `events` table. This is verified below

In [13]:
%%sql
/* distinct users that appear in the events table */
WITH num_users_from_events AS (
    SELECT 1 AS row_num,
           COUNT(DISTINCT user_id) AS num_users_events
    FROM stg_postgres_events
),
/* distinct users present in the users table */
num_users_overall AS (
    SELECT 1 AS row_num,
           COUNT(DISTINCT user_id) AS num_users
    FROM stg_postgres_users
)
-- row_num is a constant key used only to place both counts on a single row
SELECT *
FROM num_users_from_events e
INNER JOIN num_users_overall u USING (row_num)

row_num,num_users_events,num_users
1,124,130


## Product Queries - Conversion Rate

### By Product

#### Using `staging` models

In [14]:
%%sql
-- ####### START INTERMEDIATE MODEL #######
/* get product events for sessions that did not end in a purchase */
WITH products_non_purchase_sessions AS (
    SELECT session_id,
           user_id,
           event_id,
           event_type,
           product_id,
           created_at,
           0 AS is_purchased
    FROM stg_postgres_events
    -- get sessions in which the last event does not indicate a purchase
    -- (the frame is spelled out so that LAST_VALUE always looks at the whole
    -- session rather than relying on the engine's default window frame)
    QUALIFY (
        LAST_VALUE(event_type)
        OVER(
            PARTITION BY session_id
            ORDER BY created_at
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        )
    ) IN ('page_view', 'add_to_cart')
),
/* get the session ID for sessions ending in a purchase */
sessions_with_purchase AS (
    SELECT DISTINCT session_id
    FROM stg_postgres_events
    -- get sessions in which the last event indicates a purchase
    QUALIFY (
        LAST_VALUE(event_type)
        OVER(
            PARTITION BY session_id
            ORDER BY created_at
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        )
    ) IN ('checkout', 'package_shipped')
),
/* get product events for sessions that did convert to a purchase */
products_purchase_sessions AS (
    SELECT s.session_id,
           s.user_id,
           s.event_id,
           s.event_type,
           s.product_id,
           s.created_at,
           1 AS is_purchased
    FROM stg_postgres_events s
    -- use INNER JOIN to only get sessions ending in a purchase
    INNER JOIN sessions_with_purchase sp USING (session_id)
    -- get events showing the ID of the purchased product
    WHERE s.product_id IS NOT NULL
    -- no ORDER BY here: the rows are only aggregated downstream
),
/* count sessions not ending in a purchase in which product page was viewed */
product_non_purchase_page_views AS (
    SELECT product_id,
           COUNT(DISTINCT session_id) AS num_non_purchase_page_view_sessions
    FROM products_non_purchase_sessions
    WHERE event_type = 'page_view'
    GROUP BY product_id
),
/* count sessions ending in a purchase in which product page was viewed */
product_purchase_page_views AS (
    SELECT product_id,
           COUNT(DISTINCT session_id) AS num_purchase_page_view_sessions
    FROM products_purchase_sessions
    WHERE event_type = 'page_view'
    GROUP BY product_id
),
/* count sessions ending in a purchase */
product_purchases AS (
    SELECT product_id,
           COUNT(DISTINCT session_id) AS num_purchase_sessions
    FROM products_purchase_sessions
    -- get add-to-cart events since only products in a cart can be purchased
    -- (exclude products with a page_view that are not followed by add-to-cart
    -- since these cannot be purchased)
    WHERE event_type = 'add_to_cart'
    GROUP BY product_id
),
/* join three types of session counts */
product_session_totals AS (
    -- INNER JOINs keep only products that appear in all three counts
    SELECT pp.product_id,
           npv.num_non_purchase_page_view_sessions,
           ppv.num_purchase_page_view_sessions,
           pp.num_purchase_sessions
    FROM product_non_purchase_page_views npv
    INNER JOIN product_purchases pp USING (product_id)
    INNER JOIN product_purchase_page_views ppv USING (product_id)
),
-- ####### END INTERMEDIATE MODEL #######
/* map product IDs to product names */
products AS (
    SELECT product_id,
           name AS product_name
    FROM stg_postgres_products
),
/* get conversion rate */
product_conversion_rates AS (
    SELECT product_id,
           (
               num_non_purchase_page_view_sessions
               + num_purchase_page_view_sessions
           ) AS total_num_page_view_sessions,
           num_purchase_sessions,
           -- percentage of page-view sessions that ended in a purchase
           (
               100 * num_purchase_sessions / total_num_page_view_sessions
           ) AS conversion_rate
    FROM product_session_totals
),
/* get first and last event timestamp */
product_event_timestamp_bounds AS (
    SELECT product_id,
           MIN(created_at) AS first_event,
           MAX(created_at) AS last_event
    FROM stg_postgres_events
    GROUP BY product_id
),
/* combine conversion rates and event timestamp bounds */
product_conversion_rates_timestamp_bounds AS (
    SELECT pn.product_name,
           c.total_num_page_view_sessions,
           c.num_purchase_sessions,
           c.conversion_rate,
           b.first_event,
           b.last_event
    FROM product_conversion_rates c
    INNER JOIN product_event_timestamp_bounds b USING (product_id)
    INNER JOIN products pn USING (product_id)
)
SELECT *
FROM product_conversion_rates_timestamp_bounds
-- sort in the outermost query: an ORDER BY placed inside a CTE is not
-- guaranteed to carry through to the final result
ORDER BY conversion_rate DESC

product_name,total_num_page_view_sessions,num_purchase_sessions,conversion_rate,first_event,last_event
String of pearls,64,39,60.9375,2021-02-10 01:44:53,2021-02-11 23:32:19
Arrow Head,63,35,55.555556,2021-02-10 00:03:46,2021-02-11 23:34:47
Cactus,55,30,54.545455,2021-02-09 23:55:45,2021-02-11 23:41:40
ZZ Plant,63,34,53.968254,2021-02-10 01:46:28,2021-02-11 22:41:12
Bamboo,67,36,53.731343,2021-02-10 03:39:03,2021-02-11 23:47:44
Rubber Plant,54,28,51.851852,2021-02-10 00:05:33,2021-02-11 23:38:40
Monstera,49,25,51.020408,2021-02-10 01:54:21,2021-02-11 23:50:58
Calathea Makoyana,53,27,50.943396,2021-02-09 23:55:08,2021-02-11 23:34:14
Fiddle Leaf Fig,56,28,50.0,2021-02-10 00:05:48,2021-02-11 20:06:39
Majesty Palm,67,33,49.253731,2021-02-10 00:51:50,2021-02-11 23:48:49


### Definitions

1. Number of sessions with product (page) views and without a purchase
   - `num_non_purchase_page_view_sessions`
   - number of sessions ending without a purchase in which a product page was viewed
2. Number of sessions with product (page) views and with a purchase
   - `num_purchase_page_view_sessions`
   - number of sessions ending with a purchase in which a product page was viewed
3. Total sessions with product page views
   - `total_num_page_view_sessions`
   - sum of 1. and 2. above
4. Number of sessions with product purchases
   - `num_purchase_sessions`
   - number of sessions ending with a purchase
     - since only one purchase is allowed per session, this is the same as `num_purchases` (number of purchases)
5. Product conversion rate
   - `conversion_rate`
   - 100 X number of sessions with product purchase / Total sessions with product page views

#### Using `intermediate` models

In [15]:
%%sql
/* map product IDs to product names */
WITH products AS (
    SELECT product_id,
           name AS product_name
    FROM stg_postgres_products
),
/* get conversion rate */
product_conversion_rates AS (
    SELECT product_id,
           (
               num_non_purchase_page_view_sessions
               + num_purchase_page_view_sessions
           ) AS total_num_page_view_sessions,
           num_purchases AS num_purchase_sessions,
           -- percentage of page-view sessions that ended in a purchase
           (
               100 * num_purchase_sessions / total_num_page_view_sessions
           ) AS conversion_rate
    FROM int_events_sessions_aggregated_to_product
),
/* get first and last event timestamp */
product_event_timestamp_bounds AS (
    SELECT product_id,
           MIN(created_at) AS first_event,
           MAX(created_at) AS last_event
    FROM stg_postgres_events
    GROUP BY product_id
),
/* combine conversion rates and event timestamp bounds */
product_conversion_rates_timestamp_bounds AS (
    SELECT p.product_name,
           c.total_num_page_view_sessions,
           c.num_purchase_sessions,
           c.conversion_rate,
           b.first_event,
           b.last_event
    FROM product_conversion_rates c
    INNER JOIN product_event_timestamp_bounds b USING (product_id)
    INNER JOIN products p USING (product_id)
)
SELECT *
FROM product_conversion_rates_timestamp_bounds
-- sort in the outermost query: an ORDER BY placed inside a CTE is not
-- guaranteed to carry through to the final result
ORDER BY conversion_rate DESC

product_name,total_num_page_view_sessions,num_purchase_sessions,conversion_rate,first_event,last_event
String of pearls,64,39,60.9375,2021-02-10 01:44:53,2021-02-11 23:32:19
Arrow Head,63,35,55.555556,2021-02-10 00:03:46,2021-02-11 23:34:47
Cactus,55,30,54.545455,2021-02-09 23:55:45,2021-02-11 23:41:40
ZZ Plant,63,34,53.968254,2021-02-10 01:46:28,2021-02-11 22:41:12
Bamboo,67,36,53.731343,2021-02-10 03:39:03,2021-02-11 23:47:44
Rubber Plant,54,28,51.851852,2021-02-10 00:05:33,2021-02-11 23:38:40
Monstera,49,25,51.020408,2021-02-10 01:54:21,2021-02-11 23:50:58
Calathea Makoyana,53,27,50.943396,2021-02-09 23:55:08,2021-02-11 23:34:14
Fiddle Leaf Fig,56,28,50.0,2021-02-10 00:05:48,2021-02-11 20:06:39
Majesty Palm,67,33,49.253731,2021-02-10 00:51:50,2021-02-11 23:48:49


### Overall

Get overall conversion rate (across all products) using `intermediate` models

In [16]:
%%sql
/* label shown in place of a product name for the overall row */
WITH products AS (
    SELECT 'all' AS product_name
),
/* get conversion rate from the per-product session counts summed together */
overall_conversion_rates AS (
    SELECT SUM(
               num_non_purchase_page_view_sessions
               + num_purchase_page_view_sessions
           ) AS total_num_page_view_sessions,
           SUM(num_purchases) AS num_purchase_sessions,
           (
               100 * num_purchase_sessions / total_num_page_view_sessions
           ) AS conversion_rate
    FROM int_events_sessions_aggregated_to_product
),
/* get first and last event timestamp across all events */
overall_event_timestamp_bounds AS (
    SELECT MIN(created_at) AS first_event,
           MAX(created_at) AS last_event
    FROM stg_postgres_events
),
/* combine conversion rates and event timestamp bounds */
overall_conversion_rates_timestamp_bounds AS (
    -- each CTE above yields exactly one row, so the cross joins produce a
    -- single combined row
    SELECT p.product_name,
           c.total_num_page_view_sessions,
           c.num_purchase_sessions,
           c.conversion_rate,
           b.first_event,
           b.last_event
    FROM overall_conversion_rates c
    CROSS JOIN overall_event_timestamp_bounds b
    CROSS JOIN products p
)
SELECT *
FROM overall_conversion_rates_timestamp_bounds

product_name,total_num_page_view_sessions,num_purchase_sessions,conversion_rate,first_event,last_event
all,1845,862,46.720867,2021-02-09 23:55:08,2021-02-12 08:55:36


## Disconnect

Close connection

In [17]:
# Close the connection that was registered under the alias "connection"
%sql --close connection