Skip to content

Commit

Permalink
create new table to track weekly unlabeled routes for payments (#2686)
Browse files Browse the repository at this point in the history
* create new weekly unlabeled routes tracking table, add 'monthly' to title of original table

* update ymls
  • Loading branch information
charlie-costanzo committed Jun 6, 2023
1 parent 0ee2e9c commit 328842c
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ models:
description: A participant's total ridership on a particular day
- name: relative_difference
description: The relative change in a participant's ridership count as compared to the previous day
- name: v2_payments_reliability_unlabeled_routes
- name: v2_payments_reliability_monthly_unlabeled_routes
tests:
- dbt_utils.expression_is_true:
expression: "n_all_rides >= (n_route_z_rides + n_null_rides)"
Expand All @@ -52,3 +52,24 @@ models:
description: The percentage of unlabeled rides compared to the total number of rides for the given month
- name: recency_rank
description: Used to identify a month's recency (for help with filtering)
- name: v2_payments_reliability_weekly_unlabeled_routes
tests:
- dbt_utils.expression_is_true:
expression: "n_all_rides >= (n_route_z_rides + n_null_rides)"
columns:
- name: participant_id
description: Littlepay-assigned Participant ID
- name: week_start
description: The first day of the week for which the count aggregation is being calculated
- name: n_route_z_rides
description: The total number of rides labeled as `Route Z` for the given week
- name: n_null_rides
description: The total number of rides labeled as `Null` for the given week
- name: total_unlabeled_rides
description: The total number of rides labeled as `Route Z`, `Null`, or an empty string for the given week
- name: n_all_rides
description: The total number of rides for the given week
- name: pct_unlabeled_rides_to_total_rides
description: The percentage of unlabeled rides compared to the total number of rides for the given week
- name: recency_rank
description: Used to identify a week's recency (for help with filtering)
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ aggregations_and_date_spine AS (
USING (participant_id, month_start)
),

v2_payments_reliability_unlabeled_routes AS (
v2_payments_reliability_monthly_unlabeled_routes AS (
SELECT

participant_id,
Expand All @@ -57,4 +57,4 @@ v2_payments_reliability_unlabeled_routes AS (
FROM aggregations_and_date_spine
)

SELECT * FROM v2_payments_reliability_unlabeled_routes
SELECT * FROM v2_payments_reliability_monthly_unlabeled_routes
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Weekly count of "unlabeled" rides per participant.
-- A ride is treated as unlabeled when its route_id is 'Route Z', NULL, or ''.
-- Output grain: one row per (participant_id, week_start) present in the weekly date spine.

WITH payments_rides AS (
    SELECT * FROM {{ ref('fct_payments_rides_v2') }}
),

payments_tests_weekly_date_spine AS (
    SELECT * FROM {{ ref('payments_tests_weekly_date_spine') }}
),

-- Ride tallies for every spine week that has at least one matching ride.
-- A ride matches a week when the participant matches and the ride's
-- transaction_date_pacific lies in the closed range [week_start, week_end].
weekly_ride_counts AS (
    SELECT
        weeks.participant_id,
        weeks.week_start,
        COUNTIF(route_id = 'Route Z') AS n_route_z_rides,
        COUNTIF(route_id IS NULL) AS n_null_rides,
        COUNTIF(route_id = '') AS n_empty_string_rides,
        COUNT(*) AS n_all_rides
    FROM payments_tests_weekly_date_spine AS weeks
    INNER JOIN payments_rides AS rides
        ON weeks.participant_id = rides.participant_id
        AND week_start <= transaction_date_pacific
        AND transaction_date_pacific <= week_end
    GROUP BY
        weeks.participant_id,
        weeks.week_start
),

-- Re-attach the tallies to the full spine so that weeks without any rides
-- still appear; their count columns stay NULL because of the LEFT JOIN.
spine_with_counts AS (
    SELECT
        weeks.participant_id,
        weeks.week_start,
        tallies.n_route_z_rides,
        tallies.n_null_rides,
        tallies.n_empty_string_rides,
        tallies.n_all_rides,
        (
            tallies.n_route_z_rides
            + tallies.n_null_rides
            + tallies.n_empty_string_rides
        ) AS total_unlabeled_rides
    FROM payments_tests_weekly_date_spine AS weeks
    LEFT JOIN weekly_ride_counts AS tallies
        USING (participant_id, week_start)
),

-- Final shape: adds the unlabeled percentage and a per-participant recency
-- rank in which 1 is the most recent week_start.
v2_payments_reliability_weekly_unlabeled_routes AS (
    SELECT
        participant_id,
        week_start,
        n_route_z_rides,
        n_null_rides,
        total_unlabeled_rides,
        n_empty_string_rides,
        n_all_rides,
        -- SAFE_DIVIDE yields NULL instead of an error when n_all_rides is 0.
        SAFE_DIVIDE(total_unlabeled_rides, n_all_rides) * 100 AS pct_unlabeled_rides_to_total_rides,
        RANK() OVER (
            PARTITION BY participant_id
            ORDER BY week_start DESC
        ) AS recency_rank
    FROM spine_with_counts
)

SELECT * FROM v2_payments_reliability_weekly_unlabeled_routes

0 comments on commit 328842c

Please sign in to comment.