Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
lauriemerrell committed Dec 14, 2022
1 parent 60b4d55 commit 039858a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -1,83 +1,28 @@
-- NOTE(review): this span is a rendered diff hunk (-1,83 +1,28) shown without
-- +/- markers, so lines from both sides of the change appear interleaved
-- (e.g. two consecutive openers for fct_daily_schedule_feeds, and two FROM
-- clauses inside the final CTE). As shown it does not parse as one statement.
{{ config(materialized='table') }}

-- Pass-through of the dim_calendar_dates model; read by boolean_calendar_dates.
WITH dim_calendar_dates AS (
SELECT *
FROM {{ ref('dim_calendar_dates') }}
),

-- Pass-through of the long-format calendar model. daily_services reads its
-- feed_key, day_num, start_date, end_date, service_id and has_service columns.
int_gtfs_schedule__long_calendar AS (
SELECT *
FROM {{ ref('int_gtfs_schedule__long_calendar') }}
),

-- Daily feed rows with an added day_num column: the day-of-week number of `date`.
-- NOTE(review): presumably BigQuery, where DAYOFWEEK runs 1 (Sunday) .. 7 (Saturday);
-- confirm long_cal.day_num uses the same numbering, since the two are joined on equality.
fct_daily_schedule_feeds AS (
WITH fct_daily_schedule_feeds AS (
SELECT
*,
EXTRACT(DAYOFWEEK FROM date) AS day_num
FROM {{ ref('fct_daily_schedule_feeds') }}
),

-- Turn exception_type into a boolean flag: 1 -> TRUE, 2 -> FALSE.
-- The CASE has no ELSE, so any other exception_type yields NULL.
boolean_calendar_dates AS (
SELECT
date,
feed_key,
service_id,
CASE
WHEN exception_type = 1 THEN TRUE
WHEN exception_type = 2 THEN FALSE
END AS has_service
FROM dim_calendar_dates
),

-- Collapse to one row per (date, feed_key, service_id).
-- decide that exception type 2 trumps exception type 1
-- i.e., if same date appears twice with two exception types
-- the cancelation wins and we say no service on that date
-- (this generally shouldn't happen)
summarize_calendar_dates AS (
SELECT
date,
feed_key,
service_id,
LOGICAL_AND(has_service) AS has_service
FROM boolean_calendar_dates
GROUP BY date, feed_key, service_id
),

-- For every daily feed row, attach matching calendar and calendar_dates rows
-- (both LEFT JOINs, so feed/date rows with no match are kept) and resolve a
-- single service_id / has_service pair from the two sources.
daily_services AS (

SELECT
daily_feeds.date AS service_date,
cal_dates.date AS cd_date,
daily_feeds.feed_key,
long_cal.service_id AS calendar_service_id,
long_cal.has_service AS calendar_has_service,
cal_dates.service_id AS calendar_dates_service_id,
cal_dates.has_service AS calendar_dates_has_service,
-- prefer the calendar's service_id; fall back to the calendar_dates one
COALESCE(long_cal.service_id, cal_dates.service_id) AS service_id,
-- calendar_dates takes precedence if present: it can modify calendar
-- if no calendar_dates, use calendar
-- if neither, no service
COALESCE(cal_dates.has_service, long_cal.has_service, FALSE) AS has_service
FROM fct_daily_schedule_feeds AS daily_feeds
-- calendar rows for the same feed and day-of-week whose date range
-- (inclusive on both ends) covers the feed date
LEFT JOIN int_gtfs_schedule__long_calendar AS long_cal
ON daily_feeds.feed_key = long_cal.feed_key
AND daily_feeds.day_num = long_cal.day_num
AND daily_feeds.date BETWEEN long_cal.start_date AND long_cal.end_date
-- calendar_dates rows for the same feed and date: either for the same
-- service_id as the matched calendar row, or any service_id when no
-- calendar row matched (long_cal.service_id IS NULL)
LEFT JOIN summarize_calendar_dates AS cal_dates
ON daily_feeds.feed_key = cal_dates.feed_key
AND daily_feeds.date = cal_dates.date
AND (long_cal.service_id = cal_dates.service_id OR long_cal.service_id IS NULL)
-- Pass-through of the int_gtfs_schedule__all_scheduled_service model.
all_scheduled_service AS (
SELECT *
FROM {{ ref('int_gtfs_schedule__all_scheduled_service') }}
),

-- Final index CTE. Two variants of its body are interleaved below:
-- one selecting from daily_services and keeping only rows with a non-NULL
-- service_id and has_service = TRUE, and one inner-joining
-- all_scheduled_service to fct_daily_schedule_feeds on feed_key and
-- service_date = date.
int_gtfs_schedule__daily_scheduled_service_index AS (
SELECT
service_date,
cd_date,
feed_key,
service_id
FROM daily_services
WHERE service_id IS NOT NULL
AND has_service
-- feed_key is qualified because both joined relations expose a feed_key column
fct_daily_schedule_feeds.feed_key,
service_id,
calendar_key,
calendar_dates_key
FROM all_scheduled_service
INNER JOIN fct_daily_schedule_feeds
ON all_scheduled_service.feed_key = fct_daily_schedule_feeds.feed_key
AND all_scheduled_service.service_date = fct_daily_schedule_feeds.date
)

SELECT * FROM int_gtfs_schedule__daily_scheduled_service_index
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
{{ config(materialized='table',
cluster_by = ['day_num', 'start_date']
) }}

-- TODO: make an intermediate calendar and use that instead of the dimension
WITH dim_calendar AS (
SELECT *
Expand Down

0 comments on commit 039858a

Please sign in to comment.