In [50]:
from calitp.tables import tbls
import calitp.magics
from calitp import query_sql
import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')
import pandas as pd
import numpy as np

import os

# Notebook-wide configuration, exported through the process environment.
# NOTE(review): presumably read by calitp as the per-query BigQuery byte
# ceiling (20 GB here) — confirm against the calitp documentation.
os.environ.update({"CALITP_BQ_MAX_BYTES": f"{20_000_000_000}"})

In [51]:
# Daily ride counts for participant 'mst' plus the percentage change of each
# day's count relative to the previous row (ordered by date).
#
# Notes:
# * The date filter is applied before the window function is evaluated, so the
#   first day in the range has no predecessor and gets a NULL/NaN difference.
# * Rows are compared with the previous *available* date; a date with no rides
#   produces no row at all, so gaps are skipped rather than counted as zero.
# * The ORDER BY lives on the outermost SELECT: an ORDER BY inside a CTE does
#   not guarantee the order of the final result.
relative_ridership_change = query_sql("""

WITH extract_count_date AS (

  SELECT

    COUNT(*) AS ridership_count,
    -- EXTRACT(DATE FROM ...) already yields a DATE; no extra DATE() wrap needed
    EXTRACT(DATE FROM transaction_date_time_pacific) AS date

  FROM `cal-itp-data-infra.views.payments_rides`
  WHERE participant_id = 'mst'
  GROUP BY date
),


calculate_relative_difference AS (

    SELECT

      *,
      -- LAG over ascending dates == previous day's count
      ((ridership_count - LAG(ridership_count, 1) OVER by_date)
        / LAG(ridership_count, 1) OVER by_date) * 100
        AS relative_difference

    FROM extract_count_date
    WHERE date BETWEEN DATE("2021-08-30")
        AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
    WINDOW by_date AS (ORDER BY date)

)

SELECT *
FROM calculate_relative_difference
ORDER BY date

""", as_df=True)

relative_ridership_change

Unnamed: 0,ridership_count,date,relative_difference
0,150,2021-08-30,
1,140,2021-08-31,-6.666667
2,131,2021-09-01,-6.428571
3,120,2021-09-02,-8.396947
4,144,2021-09-03,20.000000
...,...,...,...
445,286,2022-11-28,100.000000
446,236,2022-11-29,-17.482517
447,277,2022-11-30,17.372881
448,204,2022-12-01,-26.353791


In [52]:
# Short alias for the query result; the histogram cell below reads `df`.
# NOTE(review): the result was requested with as_df=True, so it is presumably
# already a DataFrame and this wrap only guarantees the type — confirm.
df = pd.DataFrame(data=relative_ridership_change)

In [53]:
# Bucket the relative_difference values into 20 equal-width bins spanning 0-100.
# np.histogram ignores values outside `range`, so negative changes (decreases)
# and changes above 100 are not counted here.
frequency, bins = np.histogram(
    df['relative_difference'],
    bins=20,
    range=[0, 100],
)

# Text histogram: one row per bin, labelled with the bin's upper edge,
# followed by one space-separated star per observation in that bin.
for upper_edge, count in zip(bins[1:], frequency):
    stars = ' '.join('*' * int(count))
    print(round(upper_edge, 1), stars)

5.0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
10.0 * * * * * * * * * * * * * * * * * * * * * * * * * *
15.0 * * * * * * * * * * * * * * * * * * * * * * * * * *
20.0 * * * * * * * * * * * * * * * * *
25.0 * * * * * * * * * * *
30.0 * * * * * * * * * * * * * * *
35.0 * * * * * * * * *
40.0 * * * * *
45.0 * * * * *
50.0 * * * * * * * * * * *
55.0 * * * *
60.0 *
65.0 * * * * *
70.0 * * * *
75.0 *
80.0 * *
85.0 * *
90.0 * * * *
95.0 * *
100.0 * * * *


In [54]:
# Check: list the days in the trailing week (7 days ago through yesterday)
# whose ride count for participant 'mst' moved by more than 25 % in either
# direction relative to the previous row (ordered by date).
#
# Notes:
# * The relative difference is computed over the full history first and only
#   then filtered to the recent window, so the first day of the window still
#   has a predecessor to compare against.
# * The ORDER BY lives on the outermost SELECT: an ORDER BY inside a CTE does
#   not guarantee the order of the final result.
relative_ridership_change_test = query_sql("""

WITH extract_count_date AS (

  SELECT

    COUNT(*) AS ridership_count,
    -- EXTRACT(DATE FROM ...) already yields a DATE; no extra DATE() wrap needed
    EXTRACT(DATE FROM transaction_date_time_pacific) AS date

  FROM `cal-itp-data-infra.views.payments_rides`
  WHERE participant_id = 'mst'
  GROUP BY date
),


calculate_relative_difference AS (

    SELECT

      *,
      -- LAG over ascending dates == previous day's count
      ((ridership_count - LAG(ridership_count, 1) OVER by_date)
        / LAG(ridership_count, 1) OVER by_date) * 100
        AS relative_difference

    FROM extract_count_date
    WINDOW by_date AS (ORDER BY date)

),

test_recent_values AS (

    SELECT

        date,
        ridership_count,
        relative_difference

    FROM calculate_relative_difference
    WHERE ABS(relative_difference) > 25.0
    AND date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 WEEK)
        AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)

)

SELECT *
FROM test_recent_values
ORDER BY date

""", as_df=True)

relative_ridership_change_test

Unnamed: 0,date,ridership_count,relative_difference
0,2022-11-28,286,100.0
1,2022-12-01,204,-26.353791
