In [1]:
from psycopg2 import connect
import configparser
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# DB Connection
# Connection settings are read from a local config file rather than being
# written into the notebook.
CONFIG_PATH = './ttc_db.cfg'
# interpolation=None makes configparser take '%' characters in values literally.
CONFIG = configparser.ConfigParser(interpolation=None)
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check it so a missing file (e.g. the notebook was
# started from another directory) fails with a clear message instead of a
# KeyError on 'DBSETTINGS' below.
if not CONFIG.read(CONFIG_PATH):
    raise FileNotFoundError(
        'Database config file not found or unreadable: {}'.format(CONFIG_PATH)
    )
# Every key in the [DBSETTINGS] section is forwarded to connect() as a keyword
# argument.
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

In [3]:
# (Re)build the headway_data materialized view.
# NOTE: DROP ... CASCADE also drops any database objects that depend on the
# previous version of the view.
sql = '''
DROP MATERIALIZED VIEW IF EXISTS headway_data CASCADE;
CREATE MATERIALIZED VIEW headway_data AS
WITH ordered_trains AS (
  -- Rank the trains listed in each request for a station by timint, then trainid
  SELECT subwayline, station_char, trainid, timint, requestid,
  RANK() OVER (PARTITION BY subwayline, station_char, requestid ORDER BY timint, trainid) as train_order
  FROM ntas_data
), closest_trains AS (
  -- Keep only the first-ranked (lowest timint) train per request and station
  SELECT subwayline, station_char, trainid, timint, requestid
  FROM ordered_trains
  WHERE train_order = 1
), valid_trains_next AS (
  -- Attach the request timestamp and the train seen at the following request
  -- for the same station. adj_date shifts the day boundary to 03:00, so
  -- requests made shortly after midnight are grouped with the previous date.
  SELECT subwayline, station_char, trainid, timint, requestid, request_date,
    DATE(request_date - INTERVAL '3 hours') as adj_date,
    LEAD(trainid) OVER (PARTITION BY subwayline, station_char, DATE(request_date - INTERVAL '3 hours') ORDER BY request_date, requestid) as next_train
  FROM closest_trains
  JOIN requests USING (requestid)
  -- Time-of-day filter (presumably service hours; confirm against the schedule):
  -- up to 01:30 on any day, from 06:00 when dow > 0 (Mon-Sat), from 08:00 when
  -- dow = 0 (Sunday). Each AND pair is parenthesised explicitly so the filter
  -- does not depend on AND binding tighter than OR.
  WHERE request_date::time <= '01:30:00'::time
     OR (extract(dow from request_date) > 0 AND extract(hour from request_date) >= 6)
     OR (extract(dow from request_date) = 0 AND extract(hour from request_date) >= 8)
)
  -- Keep only the rows where the following request shows a different train,
  -- then pair each such row with the next one for the same station and
  -- adj_date. Rows whose next_train is NULL (the last request in a partition)
  -- are dropped too, because the <> comparison yields NULL.
  SELECT subwayline, station_char, trainid, timint, requestid, adj_date, request_date,
  LEAD(request_date) OVER station_day AS next_train_request_date,
  LEAD(timint) OVER station_day AS next_train_timint
  FROM valid_trains_next
  WHERE trainid <> next_train
  WINDOW station_day AS (PARTITION BY subwayline, station_char, adj_date ORDER BY request_date, requestid)
'''
# `with con` commits the transaction on success and rolls it back on error;
# the inner context manager closes the cursor.
with con:
    with con.cursor() as cur:
        cur.execute(sql)

In [4]:
# Show floats with two decimals in the summary table. The full option key is
# used instead of the abbreviated 'float_format', which only works while the
# abbreviation matches exactly one pandas option.
pd.set_option('display.float_format', '{0:.2f}'.format)

# Headway in minutes: the gap between a row's request_date and the next row's
# request_date in headway_data (EXTRACT(EPOCH ...) gives seconds, hence / 60).
# Restricted to subwayline 'BD' and station codes ending in '1'
# (presumably one direction/platform; confirm against the station coding).
sql = '''
SELECT station_char, EXTRACT(EPOCH FROM next_train_request_date-request_date) / 60 as headway
FROM headway_data
WHERE subwayline = 'BD'
AND RIGHT(station_char, 1) = '1'
'''
# Run the read inside `with con`, like the view-building cell, so the implicit
# transaction is committed (or rolled back on error) rather than leaving the
# connection idle in an open transaction.
with con:
    data = pd.read_sql(sql, con)
# Rows with no following observation have a NULL headway; describe() skips
# them, so a station can legitimately show count 0.
data.groupby('station_char').describe()


Unnamed: 0_level_0,headway,headway,headway,headway,headway,headway,headway,headway
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
station_char,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
BAT1,7550.0,3.84,2.51,0.6,2.13,3.72,5.0,136.74
BAU1,7555.0,3.81,2.53,0.6,2.12,3.39,5.0,140.0
BLO1,0.0,,,,,,,
BRD1,7658.0,3.74,2.54,0.6,2.0,3.13,5.0,139.0
BSP1,7943.0,3.65,2.33,0.0,2.13,3.27,4.88,137.74
CFK1,7673.0,3.74,2.5,0.6,2.01,3.26,5.0,138.01
CHE1,8035.0,3.56,2.64,0.25,2.0,3.0,4.98,140.0
CHR1,7531.0,3.85,2.54,0.59,2.13,3.73,5.0,140.14
COX1,8302.0,3.5,2.51,0.6,2.0,3.0,4.87,137.74
DNW1,7743.0,3.75,2.43,0.6,2.25,3.4,4.99,137.74
