In [3]:
import os
from time import strftime
from datetime import date
from collections import defaultdict
from six import string_types
from copy import deepcopy

import pandas as pd

import duckdb

import plotly
import plotly.express as px
from dash import Dash, html, dcc

import sqlalchemy
import jinja2
from jinjasql import JinjaSql
import dash_bootstrap_components as dbc

from dotenv import load_dotenv

# Import ipython-sql Jupyter extension to create SQL cells
# %load_ext sql
# directly output data to Pandas and to simplify the output that is printed to the notebook.
# %config SqlMagic.autopandas = True
# %config SqlMagic.feedback = False
# %config SqlMagic.displaycon = False

pd.options.display.max_rows=100

# Connect ipython-sql to DuckDB using a SQLAlchemy-style connection string. You may either connect to an in memory DuckDB, or a file backed db.
# %sql duckdb:///mta.db

# import qgrid
# from qgrid import show_grid

print(f"pandas               {pd.__version__:<20}")
print(f"plotly               {plotly.__version__:<20}")
print(f"sqlalchemy           {sqlalchemy.__version__:<20}")
print(f"duckdb               {duckdb.__version__:<20}")
print(f"jinja2               {jinja2.__version__:<20}")



pandas               1.5.3               
plotly               5.13.0              
sqlalchemy           1.4.46              
duckdb               0.6.1               
jinja2               3.1.2               


In [4]:
# put up text and markdown in 1, 2, 3 columns https://medium.com/a-r-g-o/using-plotlys-dash-to-deliver-public-sector-decision-support-dashboards-ac863fa829fb

# run the overall filter, have sum of 2019, pandemic, current period
# print this
# put it in the dashboard

# 2019    pandemic   2022 (filter period)
# avg          avg                    avg
#        ch v 2019        chg vs pandemic
#                             chg vs 2019

# put in dashboard:
# time series - by dow - by tod

# put in dashboard - table with sorts

# put filter widgets in dashboard

# map - 3 column

# station table - 3 col
# pretty name , entries, ch from 2019, ch from pandemic

# filter hourly and daily using
# startdate/end_date
# day of week
# hour of day
# cbd

# show startdate to enddate
# markdown 3 big text boxes: 2019, pandemic (%), selected (% vs 2019, %v pandemic)

# entries by date - selected timeperiod
# entries by dow - selected timeperiod
# entries by hour of day - selected timeperiod

# map across full dashboard

# station table
# by date startdate to enddate
# 2019

In [3]:
load_dotenv()

mapbox_token = os.getenv('MAPBOX_TOKEN')


In [5]:
# Location of the DuckDB file. Set MTA_DB_PATH (in the environment or .env) to
# run the notebook on another machine; the fallback is the original
# hard-coded path, so existing setups keep working unchanged.
mta_db_path = os.getenv('MTA_DB_PATH', '/Users/drucev/projects/MTA/mta.db')
connection_string = f"duckdb:///{mta_db_path}"
# The notebook only reads from the database, so open it read-only.
con = sqlalchemy.create_engine(connection_string, connect_args={'read_only': True})

# query = "describe station_hourly "
# con.execute(query)
# con.fetchall()



In [None]:
# Weekday name -> number (Sunday=0 ... Saturday=6); these numbers are what the
# queries compare against date_part('dow', date).
dowmap = dict(zip(
    ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    range(7),
))
# number -> weekday name
dowinvmap = {number: name for name, number in dowmap.items()}

# Borough / area name -> numeric code used in the `boro` column filter.
boromap = {
    area: code
    for code, area in enumerate(
        [
            'Manhattan below 63 St',
            'Manhattan above 63 St',
            'Brooklyn',
            'Queens',
            'Bronx',
        ],
        start=1,
    )
}
# numeric code -> borough / area name
boroinvmap = {code: area for area, code in boromap.items()}


In [None]:
# Default filter selection used by the query helpers below. A defaultdict(str)
# is used so that looking up a filter that was never set yields '' (falsy).
filters = defaultdict(str)
filters.update({
    # all seven weekdays, as numeric codes
    'dow': [dowmap[day] for day in
            ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')],
    # all five areas, as numeric codes
    'boro': [boromap[area] for area in
             ('Manhattan below 63 St', 'Manhattan above 63 St', 'Brooklyn', 'Queens', 'Bronx')],
    # hour-of-day buckets 4, 8, ..., 24
    'tod': list(range(4, 25, 4)),
    # selected period: startdate inclusive, enddate exclusive in the queries
    'startdate': '2022-01-01',
    'enddate': '2023-02-01',
    # pandemic comparison window
    'pandemic_start': '2020-04-01',
    'pandemic_end': '2021-04-01',
})


In [6]:
# JinjaSql template used to smoke-test the filter clauses against mta_clean.
# Each `{% if name %} ... {% endif %}` clause is emitted only when that filter
# is truthy; `{{ name }}` values are passed to the database as bind parameters
# and `inclause` turns a list into a parenthesised IN list.
# `limit 10` keeps the result small.
query_template = """
select
    DATE,
    hour,
    STATION,
    boro,
    entries,
    exits
from
    mta_clean
where true
{% if startdate %} and date >= {{startdate}} {% endif %}
{% if enddate %} and date < {{enddate}} {% endif %}
{% if dow %} and date_part('dow', date) in {{ dow | inclause }}  {% endif %}
{% if tod %} and hour in {{ tod | inclause }} {% endif %}
{% if boro %} and boro in {{ boro | inclause }} {% endif %}
limit 10
"""


In [7]:
def quote_sql_string(value):
    '''
    Render `value` the way it would appear as a literal in SQL text.

    - str: embedded single quotes are doubled and the whole string is wrapped
      in single quotes, e.g. O'Hare -> 'O''Hare'
    - list / tuple: every item is quoted recursively; a list stays a list and
      a tuple stays a tuple
    - anything else (numbers, None, ...): returned unchanged

    In this notebook the result is only used to print a readable version of a
    query; the queries themselves are run with bind parameters.
    '''
    # The notebook is Python 3 only (it uses f-strings), so plain `str`
    # replaces the six.string_types compatibility alias.
    if isinstance(value, str):
        return "'{}'".format(str(value).replace("'", "''"))
    if isinstance(value, list):
        return [quote_sql_string(item) for item in value]
    if isinstance(value, tuple):
        return tuple(quote_sql_string(item) for item in value)
    return value


def get_sql_from_template(con, query, bind_params=None, verbose=False):
    """
    Run `query` against `con` and return the result as a pandas DataFrame.

    Parameters
    ----------
    con
        Connection or engine accepted by pandas.read_sql.
    query : str
        Plain SQL, or a JinjaSql template when `bind_params` is given.
    bind_params : dict, optional
        Template variables. With None the query is sent exactly as written,
        with no templating. Any dict, including an empty one, is rendered
        through JinjaSql, so a template whose filters are all unset still has
        its `{% if %}` blocks resolved instead of reaching the database as
        raw Jinja text.
    verbose : bool
        Print the SQL before running it.

    Returns
    -------
    pandas.DataFrame
    """
    if bind_params is None:
        if verbose:
            print(query)
        return pd.read_sql(query, con)

    if verbose:
        # Render once more with quoted values, purely for legibility; the
        # caller's bind_params are not modified.
        printable = {key: quote_sql_string(val) for key, val in bind_params.items()}
        query_str, query_vals = JinjaSql().prepare_query(query, printable)
        try:
            print(query_str % tuple(query_vals))
        except (TypeError, ValueError):
            # A literal '%' in the SQL (e.g. a LIKE pattern) breaks
            # %-interpolation; debugging output must not abort the query.
            print(query_str)
            print(query_vals)

    # process params using ? style, run query, return dataframe
    query_str, query_vals = JinjaSql(param_style='qmark').prepare_query(query, bind_params)
    # With nothing bound, pass params=None so the call matches the plain-SQL path.
    return pd.read_sql(query_str, con, params=query_vals or None)


get_sql_from_template(con, query_template, filters, verbose=True)



NameError: name 'filters' is not defined

In [None]:
def create_filter_current(con, filters, verbose=False):
    """
    Create (or replace) temp table `filter_current`.

    The table holds the rows of mta_clean that pass the optional filters
    startdate (inclusive), enddate (exclusive), dow, tod, boro and sta, plus a
    computed `dow` column. Rows are only filtered here, not aggregated.
    Returns whatever get_sql_from_template returns for the statement.
    """
    filter_current_sql = """
    create or replace temp table filter_current as
    select
        date,
        datepart('dow', date) dow,
        hour,
        station,
        boro,
        entries,
        exits
    from
        mta_clean
    where
        TRUE
        {% if startdate %} and date >= {{startdate}} {% endif %}
        {% if enddate %} and date < {{enddate}} {% endif %}
        {% if dow %} and date_part('dow', date) in {{ dow | inclause }}  {% endif %}
        {% if tod %} and hour in {{ tod | inclause }} {% endif %}
        {% if boro %} and boro in {{ boro | inclause }} {% endif %}
        {% if sta %} and station in {{ sta | inclause }} {% endif %}
    """
    return get_sql_from_template(
        con,
        filter_current_sql,
        bind_params=filters,
        verbose=verbose,
    )

create_filter_current(con, filters, verbose=True)
get_sql_from_template(con, 'select * from filter_current order by date, station limit 10', None, verbose=True)



In [None]:
def create_filter_2019(con, filters, verbose=False):
    """
    Create (or replace) temp table `filter_2019`.

    Same shape as filter_current, but the date range is fixed to calendar year
    2019; the dow, tod, boro and sta filters still apply. Rows are only
    filtered here, not aggregated.
    Returns whatever get_sql_from_template returns for the statement.
    """
    filter_2019_sql = """
    create or replace temp table filter_2019 as
    select
        date,
        datepart('dow', date) dow,
        hour,
        station,
        boro,
        entries,
        exits
    from
        mta_clean
    where
        date_part('year', DATE)=2019
        {% if dow %} and date_part('dow', date) in {{ dow | inclause }}  {% endif %}
        {% if tod %} and hour in {{ tod | inclause }} {% endif %}
        {% if boro %} and boro in {{ boro | inclause }} {% endif %}
        {% if sta %} and station in {{ sta | inclause }} {% endif %}
    """
    return get_sql_from_template(
        con,
        filter_2019_sql,
        bind_params=filters,
        verbose=verbose,
    )

create_filter_2019(con, filters, verbose=True)
get_sql_from_template(con, 'select * from filter_2019 order by date, station limit 10', None, verbose=True)



In [None]:
def create_filter_pandemic(con, filters, verbose=False):
    """
    Create (or replace) temp table `filter_pandemic`.

    Same shape as filter_current, restricted to the pandemic comparison
    window. The window is read from filters['pandemic_start'] (inclusive) and
    filters['pandemic_end'] (exclusive); when either is missing or empty it
    falls back to 2020-04-01 / 2021-04-01, the values that used to be
    hard-coded in the query. The dow, tod, boro and sta filters still apply.
    Rows are only filtered here, not aggregated.

    `filters` itself is not modified.
    Returns whatever get_sql_from_template returns for the statement.
    """

    query = """
    create or replace temp table filter_pandemic as
    select
        date,
        datepart('dow', date) dow,
        hour,
        station,
        boro,
        entries,
        exits
    from
        mta_clean
    where
        date >= {{pandemic_start}} and date < {{pandemic_end}}
        {% if dow %} and date_part('dow', date) in {{ dow | inclause }}  {% endif %}
        {% if tod %} and hour in {{ tod | inclause }} {% endif %}
        {% if boro %} and boro in {{ boro | inclause }} {% endif %}
        {% if sta %} and station in {{ sta | inclause }} {% endif %}
    """
    # Work on a copy so the defaults never leak into the caller's dict.
    params = dict(filters or {})
    params['pandemic_start'] = params.get('pandemic_start') or '2020-04-01'
    params['pandemic_end'] = params.get('pandemic_end') or '2021-04-01'
    return get_sql_from_template(con, query, params, verbose)

create_filter_pandemic(con, filters, verbose=True)
get_sql_from_template(con, 'select * from filter_pandemic order by date, station limit 10', None, verbose=True)



In [None]:
def agg_station(con, source, verbose=False):
    """
    Create (or replace) temp table `<source>_daily` from table `<source>`.

    Entries and exits are summed per (date, dow, station, boro), i.e. the
    hourly rows of `source` are collapsed to one row per station per day.

    `source` is spliced into the SQL text as a table name, so it must be a
    plain identifier (letters, digits, underscore); anything else raises
    ValueError instead of producing malformed SQL.
    Returns whatever get_sql_from_template returns for the statement.
    """
    if not (isinstance(source, str) and source.isidentifier()):
        raise ValueError("source must be a plain table name, got {!r}".format(source))

    # No comma after the last GROUP BY column: a dangling comma is not
    # standard SQL.
    query = """
    create or replace temp table {source}_daily as
    select
        date,
        dow,
        station,
        boro,
        sum(entries) entries,
        sum(exits) exits
    from
        {source}
    group by
        date,
        dow,
        station,
        boro
    """.format(source=source)

    return get_sql_from_template(con, query, None, verbose)

def create_filter_current_daily(con, verbose=False):
    """Build temp table filter_current_daily: filter_current summed per station per day."""
    source_table = "filter_current"
    return agg_station(con, source_table, verbose=verbose)


create_filter_current_daily(con, verbose=True)
get_sql_from_template(con, 'select * from filter_current_daily order by date, station limit 10', None, verbose=True)



In [None]:
def create_filter_2019_daily(con, verbose=False):
    """Build temp table filter_2019_daily: filter_2019 summed per station per day."""
    source_table = "filter_2019"
    return agg_station(con, source_table, verbose=verbose)

create_filter_2019_daily(con, verbose=True)
get_sql_from_template(con, 'select * from filter_2019_daily order by date, station limit 10', None, verbose=True)


In [None]:
def create_filter_pandemic_daily(con, verbose=False):
    """Build temp table filter_pandemic_daily: filter_pandemic summed per station per day."""
    source_table = "filter_pandemic"
    return agg_station(con, source_table, verbose=verbose)


create_filter_pandemic_daily(con, verbose=True)
get_sql_from_template(con, 'select * from filter_pandemic_daily order by date, station limit 10', None, verbose=True)


In [None]:
def agg_summary(con, source, verbose=False):
    """
    Create (or replace) temp table `<source>_summary` from `<source>_daily`.

    Entries and exits are summed per (date, dow) across all stations, giving
    one row per day.

    `source` is spliced into the SQL text as part of the table names, so it
    must be a plain identifier (letters, digits, underscore); anything else
    raises ValueError instead of producing malformed SQL.
    Returns whatever get_sql_from_template returns for the statement.
    """
    if not (isinstance(source, str) and source.isidentifier()):
        raise ValueError("source must be a plain table name, got {!r}".format(source))

    # No comma after the last GROUP BY column: a dangling comma is not
    # standard SQL.
    query = """
    create or replace temp table {source}_summary as
    select
        date,
        dow,
        sum(entries) entries,
        sum(exits) exits
    from
        {source}_daily
    group by
        date,
        dow
    """.format(source=source)

    return get_sql_from_template(con, query, None, verbose)


def create_filter_current_summary(con, verbose=False):
    """Build temp table filter_current_summary: filter_current_daily summed per day over all stations."""
    source_table = "filter_current"
    return agg_summary(con, source_table, verbose=verbose)

create_filter_current_summary(con, verbose=True)
get_sql_from_template(con, 'select * from filter_current_summary order by date', None, verbose=True)


In [None]:
def create_filter_2019_summary(con, verbose=False):
    """Build temp table filter_2019_summary: filter_2019_daily summed per day over all stations."""
    source_table = "filter_2019"
    return agg_summary(con, source_table, verbose=verbose)


create_filter_2019_summary(con, verbose=True)
get_sql_from_template(con, 'select * from filter_2019_summary order by date', None, verbose=True)


In [None]:
def create_filter_pandemic_summary(con, verbose=False):
    """Build temp table filter_pandemic_summary: filter_pandemic_daily summed per day over all stations."""
    source_table = "filter_pandemic"
    return agg_summary(con, source_table, verbose=verbose)

create_filter_pandemic_summary(con, verbose=True)
get_sql_from_template(con, 'select * from filter_pandemic_summary order by date', None, verbose=True)


In [None]:
def fn_day_count_current(con, verbose=False):
    """Return the number of rows (one per date) in temp table filter_current_summary."""
    query = "select count(*) as days from filter_current_summary;"
    # .iloc[0, 0] addresses the single cell purely by position. The previous
    # .iloc[0][0] looked up the integer 0 on a Series indexed by the label
    # 'days', which only works through a fallback that newer pandas deprecates.
    return get_sql_from_template(con, query, None, verbose).iloc[0, 0]

fn_day_count_current(con, verbose=True)


In [None]:
def fn_day_count_2019(con, verbose=False):
    """Return the number of rows (one per date) in temp table filter_2019_summary."""
    query = "select count(*) as days from filter_2019_summary;"
    # .iloc[0, 0] addresses the single cell purely by position. The previous
    # .iloc[0][0] looked up the integer 0 on a Series indexed by the label
    # 'days', which only works through a fallback that newer pandas deprecates.
    return get_sql_from_template(con, query, None, verbose).iloc[0, 0]

fn_day_count_2019(con, verbose=True)



In [None]:
def fn_day_count_pandemic(con, verbose=False):
    """Return the number of rows (one per date) in temp table filter_pandemic_summary."""
    query = "select count(*) as days from filter_pandemic_summary;"
    # .iloc[0, 0] addresses the single cell purely by position. The previous
    # .iloc[0][0] looked up the integer 0 on a Series indexed by the label
    # 'days', which only works through a fallback that newer pandas deprecates.
    return get_sql_from_template(con, query, None, verbose).iloc[0, 0]

fn_day_count_pandemic(con, verbose=True)


In [None]:
def entries_by_date(con, verbose=False):
    """
    Return a DataFrame with columns date, entries, exits read from temp table
    filter_current_summary, ordered by date.
    """
    return get_sql_from_template(
        con,
        "select date, entries, exits from filter_current_summary order by date",
        None,
        verbose,
    )

df_entries_by_date = entries_by_date(con, True)
df_entries_by_date

In [None]:
def entries_by_dow(con, verbose=False):
    """
    Return average daily entries and exits per day of week for the three
    comparison periods.

    Reads temp tables filter_current_summary, filter_2019_summary and
    filter_pandemic_summary. The result has columns:
      when    - 'selection', '2019' or 'pandemic'
      n       - number of summary rows (days) with that day of week
      dow     - day-of-week number
      entries - sum(entries) / n
      exits   - sum(exits) / n
    The query has no ORDER BY, so row order is not guaranteed.

    verbose : print the SQL before running it (previously the SQL was always
    printed regardless of this flag).
    """

    query = """
        (select
            'selection' as when,
            count(*) n,
            dow,
            sum(entries)/n as entries,
            sum(exits)/n as exits
        from filter_current_summary
        group by
            dow
        )
        union
        (select
            '2019' as when,
            count(*) n,
            dow,
            sum(entries)/n as entries,
            sum(exits)/n as exits
        from filter_2019_summary
        group by
            dow
        )  
        union
        (select
            'pandemic' as when,
            count(*) n,
            dow,
            sum(entries)/n as entries,
            sum(exits)/n as exits
        from filter_pandemic_summary
        group by
            dow
        )
    """

    return get_sql_from_template(con, query, None, verbose=verbose)

df_entries_by_dow = entries_by_dow(con, verbose=True)
df_entries_by_dow

In [None]:
def entries_by_tod(con, verbose=False):
    """
    Return average entries and exits per hour-of-day bucket for the three
    comparison periods.

    For each of the temp tables filter_current, filter_pandemic and
    filter_2019 the rows are first summed per (date, hour) over all stations,
    then averaged per hour. The result has columns:
      when    - 'selection', 'pandemic' or '2019'
      hour    - hour-of-day value from the source table
      n       - number of (date, hour) rows for that hour
      entries - sum(entries) / n
      exits   - sum(exits) / n
    The query has no ORDER BY, so row order is not guaranteed.

    verbose : print the SQL before running it (previously the SQL was always
    printed regardless of this flag).
    """

    query = """ 
    (with cur as 
        (select 
            date, 
            hour, 
            sum(entries) as entries, 
            sum(exits) as exits 
        from filter_current 
        group by 
            date, 
            hour)
    select 'selection' as when, hour, count(*) n, sum(entries)/n as entries, sum(exits)/n as exits from cur group by hour)

    union

    (with pand as 
        (select 
            date, 
            hour, 
            sum(entries) as entries, 
            sum(exits) as exits 
        from filter_pandemic 
        group by 
            date, 
            hour)
    select 'pandemic' as when, hour, count(*) n, sum(entries)/n as entries, sum(exits)/n as exits from pand group by hour)

    union

    (with f19 as 
        (select 
            date, 
            hour, 
            sum(entries) as entries, 
            sum(exits) as exits 
        from filter_2019
        group by 
            date, 
            hour)
    select '2019' as when, hour, count(*) n, sum(entries)/n as entries, sum(exits)/n as exits from f19 group by hour)
    """

    return get_sql_from_template(con, query, None, verbose=verbose)

df_entries_by_tod = entries_by_tod(con, verbose=True)
df_entries_by_tod



In [None]:
%%time

create_filter_current(con, filters, verbose=False)
create_filter_pandemic(con, filters, verbose=False)
create_filter_2019(con, filters, verbose=False)

create_filter_current_daily(con, verbose=False)
create_filter_pandemic_daily(con, verbose=False)
create_filter_2019_daily(con, verbose=False)

create_filter_current_summary(con, verbose=False)
create_filter_pandemic_summary(con, verbose=False)
create_filter_2019_summary(con, verbose=False)

day_count_current=fn_day_count_current(con, verbose=False)
day_count_pandemic=fn_day_count_pandemic(con, verbose=False)
day_count_2019=fn_day_count_2019(con, verbose=False)



In [None]:
def entries_by_station(con, verbose=False):
    """
    Return one row per station_list row with average daily entries and exits
    for the selection, the pandemic window and 2019.

    For each of the temp tables filter_current_daily, filter_pandemic_daily
    and filter_2019_daily, entries and exits are averaged per station
    (sum / number of daily rows). These averages are left-joined onto
    station_list by pretty_name, so stations without data in a period get
    NULLs for it. The result is ordered by station_list.station.
    """
    per_station_sql = """
    with cur as 
    (SELECT 
        station,
        count(*) n,
        sum(entries)/n entries,
        sum(exits)/n exits
    from
        filter_current_daily
        group by station
    ),
    pand as 
    (SELECT 
        station,
        count(*) n,
        sum(entries)/n entries,
        sum(exits)/n exits
    from
        filter_pandemic_daily
        group by station
    ),
    f19 as 
    (SELECT 
        station,
        count(*) n,
        sum(entries)/n entries,
        sum(exits)/n exits
    from
        filter_2019_daily
        group by station
    )
    select
        station_list.pretty_name,
        latitude,
        longitude,
        cur.entries entries_selection,
        cur.exits exits_selection,
        pand.entries entries_pandemic,
        pand.exits exits_pandemic,
        f19.entries entries_2019,
        f19.exits exits_2019
    from 
        station_list
        left outer join cur on station_list.pretty_name = cur.station
        left outer join f19 on station_list.pretty_name = f19.station
        left outer join pand on station_list.pretty_name = pand.station
    order by station_list.station;
    """
    no_params = None
    return get_sql_from_template(con, per_station_sql, no_params, verbose=verbose)

df_entries_by_station = entries_by_station(con, verbose=False)
df_entries_by_station

In [None]:
df_entries_by_date

In [None]:
# Line chart of total entries per day for the selected period.
fig1 = px.line(df_entries_by_date, x="date", y="entries", height=360)
fig1.update_traces(line=dict(width=2))
fig1.update_layout(
    paper_bgcolor="LightSteelBlue",
    showlegend=False,
    plot_bgcolor="white",
    xaxis_title="Date",
    yaxis_title="Entries",
    legend_title="Legend Title",   # placeholder text; the legend is hidden by showlegend=False

    xaxis={
        'ticks': 'inside',
        'showgrid': True,            # thin grid lines in the background
        'zeroline': False,           # no thick line at x=0
        'visible': True,             # draw the axis (line, ticks, labels)
        'showline': True,            # show the x-axis line
        'linecolor': 'black',        # color of the x-axis line
        'tickfont_color': 'black',   # color of the tick labels
        'showticklabels': True,      # show x tick labels
        'mirror': True,              # repeat the axis line on the opposite (top) edge
    },
    yaxis={
        'ticks': 'inside',
        'showgrid': True,            # thin grid lines in the background
        'zeroline': False,           # no thick line at y=0
        'visible': True,             # draw the axis (line, ticks, labels)
        'showline': True,            # show the y-axis line
        'linecolor': 'black',        # color of the y-axis line
        'tickfont_color': 'black',   # color of the tick labels
        'showticklabels': True,      # show y tick labels
        'side': 'left',
        'mirror': True,              # repeat the axis line on the opposite (right) edge
    },
    #     font=dict(
    #         family="Courier New, monospace",
    #         size=18,
    #         color="RebeccaPurple"
    #     )
 )



In [None]:
df_entries_by_dow['Weekday'] = df_entries_by_dow['dow'].apply(lambda i: dowinvmap[i])
df_entries_by_dow

In [None]:
# Grouped bar chart: average entries per weekday, one bar color per period
# ('when' = selection / 2019 / pandemic).
fig2 = px.bar(df_entries_by_dow[['Weekday', 'when', 'entries']],
              x="Weekday", y="entries", color='when', barmode="group", height=360)
fig2.update_layout(
    paper_bgcolor="LightSteelBlue",
    showlegend=True,
    plot_bgcolor="white",
    # NOTE(review): the x axis shows weekdays, not dates, and xaxis['title'] is
    # also set to None below - confirm which title is intended.
    xaxis_title="Date",
    yaxis_title="Entries",
    legend_title=None,
    xaxis={
        'tickmode': 'linear',
        'tick0': 0,
        'dtick': 1,
        'title': None,
        'ticks': 'inside',
        'showgrid': True,            # thin grid lines in the background
        'zeroline': False,           # no thick line at x=0
        'visible': True,             # draw the axis (line, ticks, labels)
        'showline': True,            # show the x-axis line
        'linecolor': 'black',        # color of the x-axis line
        'tickfont_color': 'black',   # color of the tick labels
        'showticklabels': True,      # show x tick labels
        'mirror': True,              # repeat the axis line on the opposite (top) edge
    },
    yaxis={
        'ticks': 'inside',
        'showgrid': True,            # thin grid lines in the background
        'zeroline': False,           # no thick line at y=0
        'visible': True,             # draw the axis (line, ticks, labels)
        'showline': True,            # show the y-axis line
        'linecolor': 'black',        # color of the y-axis line
        'tickfont_color': 'black',   # color of the tick labels
        'showticklabels': True,      # show y tick labels
        'side': 'left',
        'mirror': True,              # repeat the axis line on the opposite (right) edge
    },
 )


In [None]:
df_entries_by_tod

In [None]:
fig = px.bar(df_entries_by_tod,
              x="hour", y="entries", color='when', barmode="group", height=360)
fig.update_layout(
    xaxis = dict(
        tickmode= 'linear',
        tick0=0,
        dtick=4,
        title='Time of day'
    )
)

fig.show()

In [None]:
df = df_entries_by_station[['pretty_name','entries_selection', 'entries_pandemic', 'entries_2019']].copy()
df['%Ch vs. 2019']=df['entries_selection']/df['entries_2019']-1
df['%Ch vs. Pandemic']=df['entries_selection']/df['entries_pandemic']-1
df=df[['pretty_name','entries_selection', '%Ch vs. 2019', '%Ch vs. Pandemic']]
df.columns = ['station','entries', '%Ch vs. 2019', '%Ch vs. Pandemic']
df

In [None]:
df.columns

In [None]:
df = df_entries_by_station.copy()
df


In [None]:
df = df[['pretty_name', 'Latitude', 'Longitude', 'entries_selection',
       'entries_pandemic', 'entries_2019',]].copy()
df['pct_v_2019'] = df['entries_selection'] / df['entries_2019'] - 1
df['pct_v_pandemic'] = df['entries_selection'] / df['entries_pandemic'] - 1
df.rename(columns={'pretty_name': 'station', 'entries_selection': 'entries'}, inplace=True)

#df = df[['station','Latitude','Longitude','entries','entries_pandemic','entries_2019']]
df

In [None]:
fig = px.scatter_mapbox(df, 
                        lat="Latitude", lon="Longitude", 
                        hover_name="station", hover_data={"entries": True, "Latitude": False, "Longitude": False, 
                                                          "pct_v_2019": True, "pct_v_pandemic" : True,
                                                          "entries_2019": False, "entries_pandemic": False},
                        size="entries", size_max=20,
                        color_continuous_scale=px.colors.sequential.Jet, color="pct_v_2019",
                        zoom=10, height=480, width=880)
fig.update_layout(mapbox_style="carto-darkmatter", mapbox_accesstoken=mapbox_token)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, showlegend=False)
fig.show()


In [None]:
#fig.write_html("/Users/drucev/projects/druce.github.io/_includes/MtA/MTA_map.html")

In [None]:
get_sql_from_template(con, 'select complex, count(*) from station_map group by complex having count(*) > 1', None)

In [None]:
# Show the station_map rows for the listed complex ids.
# The IN list no longer ends with a dangling comma, which is not standard SQL.
get_sql_from_template(con, 'select * from station_map where complex in (445,606,611,612,619,622,624,629,635,636) order by complex', None)


In [None]:
query = """
select station, sum(entries) as entries_2019, sum(exits) as exits_2019
from mta_clean
where date_part('year', date) = 2019
and date_part('month', date) = 1
group by station
order by station

"""

df2019 = get_sql_from_template(con, query, None)
df2019

In [None]:
query = """select station, sum(entries) entries_2022, sum(exits) exits_2022
from mta_clean
where date_part('year', date) = 2022
and date_part('month', date) = 1
group by station
order by station

"""

df2022 = get_sql_from_template(con, query, None)
df2022

In [None]:
df = pd.merge(left=df2019, right=df2022, on='station')

In [None]:
pd.options.display.max_rows=500


In [None]:
df['ratio2019'] = df['entries_2019']/df['exits_2019']
df['ratio2022'] = df['entries_2022']/df['exits_2022']
df['ratio_ch'] = df['ratio2022'] /  df['ratio2019'] -1


In [None]:
df.sort_values(by='entries_2019').reset_index(drop=True)

In [None]:
query="""select distinct station from mta_clean where station like 'Nostrand%';"""
get_sql_from_template(con, query, None)


In [None]:
query="""select distinct station from mta_staging where station like 'NOSTR%';"""
get_sql_from_template(con, query, None)


In [None]:
query = """
select distinct station from mta.mta_raw
where station like '%NOSTR%'
"""
get_sql_from_template(con, query, None)


In [7]:
station = 'NOSTRAND AV'
stationline = 'NOSTRAND AV-3'
pretty = 'Nostrand Av-3 (Bk)'


In [8]:
query = """
select date, sum(entries) entries, sum(exits) exits from mta_clean 
where station = '{station}'
group by date
order by date
""".format(station=pretty)

get_sql_from_template(con, query, None).tail(100)


Unnamed: 0,date,entries,exits
1393,2022-10-27,1862,3300
1394,2022-10-28,1842,3096
1395,2022-10-29,1110,2715
1396,2022-10-30,870,2145
1397,2022-10-31,1709,2935
1398,2022-11-01,1846,3291
1399,2022-11-02,1840,3172
1400,2022-11-03,1850,3245
1401,2022-11-04,1903,3277
1402,2022-11-05,1107,2412


In [9]:
query = """
select *
from mta.mta_raw
where station ='{station}' and LINENAME='3'
and date >= '2023-02-02'
order by station, scp, date, time
""".format(station=station)

get_sql_from_template(con, query, None)

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,time,DESC,ENTRY_COUNTER,EXIT_COUNTER
0,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,03:00:00,REGULAR,817200,1444400
1,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,07:00:00,REGULAR,817249,1444452
2,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,11:00:00,REGULAR,817442,1444756
3,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,15:00:00,REGULAR,817546,1444935
4,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,19:00:00,REGULAR,817795,1445335
5,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-02,23:00:00,REGULAR,817852,1445647
6,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-03,03:00:00,REGULAR,817872,1445746
7,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-03,07:00:00,REGULAR,817912,1445785
8,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-03,11:00:00,REGULAR,818056,1446042
9,R623,R061,00-00-00,NOSTRAND AV,3,IRT,2023-02-03,15:00:00,REGULAR,818165,1446262


In [10]:
query = """
select *
from mta_staging
where station ='{station}'
and date_time >= '2023-02-03'
order by station, turnstile, date_time

""".format(station=stationline)
get_sql_from_template(con, query, None)


Unnamed: 0,DATE_TIME,TURNSTILE,STATION,ENTRY_COUNTER,EXIT_COUNTER
0,2023-02-03 03:00:00,R623 R061 00-00-00,NOSTRAND AV-3,817872,1445746
1,2023-02-03 07:00:00,R623 R061 00-00-00,NOSTRAND AV-3,817912,1445785
2,2023-02-03 11:00:00,R623 R061 00-00-00,NOSTRAND AV-3,818056,1446042
3,2023-02-03 15:00:00,R623 R061 00-00-00,NOSTRAND AV-3,818165,1446262
4,2023-02-03 19:00:00,R623 R061 00-00-00,NOSTRAND AV-3,818380,1446654
5,2023-02-03 23:00:00,R623 R061 00-00-00,NOSTRAND AV-3,818424,1446893
6,2023-02-03 03:00:00,R623 R061 00-00-01,NOSTRAND AV-3,625374,1023717
7,2023-02-03 07:00:00,R623 R061 00-00-01,NOSTRAND AV-3,625404,1023736
8,2023-02-03 11:00:00,R623 R061 00-00-01,NOSTRAND AV-3,625492,1023934
9,2023-02-03 15:00:00,R623 R061 00-00-01,NOSTRAND AV-3,625560,1024083


In [29]:
query = """
select date_time, sum(entries),
round((date_part('hour', date_time)::float + date_part('minute', date_time)/60)::float / 4) * 4  as z
from mta_diff
where station ='{station}'
and date_time >= '2023-02-03'
group by date_time
""".format(station=stationline)
get_sql_from_template(con, query, None)


Unnamed: 0,DATE_TIME,sum(entries),z
0,2023-02-03 03:00:00,34,4.0
1,2023-02-03 07:00:00,133,8.0
2,2023-02-03 11:00:00,444,12.0
3,2023-02-03 15:00:00,320,16.0
4,2023-02-03 19:00:00,527,20.0
5,2023-02-03 23:00:00,123,24.0


In [9]:
query = """
-- select date_part('month', date) as month, date_part('year', date) as year, 
select date, sum(entries), sum(exits)
from mta_clean
where date >= '2023-02-02'
and station not like '%PTH (M)'
-- and station = '{station}'
group by date
order by date
"""
#.format(station=pretty)
get_sql_from_template(con, query, None)


Unnamed: 0,date,sum(entries),sum(exits)
0,2023-02-02,2253787,3449408
1,2023-02-03,1912035,2890544


In [13]:
query = """
select date, hour, entries, exits
from mta_clean
where date >= '2023-01-02'
and station not like '%PTH (M)'
and station = '{station}'
order by date
""".format(station=pretty)
get_sql_from_template(con, query, None)


Unnamed: 0,date,hour,entries,exits
0,2023-01-02,12,265,301
1,2023-01-02,20,114,424
2,2023-01-02,8,174,274
3,2023-01-02,16,242,514
4,2023-01-02,4,50,53
...,...,...,...,...
192,2023-02-03,16,527,865
193,2023-02-03,12,320,481
194,2023-02-03,8,444,578
195,2023-02-03,4,133,85


In [14]:
query = """
-- select date_part('month', date) as month, date_part('year', date) as year, 
select date, sum(entries), sum(exits)
from mta_clean
where date >= '2022-12-31'
and station not like '%PTH (M)'
and station = '{station}'
group by date
order by date
""".format(station=pretty)
get_sql_from_template(con, query, None)


Unnamed: 0,date,sum(entries),sum(exits)
0,2022-12-31,892,1994
1,2023-01-01,572,1467
2,2023-01-02,866,1731
3,2023-01-03,1572,2638
4,2023-01-04,1657,2868
5,2023-01-05,1699,2813
6,2023-01-06,1673,2946
7,2023-01-07,963,1915
8,2023-01-08,661,1469
9,2023-01-09,1567,2786


In [15]:
query="""
select
    date_trunc('day', date_time) date,
    4 * floor(date_part('hour', date_time) / 4) as hour,
    -- pretty name, borough from station_list
    map.pretty_name station,
    map.borough boro,
    sum(entries) entries,
    sum(exits) exits
from
    mta_diff
    left outer join station_list map on mta_diff.station=map.station
where 
mta_diff.station = '{station}'
and date = '2023-02-03'
group by 
    date,
    hour,
    map.pretty_name,
    boro
-- drop periods with no exits or entries
having sum(mta_diff.entries) > 0 or sum(mta_diff.exits) > 0

""".format(station=stationline)
get_sql_from_template(con, query, None)


Unnamed: 0,date,hour,station,boro,entries,exits
0,2023-02-03,0.0,Nostrand Av-3 (Bk),3,34,234
1,2023-02-03,4.0,Nostrand Av-3 (Bk),3,133,85
2,2023-02-03,8.0,Nostrand Av-3 (Bk),3,444,578
3,2023-02-03,12.0,Nostrand Av-3 (Bk),3,320,481
4,2023-02-03,16.0,Nostrand Av-3 (Bk),3,527,865
5,2023-02-03,20.0,Nostrand Av-3 (Bk),3,123,483


In [16]:
query = """
with sq as
(select station, turnstile, count(*)
from mta_diff
group by station, turnstile)
select station, count(*) from sq
group by station
order by count(*) limit 40;
"""
get_sql_from_template(con, query, None)


Unnamed: 0,STATION,count_star()
0,CLEVELAND ST-J,2
1,SUTTER AV-L,2
2,BAY PKWY-F,3
3,BAY 50 ST-D,3
4,CHAUNCEY ST-JZ,3
5,138/GRAND CONC-45,3
6,21 ST-G,3
7,225 ST-25,3
8,NEW LOTS-L,3
9,PARKSIDE AV-BQ,3


In [17]:
query = """
select round(2.5, 0)
"""
get_sql_from_template(con, query, None)


Unnamed: 0,"round(2.5, 0)"
0,3.0
