In [1]:
import pandas as pd
import numpy as np
import datetime
import pickle
from string import Template

In [2]:
def get_MTA_data(startdate, enddate):
    '''
    Reads New York City MTA subway station turnstile data (entry and exit
    counts) from the MTA web site and combines it into one Pandas dataframe.

    Data is available in weekly comma delimited text files with a week
    beginning on Sunday and ending on Saturday. Data file names include the
    ending Saturday date (formatted YYMMDD) to identify the week the data in
    the file is for, e.g. turnstile_180623.txt.

    Input Parameters
    startdate: Saturday date on which to begin reading data (ending Saturday
        of the first weekly file). Anything pd.to_datetime accepts.
    enddate: Saturday date on which to end reading data (inclusive).

    Returns
    A Pandas dataframe holding the rows of every weekly file, in the order
    the files were read, plus a 'week' column that labels the files
    1, 2, 3, ... in that same order. Row index values are kept as read from
    each file, so they repeat from one week to the next. An empty dataframe
    is returned when startdate is later than enddate.

    Notes
    The dates are assumed to be Saturdays and are not validated: the function
    simply steps forward from startdate in 7-day increments.
    Errors raised by pd.read_csv (for example when a weekly file cannot be
    fetched) are not caught here.
    '''

    startdate = pd.to_datetime(startdate)  ### Assumption: Dates are Saturdays
    enddate = pd.to_datetime(enddate)

    # Example data url:
    # http://web.mta.info/developers/data/nyct/turnstile/turnstile_180623.txt
    urlbase = Template('http://web.mta.info/developers/data/nyct/turnstile/turnstile_$date.txt')

    week = pd.Timedelta('7 days')

    # Collect the weekly frames and concatenate once at the end; calling
    # pd.concat inside the loop would re-copy all earlier rows every week.
    weekly_frames = []
    next_saturday = startdate

    while next_saturday <= enddate:
        urlday = next_saturday.strftime('%y%m%d')
        url = urlbase.substitute(date=urlday)

        newdata = pd.read_csv(url)

        # label each week's observations with a different integer (1-based)
        newdata['week'] = len(weekly_frames) + 1
        weekly_frames.append(newdata)

        next_saturday = next_saturday + week

    # pd.concat raises on an empty list, so keep the "no weeks in range"
    # result an empty dataframe.
    if not weekly_frames:
        return pd.DataFrame()

    return pd.concat(weekly_frames)
    




In [3]:
# Read the weekly files whose ending Saturdays run from 04/28/2018 through 06/30/2018 (inclusive).
# Each file is fetched over HTTP by get_MTA_data, so this cell needs network access.
data = get_MTA_data('04/28/2018','06/30/2018')


In [4]:
# Save the dataframe to a pickle file so it can be reloaded without downloading again.
# The path is relative and opened in 'wb' mode, so an existing file of this name is overwritten.
with open('MTA_Apr_June_with_weeks.pickle', 'wb') as f:
    pickle.dump(data,f)