In [83]:
import pandas as pd
from datetime import date, timedelta

baseURL = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_"
start_date = date(2019, 9, 21) # 21st September 2019

############## Helper Functions ##############
def string_of_weeks(start_date, number_of_weeks):
    '''
    Returns a list of strings where each string represents a date in "yymmdd" format
    Starting with the "start_date"
    start_date should be a date object
    '''
    #print(start_date.strftime("%y%m%d")) #for testing
    a_week = datetime.timedelta(days=7)
    
    list_of_dates = list([start_date.strftime("%y%m%d")])
    
    #Works but not readable
    #list_of_dates += ([(d-week*a_week).strftime("%y%m%d") for week in range(1,number_of_weeks)])
    
    assert number_of_weeks >= 1
    
    for week in range(1, number_of_weeks):
        temp = start_date - week * a_week # a number * timedelta to walk backwards in time
        list_of_dates.append(temp.strftime("%y%m%d"))
    
    return list_of_dates

#print(string_of_weeks(start_date, 52)) #for testing

def create_MTA_dataframe(start_date, number_of_weeks):
    '''
    Returns a panda DataFrame object of MTA turnstile data starting from
    start_date and going back to number_of_weeks
    '''    
    dates_for_downloads = string_of_weeks(start_date, number_of_weeks)
    print(dates_for_downloads)
    
    mta_df = pd.read_csv(baseURL+dates_for_downloads[0]+".txt")
    #print(type(mta_df)) #for testing
    
    for week in dates_for_downloads:
        temp = pd.read_csv(baseURL+week+".txt")
        mta_df = mta_df.append(temp, ignore_index=True)
        print("Added one dataframe")
    
    mta_df.info()
    return mta_df
    

In [84]:
mta_df_test = create_MTA_dataframe(start_date, 2)
mta_df_test

['190921', '190914']
<class 'pandas.core.frame.DataFrame'>
Added one dataframe
Added one dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615441 entries, 0 to 615440
Data columns (total 11 columns):
C/A                                                                     615441 non-null object
UNIT                                                                    615441 non-null object
SCP                                                                     615441 non-null object
STATION                                                                 615441 non-null object
LINENAME                                                                615441 non-null object
DIVISION                                                                615441 non-null object
DATE                                                                    615441 non-null object
TIME                                                                    615441 non-null object
DESC                          

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
0,A002,R051,02-00-00,59 ST,NQR456W,BMT,09/14/2019,00:00:00,REGULAR,7198818,2438323
1,A002,R051,02-00-00,59 ST,NQR456W,BMT,09/14/2019,04:00:00,REGULAR,7198834,2438325
2,A002,R051,02-00-00,59 ST,NQR456W,BMT,09/14/2019,08:00:00,REGULAR,7198847,2438354
3,A002,R051,02-00-00,59 ST,NQR456W,BMT,09/14/2019,12:00:00,REGULAR,7198929,2438428
4,A002,R051,02-00-00,59 ST,NQR456W,BMT,09/14/2019,16:00:00,REGULAR,7199125,2438483
...,...,...,...,...,...,...,...,...,...,...,...
615436,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,RIT,09/13/2019,05:00:00,REGULAR,5554,420
615437,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,RIT,09/13/2019,09:00:00,REGULAR,5554,420
615438,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,RIT,09/13/2019,13:00:00,REGULAR,5554,420
615439,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,RIT,09/13/2019,17:00:00,REGULAR,5554,420
