# Code Snippet
Load the turnstile CSV files (which may be gzip-compressed) from the `data/` sub-directory and concatenate them into a single DataFrame.

In [1]:
import sys
import os
import glob

In [2]:
import pandas as pd

In [68]:
# Sort the matches: glob.glob returns paths in arbitrary, filesystem-dependent
# order, which would make the row order of `df` differ between runs.
file_list = sorted(glob.glob("data/turnstile_*.txt.gz"))
if not file_list:
    # Fail early with a readable message instead of pd.concat's generic
    # "No objects to concatenate" error (same exception type as before).
    raise ValueError("No input files match data/turnstile_*.txt.gz")

# read_csv infers gzip compression from the .gz file extension.
df_list = [pd.read_csv(file_name) for file_name in file_list]

# reset_index() without drop=True keeps each row's former per-file position in
# an 'index' column and gives the combined frame a fresh 0..n-1 index.
# NOTE: the raw 'EXITS' header carries trailing whitespace (visible in the
# df.columns output); it is deliberately left as-is here.
df = pd.concat(df_list).reset_index()

def normalize_line(line):
    """Return the distinct line characters of ``line``, upper-cased and sorted.

    Every character stands for one line, so repeated characters collapse:
    LNQR456W -> 456LNQRW
    """
    unique_lines = {char for char in line.upper()}
    return ''.join(sorted(unique_lines))

# Fold the normalized line list into the station name, then discard the
# now-redundant LINENAME column.
normalized_lines = df['LINENAME'].apply(normalize_line)
df['STATION'] = df['STATION'] + ' ' + normalized_lines
df = df.drop(columns='LINENAME', errors='ignore')

In [69]:
# Inspect the column labels after loading; note that the raw EXITS header
# still carries its trailing whitespace.
df.columns

Index(['index', 'C/A', 'UNIT', 'SCP', 'STATION', 'DIVISION', 'DATE', 'TIME',
       'DESC', 'ENTRIES',
       'EXITS                                                               '],
      dtype='object')

In [81]:
# Number of distinct turnstiles per station: every unique
# (C/A, UNIT, SCP, STATION) combination is counted once.
station_num_turnstiles = (
    df[['C/A', 'UNIT', 'SCP', 'STATION']]
    # No pre-sort needed: per-station counts do not depend on row order.
    .drop_duplicates()
    .groupby('STATION')
    # size() is the built-in row count per group; it replaces the slower
    # apply(lambda x: x.shape[0]) and yields the same Series.
    .size()
    .rename('Turnstile Counts')
)

In [96]:
# Spot-check the turnstile count of a single station (label-based lookup).
station_num_turnstiles.loc['103 ST-CORONA 7']

9

In [95]:
# Show the raw rows recorded for that same station.
df.loc[df['STATION'] == '103 ST-CORONA 7']

Unnamed: 0,index,C/A,UNIT,SCP,STATION,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
179410,179410,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,03:00:00,REGULAR,14189134,22467405
179411,179411,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,07:00:00,REGULAR,14189286,22467545
179412,179412,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,11:00:00,REGULAR,14189580,22467828
179413,179413,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,15:00:00,REGULAR,14189841,22468224
179414,179414,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,19:00:00,REGULAR,14190042,22468933
179415,179415,R529,R208,00-00-00,103 ST-CORONA 7,IRT,12/31/2016,23:00:00,REGULAR,14190190,22469516
179416,179416,R529,R208,00-00-00,103 ST-CORONA 7,IRT,01/01/2017,03:00:00,REGULAR,14190235,22469924
179417,179417,R529,R208,00-00-00,103 ST-CORONA 7,IRT,01/01/2017,07:00:00,REGULAR,14190306,22470048
179418,179418,R529,R208,00-00-00,103 ST-CORONA 7,IRT,01/01/2017,11:00:00,REGULAR,14190459,22470311
179419,179419,R529,R208,00-00-00,103 ST-CORONA 7,IRT,01/01/2017,15:00:00,REGULAR,14190667,22470592
