# Code Snippet
Load the CSV files (which may be compressed) from the `data` sub-directory.

In [1]:
import sys
import os
import glob

In [2]:
import pandas as pd

In [68]:
# Collect the gzip-compressed turnstile dumps. glob returns matches in
# arbitrary, filesystem-dependent order, so sort them to make the row order of
# the combined frame reproducible across runs and machines.
file_list = sorted(glob.glob("data/turnstile_*.txt.gz"))
if not file_list:
    # pd.concat fails on an empty list anyway ("No objects to concatenate");
    # raise the same exception type but say which pattern matched nothing.
    raise ValueError("no input files match data/turnstile_*.txt.gz")

# read_csv infers the compression from the file extension by default.
df_list = [pd.read_csv(file_name) for file_name in file_list]
df = pd.concat(df_list)

# Every file contributes its own index; reset_index replaces it with a single
# running index and keeps the old per-file labels as an 'index' column.
df.reset_index(inplace=True)

def normalize_line(line):
    """Return the line codes in *line* upper-cased, de-duplicated and sorted.

    Each character is treated as one line code, e.g.
    LNQR456W -> 456LNQRW
    """
    unique_codes = {code for code in line.upper()}
    return ''.join(sorted(unique_codes))

# Canonicalise every row's line list so the same set of lines always produces
# the same string, then append it to the station name (separated by a space)
# to form the station key used from here on.
line_codes = df['LINENAME'].map(normalize_line)
df['LINENAME'] = line_codes
df['STATION'] = df['STATION'].str.cat(line_codes, sep=' ')
# LINENAME is now folded into STATION, so remove the separate column.
df.drop(columns='LINENAME', errors='ignore', inplace=True)

In [69]:
# Inspect the column labels of the combined frame.
# NOTE(review): in the output recorded below, the EXITS label carries trailing
# whitespace, so that column must be referenced with the padding included
# (or the labels stripped first).
df.columns

Index(['index', 'C/A', 'UNIT', 'SCP', 'STATION', 'DIVISION', 'DATE', 'TIME',
       'DESC', 'ENTRIES',
       'EXITS                                                               '],
      dtype='object')

In [81]:
# Count turnstiles per station: each distinct (C/A, UNIT, SCP, STATION)
# combination is counted once, then the remaining rows are tallied per STATION.
# GroupBy.size() does the tally natively instead of running a Python lambda
# once per group through groupby.apply. groupby sorts the station keys itself
# and the counts do not depend on row order, so no sort of the frame is needed.
turnstile_keys = df[['C/A', 'UNIT', 'SCP', 'STATION']].drop_duplicates()
station_num_turnstiles = turnstile_keys.groupby('STATION').size()
station_num_turnstiles.name = 'Turnstile Counts'

In [82]:
# Display the per-station turnstile counts computed in the previous cell.
station_num_turnstiles

STATION
1 AV L                         10
103 ST 1                        6
103 ST 6                        6
103 ST BC                       3
103 ST-CORONA 7                 9
104 ST A                        5
104 ST JZ                       4
110 ST 6                        6
111 ST 7                        5
111 ST A                        7
111 ST J                        4
116 ST 23                       8
116 ST 6                        8
116 ST BC                       6
116 ST-COLUMBIA 1               6
121 ST JZ                       6
125 ST 1                        5
125 ST 23                       8
125 ST 456                     14
125 ST ABCD                    16
135 ST 23                      12
135 ST BC                      11
137 ST CITY COL 1              10
138/GRAND CONC 45               3
14 ST 123FLM                   33
14 ST ACEL                     10
14 ST-UNION SQ 456LNQRW        38
145 ST 1                        7
145 ST 3                        3
145 ST