In [1]:
# Standard tools
import numpy as np
import pandas as pd
import datetime as dt
from collections import Counter
import re

# Turn off warnings
# NOTE: this silences every warning category for the whole session,
# including deprecation notices emitted by pandas.
import warnings
warnings.filterwarnings('ignore')

# Pandas options
# Use the fully-qualified "display.*" keys. The short forms ("max_rows",
# "max_columns", "precision") are resolved by pattern matching and become
# ambiguous once pandas also defines "styler.*" options with the same suffix,
# at which point set_option raises OptionError.
pd.set_option("display.max_rows", 30)       # show at most 30 rows of a frame
pd.set_option("display.max_columns", None)  # None = never truncate columns
pd.set_option("display.precision", 3)       # 3 decimal places for floats

# For opening and closing files
import glob

In [2]:
# Collect the path of every processed turnstile CSV, ordered by filename
filenames_list = sorted(glob.glob("Turnstile Data/Processed CSV/*_proc.csv"))

# Split the sorted paths into two groups by position. Per the original note the
# split is based on timestamps: positions 32..48 are the "dst" group and
# everything before/after is the "norm" group.
# NOTE(review): the hard-coded positions assume a specific set of files in the
# folder -- confirm they still line up if files are added or removed.
filenames_list_norm = filenames_list[:32] + filenames_list[49:]
filenames_list_dst = filenames_list[32:49]

# Read each group into a list of dataframes, addressable by the same index as
# the matching filename list
turnstile_proc_norm = list(map(pd.read_csv, filenames_list_norm))
turnstile_proc_dst = list(map(pd.read_csv, filenames_list_dst))

In [3]:
# Sanity-check the "norm" import: for every file print its position and path,
# followed by the first rows of the columns at positions 1-4 of its dataframe
for position, path in enumerate(filenames_list_norm):
    print(position, path)
    preview = turnstile_proc_norm[position].iloc[:, 1:5].head()
    print(preview)

0 Turnstile Data/Processed CSV/turnstile_180331_proc.csv
       StationName  Latitude  Longitude  2018-03-24 00:00:00
0  34 ST-HERALD SQ    40.749    -73.989               5290.0
1   TIMES SQ-42 ST    40.755    -73.987               7404.0
2   34 ST-PENN STA    40.751    -73.990               3990.0
3   59 ST COLUMBUS    40.768    -73.982               3229.0
4            86 ST    40.780    -73.956               1766.0
1 Turnstile Data/Processed CSV/turnstile_180407_proc.csv
       StationName  Latitude  Longitude  2018-03-31 00:00:00
0  34 ST-HERALD SQ    40.749    -73.989               4532.0
1   TIMES SQ-42 ST    40.755    -73.987               7059.0
2   34 ST-PENN STA    40.751    -73.990               4174.0
3   59 ST COLUMBUS    40.768    -73.982               2739.0
4            86 ST    40.780    -73.956               1819.0
2 Turnstile Data/Processed CSV/turnstile_180414_proc.csv
       StationName  Latitude  Longitude  2018-04-07 00:00:00
0  34 ST-HERALD SQ    40.749    -73.

In [4]:
# Sanity-check the "dst" import: for every file print its position and path,
# followed by the first rows of the columns at positions 1-4 of its dataframe
for position, path in enumerate(filenames_list_dst):
    print(position, path)
    preview = turnstile_proc_dst[position].iloc[:, 1:5].head()
    print(preview)

0 Turnstile Data/Processed CSV/turnstile_181117_proc.csv
       StationName  Latitude  Longitude  2018-11-10 03:00:00
0  34 ST-HERALD SQ    40.749    -73.989               3085.0
1   TIMES SQ-42 ST    40.755    -73.987               2930.0
2   34 ST-PENN STA    40.751    -73.990               3953.0
3   59 ST COLUMBUS    40.768    -73.982               1930.0
4            86 ST    40.780    -73.956               2600.0
1 Turnstile Data/Processed CSV/turnstile_181124_proc.csv
       StationName  Latitude  Longitude  2018-11-17 03:00:00
0  34 ST-HERALD SQ    40.749    -73.989               3484.0
1   TIMES SQ-42 ST    40.755    -73.987               3024.0
2   34 ST-PENN STA    40.751    -73.990               3947.0
3   59 ST COLUMBUS    40.768    -73.982               1958.0
4            86 ST    40.780    -73.956               1807.0
2 Turnstile Data/Processed CSV/turnstile_181201_proc.csv
       StationName  Latitude  Longitude  2018-11-24 03:00:00
0  34 ST-HERALD SQ    40.749    -73.

In [5]:
# Reshape the "norm" frames: index each one by station name and drop the
# leftover row-number column plus the coordinate columns
turnstile_proc_norm_form = [
    frame.set_index("StationName").drop(columns = {'Unnamed: 0', "Latitude", "Longitude"})
    for frame in turnstile_proc_norm
]

# Place the frames side by side (axis=1), aligned on the station index,
# without sorting the row axis
turnstile_proc_norm_form_cat = pd.concat(turnstile_proc_norm_form, axis=1, sort=False)

# Save the transpose, so the original columns become rows and stations become columns
turnstile_proc_norm_form_cat.T.to_csv("Turnstile Data/Concatenated CSV/turnstile_norm_cat.csv")

In [6]:
# Reshape the "dst" frames: index each one by station name and drop the
# leftover row-number column plus the coordinate columns
turnstile_proc_dst_form = [
    frame.set_index("StationName").drop(columns = {'Unnamed: 0', "Latitude", "Longitude"})
    for frame in turnstile_proc_dst
]

# Place the frames side by side (axis=1), aligned on the station index,
# without sorting the row axis
turnstile_proc_dst_form_cat = pd.concat(turnstile_proc_dst_form, axis=1, sort=False)

# Save the transpose, so the original columns become rows and stations become columns
turnstile_proc_dst_form_cat.T.to_csv("Turnstile Data/Concatenated CSV/turnstile_dst_cat.csv")