# TTC Subway Ridership

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
import os



### 1. Import and Combine Data

In [16]:
# Find all ttc subway ridership data file from the path
filenames = [file for file in os.listdir("TTC_ridership") if 'ttc-subway-ridership' in file]
filenames

['ttc-subway-ridership-2014.csv',
 'ttc-subway-ridership-2015.csv',
 'ttc-subway-ridership-2016.csv',
 'ttc-subway-ridership-2018.csv',
 'ttc-subway-ridership-2022.csv']

In [19]:
# Import the TTC subway station name list
ttc_stations = pd.read_csv("TTC_ridership\TTC_stations.csv")
ttc_stations

Unnamed: 0,Station Name,Line
0,Finch,1
1,North York Centre,1
2,Sheppard-Yonge (1 YONGE-UNIVERSITY),1
3,York Mills,1
4,Lawrence,1
...,...,...
75,Lawrence East,3
76,Ellesmere,3
77,Midland,3
78,Scarborough Centre,3


In [4]:
# Separate the TTC subway stations to 4 subway lines
line1 = ttc_stations[ttc_stations["Line"] == 1]
line2 = ttc_stations[ttc_stations["Line"] == 2]
line3 = ttc_stations[ttc_stations["Line"] == 3]
line4 = ttc_stations[ttc_stations["Line"] == 4]

In [10]:
# Start counting
count = 0

# Run through all files to put ridership and year data
for filename in filenames:
    # Read the yearly ridership data
    ridership_yearly_data = pd.read_csv(filename, header = None)
    # Get the year from the file name
    year = filename.split(".")[0][-4:]
    # Separate the name from the data
    name = [ridership_yearly_data[0][i] for i in range(0, len(ridership_yearly_data), 2)]
    # Separate the corresponding ridership count from the data
    ridership = [ridership_yearly_data[0][i] for i in range(1, len(ridership_yearly_data)+1, 2)]
    
    if count == 0:
        # Create a new ridership dataframe with data
        ttc_subway_ridership = pd.DataFrame({"Station Name": name, str(year): ridership})
    else:
        # Merge two dataframe by add a column for that year
        ttc_subway_ridership = ttc_subway_ridership.merge(pd.DataFrame({"Station Name": name, str(year): ridership}), how = "left", on = "Station Name")
    count = count + 1

ttc_subway_ridership.head()
    

Unnamed: 0,Station Name,2014,2015,2016,2018,2022
0,Bloor-Yonge (1 YONGE-UNIVERSITY),216190,216190,204630,204630,155186
1,Bathurst,35510,36460,29320,26900,26234
2,College,47940,47790,47600,44370,42883
3,Bay,31050,30860,27090,32690,24260
4,Davisville,23040,25330,24300,25990,13973


In [20]:
# Merge TTC station line to each station
ttc_subway_ridership = ttc_subway_ridership.merge(ttc_stations, how = "left", on = "Station Name")
ttc_subway_ridership.head(40)

Unnamed: 0,Station Name,2014,2015,2016,2018,2022,Line_x,Line_y
0,Bloor-Yonge (1 YONGE-UNIVERSITY),216190,216190,204630,204630.0,155186.0,1,1
1,Bathurst,35510,36460,29320,26900.0,26234.0,2,2
2,College,47940,47790,47600,44370.0,42883.0,1,1
3,Bay,31050,30860,27090,32690.0,24260.0,2,2
4,Davisville,23040,25330,24300,25990.0,13973.0,1,1
5,Bloor-Yonge (2 BLOOR-DANFORTH),193030,183240,186860,196460.0,131995.0,2,2
6,Downsview,39900,37670,40640,,,1,1
7,Broadview,34880,33460,29800,32670.0,25057.0,2,2
8,Dundas,75780,81330,77200,73560.0,72637.0,1,1
9,Castle Frank,7070,8350,8520,9760.0,12470.0,2,2


### 2. Find the busiest stations 

In [None]:
# 