### Dependencies

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from time import time

from time import strftime

import warnings
warnings.filterwarnings("ignore")

### Needed Functions

In [2]:
def sheetsTTC(xlsx_file):
    
    """ 
    Reads in an Excel file and concatenates each sheet into 
    a returned pandas DataFrame.    
    
    Note:
        - Requires: pandas
        - Requires: Each worksheet must have exact matching
                    n columns, column order, column labels
                    and column dtypes.
    """
    
    xls = pd.ExcelFile(xlsx_file)
    
    out_df = pd.DataFrame()
    
    for sheet in xls.sheet_names:
        
        df = pd.read_excel(xls, sheet_name = sheet)
        
        df = df[[
            'Report Date', 'Route', 'Time', 'Day', 'Location', 'Incident', 'Min Delay', 'Min Gap', 'Direction', 'Vehicle'
        ]]
        
        out_df = pd.concat([out_df, df])
        
    return out_df

def excelDate(excel_time):
    
    """
    Converts excel datetime float format to pandas datetime
    
    """
    
    return pd.to_datetime('1900-01-01') + pd.to_timedelta(excel_time, 'D')


### Buses

In [3]:
# Bus 2019
bus2019_xls = pd.ExcelFile('Bus_2019.xlsx')

jan = pd.read_excel(bus2019_xls, 'Jan 2019')
feb = pd.read_excel(bus2019_xls, 'Feb 2019')
mar = pd.read_excel(bus2019_xls, 'Mar 2019')
apr = pd.read_excel(bus2019_xls, 'Apr 2019')
may = pd.read_excel(bus2019_xls, 'May 2019')

# April is the only month in the 5 1/2 years of data
# with the column 'Incident ID', so it gets dropped
apr = apr.drop(columns={'Incident ID'})

# Rename the two mis-labelled columns
apr = apr.rename(columns={'Delay': 'Min Delay', 'Gap': 'Min Gap'})

bus19 = pd.concat([jan, feb])
bus19 = pd.concat([bus19, mar])
bus19 = pd.concat([bus19, apr])
bus19 = pd.concat([bus19, may])

# Apply sheetsTTC Function
bus18 = sheetsTTC('Bus_2018.xlsx')
bus17 = sheetsTTC('Bus_2017.xlsx')
bus16 = sheetsTTC('Bus_2016.xlsx')
bus15 = sheetsTTC('Bus_2015.xlsx')
bus14 = sheetsTTC('Bus_2014.xlsx')

# Add Year Columns
bus19['Year'] = 2019
bus18['Year'] = 2018
bus17['Year'] = 2017
bus16['Year'] = 2016
bus15['Year'] = 2015
bus14['Year'] = 2014

# Combine 2014 - 2019
buses = pd.concat([bus19, bus18])
buses = pd.concat([buses, bus17])
buses = pd.concat([buses, bus16])
buses = pd.concat([buses, bus15])
buses = pd.concat([buses, bus14])

# Add Bus labeled Type column
buses['Type'] = 'Bus'

In [4]:
buses.head()

Unnamed: 0,Report Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle,Year,Type
0,43466,39,0.009028,Tuesday,NECR,Mechanical,9.0,18.0,W/B,1794.0,2019,Bus
1,43466,111,0.010417,Tuesday,Eglington,Mechanical,15.0,30.0,S/B,8065.0,2019,Bus
2,43466,35,0.0125,Tuesday,Finch,Mechanical,9.0,18.0,S/B,3275.0,2019,Bus
3,43466,25,0.020833,Tuesday,Don Mills Rd/Eglinton Ave E,Mechanical,9.0,18.0,N/B,8840.0,2019,Bus
4,43466,36,0.027778,Tuesday,Humberwood,Investigation,9.0,18.0,E/B,9119.0,2019,Bus


### Streetcar

In [5]:
# Apply sheetsTTC Function
streetcar19 = sheetsTTC('Streetcar_2019.xlsx')
streetcar18 = sheetsTTC('Streetcar_2018.xlsx')
streetcar17 = sheetsTTC('Streetcar_2017.xlsx')
streetcar16 = sheetsTTC('Streetcar_2016.xlsx')
streetcar15 = sheetsTTC('Streetcar_2015.xlsx')
streetcar14 = sheetsTTC('Streetcar_2014.xlsx')

# Add Year Columns
streetcar19['Year'] = 2019
streetcar18['Year'] = 2018
streetcar17['Year'] = 2017
streetcar16['Year'] = 2016
streetcar15['Year'] = 2015
streetcar14['Year'] = 2014

# Combine 2014 - 2019
streetcars = pd.concat([streetcar19, streetcar18])
streetcars = pd.concat([streetcars, streetcar17])
streetcars = pd.concat([streetcars,streetcar16])
streetcars = pd.concat([streetcars, streetcar15])
streetcars = pd.concat([streetcars, streetcar14])

# Add Streetcar labeled Type column
streetcars['Type'] = 'Streetcar'

In [6]:
streetcars.head()

Unnamed: 0,Report Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle,Year,Type
0,43466,301,0.047222,Tuesday,Queen/Braodview,Held By,6.0,13.0,E/B,4193.0,2019,Streetcar
1,43466,511,0.0625,Tuesday,Bathurst/College,Investigation,5.0,10.0,N/B,1038.0,2019,Streetcar
2,43466,306,0.069444,Tuesday,Dundas West stn.,Mechanical,8.0,16.0,W/B,4146.0,2019,Streetcar
3,43466,505,0.131944,Tuesday,Lansdowne and Dundas,Mechanical,6.0,12.0,E/B,8416.0,2019,Streetcar
4,43466,310,0.163194,Tuesday,Spadina and Lakshore,Held By,20.0,30.0,N/B,4465.0,2019,Streetcar


### Surface Routes (Streetcar + Buses)

In [11]:
surface = pd.concat([buses, streetcars])

In [12]:
surface['Report Date'] = excelDate(surface['Report Date'])
surface['Time'] = round(((surface['Time'] * 86400)/60)/60, 2)
#surface['Time'] = time(surface['Time']).strftime('%-H')

In [13]:
surface.to_csv('ttc_Delays_surfaceRoutes.csv')
surface.sample(25)

Unnamed: 0,Report Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle,Year,Type
4093,2017-05-25,68,0.201389,Tuesday,Finch,Mechanical,5.0,9.0,n/b,7606.0,2017,Bus
210,2016-07-04,16,0.583333,Saturday,Warden stn,Investigation,10.0,20.0,N/B,7847.0,2016,Bus
489,2017-10-05,198,0.583333,Tuesday,Kennedy Station,Mechanical,10.0,20.0,S/B,8647.0,2017,Bus
815,2015-03-26,501,0.295139,Tuesday,Queen and Connaught,Late Leaving Garage,2.0,5.0,W/B,4202.0,2015,Streetcar
5459,2019-02-25,32,0.520833,Saturday,Dufferin and Eglinton,General Delay,30.0,40.0,B/W,,2019,Bus
4626,2019-04-29,5,0.389583,Saturday,St Clair and Yonge,Investigation,20.0,40.0,S/B,1248.0,2019,Bus
1886,2016-01-14,62,0.25,Tuesday,Mortimer route,General Delay,10.0,1.0,B/W,,2016,Bus
3936,2019-03-23,87,0.489583,Thursday,444 Lumsden,Diversion,10.0,20.0,B/W,8461.0,2019,Bus
1308,2019-02-08,57,0.238889,Wednesday,Eglinton Yard,Utilized Off Route,8.0,16.0,O/B,8709.0,2019,Bus
4774,2017-12-24,67,0.247222,Friday,PHRA,Late Leaving Garage,7.0,33.0,N/B,8922.0,2017,Bus


### Quick Look

In [None]:
surface.info()

In [None]:
surface.describe()

In [None]:
surface['Incident'].value_counts()

In [None]:
surface['Type'].value_counts()

In [None]:
surface['Day'].value_counts()