In [1]:
import gtfs_kit as gt
import pandas as pd
import numpy as np
import tkinter as tk
import requests, zipfile, io, os
from tkinter import filedialog

In [2]:
# Download the published feed and unpack it into ./gtfs.
zip_file_url=r"http://valleyregionaltransit.org/gtfs/VRT_Transit1.zip"
# A timeout keeps the notebook from hanging forever on a dead connection.
r = requests.get(zip_file_url, timeout=60)
# Stop here on an HTTP error instead of handing an error page to ZipFile.
r.raise_for_status()
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall("gtfs")
# Archive name without its extension ("VRT_Transit1" for the URL above).
gtfs_filename=os.path.splitext(os.path.basename(zip_file_url))[0]
feed=gt.read_gtfs("gtfs",dist_units='mi')

In [3]:
# Choices for the route drop-down: every distinct route short name in the
# feed, followed by an 'All' entry (the run cell treats 'All' as "every route").
# Missing short names are dropped so they never show up as a menu entry.
route_namesArray=feed.routes['route_short_name'].dropna().unique()
route_names=list(route_namesArray)+['All']

In [4]:
def button_gtfs_clicked():
    """Ask for the GTFS folder and remember it on ``root.gtfs``."""
    # askdirectory can only return folders, so the dialog title says "folder".
    chosen=filedialog.askdirectory(initialdir="/",title="Select GTFS folder")
    # An empty result means the dialog was cancelled; keep any earlier choice.
    if chosen:
        root.gtfs=chosen
def button_outfolder_clicked():
    """Ask for the output folder and remember it on ``root.outfolder``."""
    chosen=filedialog.askdirectory(initialdir="/",title="Select Output folder")
    # An empty result means the dialog was cancelled; keep any earlier choice.
    if chosen:
        root.outfolder=chosen
def close_window():
    """Store the chosen route and day type on ``root``, then close the window.

    Later cells read the selections back from ``root.route`` and
    ``root.weekday``.
    """
    selected_route=tkvar.get()
    selected_day_type=weekday.get()
    root.route=selected_route
    root.weekday=selected_day_type
    root.destroy()

root=tk.Tk()
root.title('Schedule Builder')

# Defaults so that pressing Run without browsing still leaves both attributes
# defined: "gtfs" is the folder the download cell extracts into, and a save
# folder of None makes buildSched skip writing files.
root.gtfs="gtfs"
root.outfolder=None

tkvar=tk.StringVar(root)

# The Browse button opens a folder chooser, so the label asks for a folder.
label_gtfs=tk.Label(root,text="Select GTFS folder")
label_gtfs.grid(column=0,row=0)
button_gtfs=tk.Button(root,text="Browse",command=button_gtfs_clicked)
button_gtfs.grid(column=0,row=1)

label_out=tk.Label(root,text="Select an output folder")
label_out.grid(column=0,row=2)
button_out=tk.Button(root,text="Browse",command=button_outfolder_clicked)
button_out.grid(column=0,row=3)

route_label=tk.Label(root,text="Pick a Route")
route_label.grid(column=0,row=4)
# Pre-select the first entry so Run never hands an empty route name onward.
# route_names always holds at least the 'All' entry.
route_choices=list(route_names)
tkvar.set(route_choices[0])
route_choice=tk.OptionMenu(root,tkvar,*route_choices)
route_choice.grid(column=0,row=5)

label_weekday=tk.Label(root,text="Choose Weekday Type")
label_weekday.grid(column=0,row=6)
# Pre-select "Weekday" (buildSched's own default) so a day type is always set.
weekday=tk.StringVar(value="Weekday")
tk.Radiobutton(root,text="Weekday",variable=weekday,value="Weekday").grid(column=0,row=7)
tk.Radiobutton(root,text="Saturday",variable=weekday,value="Saturday").grid(column=0,row=8)
tk.Radiobutton(root,text="All",variable=weekday,value="All").grid(column=0,row=9)

button_run=tk.Button(root,text='Run',width=25,command=close_window)
button_run.grid(column=0,row=10)
# Blocks this cell until the Run button (close_window) destroys the window.
root.mainloop()

In [5]:
# Replace the feed loaded by the download cell with the GTFS folder chosen in
# the GUI (root.gtfs); every later cell works against this feed.
# NOTE(review): the route menu was filled from the previously loaded feed, so a
# selected route name may not exist here if a different feed was chosen.
feed=gt.read_gtfs(root.gtfs,dist_units='mi')

In [6]:
# Function to get numbered timepoints
def getTimepoints(route_short_name,gtfs_feed=None):
    """Return the numbered timepoint stops of one route.

    Stop times, trips, routes and stops are joined, filtered to the given
    route and to rows whose ``timepoint`` flag is 1, and reduced to one row
    per stop_id. The stops are ordered by shape_id, direction_id and
    stop_sequence and numbered from 1 in that order. Stops whose label
    (stop name with the ``[NSEW][NSEW][CM]`` pattern removed) is the same
    share the number of the first stop carrying that label.

    Parameters
    ----------
    route_short_name : value compared against ``routes.route_short_name``.
    gtfs_feed : optional object exposing ``stop_times``, ``trips``, ``routes``
        and ``stops`` DataFrames. Defaults to the notebook-level ``feed``.

    Returns
    -------
    pandas.DataFrame with columns ``stop_id``, ``stop_name``, ``stop_label``
    and ``number``, one row per timepoint stop, in numbering order.
    """
    if gtfs_feed is None:
        gtfs_feed=feed

    timepoints=(
        gtfs_feed.stop_times
        .merge(gtfs_feed.trips)
        .merge(gtfs_feed.routes)
        .merge(gtfs_feed.stops)
    )
    on_route=timepoints['route_short_name']==route_short_name
    is_timepoint=timepoints['timepoint']==1
    # Copy so the column added below lands on an independent frame rather than
    # on a filtered view of the merge result.
    timepoints=timepoints.loc[on_route&is_timepoint].copy()
    timepoints['stop_label']=timepoints['stop_name'].str.replace(r"[NSEW][NSEW][CM]","",regex=True)

    # One row per stop, ordered the way the stops are visited.
    timepoints=timepoints.groupby('stop_id',sort=False,as_index=False).first()
    timepoints=timepoints.sort_values(['shape_id','direction_id','stop_sequence']).reset_index(drop=True)
    timepoints['number']=np.arange(1,len(timepoints)+1)

    # Stops that share a label take the number of the first such stop. Every
    # row keeps its own stop_id and stop_name, so repeated-label stops can
    # still be matched on stop_id by the caller.
    shared_number=timepoints.groupby('stop_label')['number'].transform('min')
    # Rows without a label fall outside every group; they keep their own number.
    shared_number=shared_number.reindex(timepoints.index).fillna(timepoints['number'])
    timepoints['number']=shared_number.astype(int)

    return timepoints[['stop_id','stop_name','stop_label','number']]

In [7]:
def _pivot_direction(sched,direction_id):
    """Pivot one direction of a timetable into a trip-by-stop departure table."""
    one_way=sched.loc[sched['direction_id']==direction_id]
    if one_way.empty:
        # The route has no trips in this direction.
        return pd.DataFrame()
    table=pd.pivot_table(
        one_way,
        index='trip_id',
        columns=['stop_id','stop_name','direction_id','stop_sequence','stop_label','number'],
        values='departure_time',
        aggfunc='first')
    if table.empty:
        return pd.DataFrame()
    # Order the stop columns by their position along the trip, then keep only
    # the stop label and timepoint number as column headers.
    table=table.sort_values(['direction_id','stop_sequence'],axis=1)
    table.columns=table.columns.droplevel(['stop_id','stop_name','direction_id','stop_sequence'])
    return table


def buildSched(route_short_name,save_folder=None,dayofweek='Weekday'):
    """Build the timepoint schedule tables for one route and day type.

    Uses the notebook-level ``feed``. Service dates are filtered to the
    requested day type, the route timetable is built for those dates,
    departure times are formatted as ``HH:MM AM/PM`` and joined to the
    numbered timepoints from ``getTimepoints``. One table per direction is
    produced (direction_id 0 is saved as "outbound", 1 as "inbound").

    Parameters
    ----------
    route_short_name : route short name to build; must exist in ``feed.routes``.
    save_folder : folder to write ``<route>_<daytype>_outbound.xlsx`` and
        ``<route>_<daytype>_inbound.xlsx`` into. Nothing is written when this
        is empty or None, and empty tables are never written.
    dayofweek : 'Weekday' (Monday-Friday), 'Saturday', 'Sunday' or 'All'
        (no filtering). Matching ignores case; an empty value means 'All'.

    Returns
    -------
    pandas.DataFrame: the direction_id 1 ("inbound") table, empty when there
    is no service to show.

    Raises
    ------
    ValueError: unknown ``dayofweek`` or unknown ``route_short_name``.
    """
    # pandas dayofweek numbering: Monday is 0, Sunday is 6.
    day_types={'weekday':[0,1,2,3,4],'saturday':[5],'sunday':[6],'all':None}
    day_key=str(dayofweek or 'All').strip().lower()
    if day_key not in day_types:
        raise ValueError("dayofweek must be one of 'Weekday', 'Saturday', 'Sunday' or 'All', got {!r}".format(dayofweek))
    # Always defined, so saving works for every accepted day type.
    savename='{}_{}'.format(route_short_name,day_key)

    route=feed.routes.loc[feed.routes['route_short_name']==route_short_name]
    if route.empty:
        raise ValueError("No route with short name {!r} in the feed".format(route_short_name))
    route_id=route['route_id'].values[0]

    service_dates=pd.DataFrame(gt.calendar.get_dates(feed),columns=['DateStr'])
    service_dates['Weekday']=pd.to_datetime(service_dates['DateStr']).dt.dayofweek
    wanted_days=day_types[day_key]
    if wanted_days is not None:
        service_dates=service_dates.loc[service_dates['Weekday'].isin(wanted_days)]
    service_dates=service_dates['DateStr'].tolist()

    timepoints=getTimepoints(route_short_name)
    sched=gt.routes.build_route_timetable(feed=feed,route_id=route_id,dates=service_dates)

    if sched.empty:
        # No service on the requested days: nothing to pivot.
        sched_out,sched_in=pd.DataFrame(),pd.DataFrame()
    else:
        sched=sched.dropna(axis=0,subset=['departure_time'])
        # GTFS times may run past 24:00:00 for trips after midnight. Parsing
        # them as durations and adding them to an arbitrary date wraps such
        # times onto the clock (25:10:00 becomes 01:10 AM).
        departures=pd.to_timedelta(sched['departure_time'])
        sched=sched.assign(departure_time=(pd.Timestamp(2000,1,1)+departures).dt.strftime('%I:%M %p'))
        sched=sched.merge(timepoints,how='left')
        sched_out=_pivot_direction(sched,0)
        sched_in=_pivot_direction(sched,1)

    if save_folder:
        for table,direction in ((sched_out,'outbound'),(sched_in,'inbound')):
            if not table.empty:
                # os.path.join picks the right separator on every platform.
                table.to_excel(os.path.join(save_folder,"{}_{}.xlsx".format(savename,direction)))
    return sched_in

In [8]:
# Expand the GUI's 'All' choices into explicit lists so that every combination
# works, including "all routes" together with a single day type.
if root.route=='All':
    routes_to_build=feed.routes.route_short_name.dropna().unique()
else:
    routes_to_build=[root.route]
day_types=['Weekday','Saturday'] if root.weekday=='All' else [root.weekday]

for route_name in routes_to_build:
    for day_type in day_types:
        buildSched(route_name,root.outfolder,day_type)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [9]:
# Build the selected route/day type once more so the table buildSched returns
# (the direction_id 1 schedule) is displayed as this cell's output.
# Note: this also rewrites the Excel files when root.outfolder is set.
buildSched(root.route,root.outfolder,root.weekday)

stop_label,Millspur Loop,University @ BSU Transit Center,Main Street Station
number,2.0,3.0,4.0
trip_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
108,06:10 AM,06:26 AM,06:34 AM
109,06:40 AM,06:56 AM,07:04 AM
111,07:11 AM,07:27 AM,07:35 AM
113,07:41 AM,07:57 AM,08:05 AM
115,08:12 AM,08:28 AM,08:36 AM
117,08:42 AM,08:58 AM,09:06 AM
119,09:12 AM,09:28 AM,09:36 AM
121,10:00 AM,10:21 AM,10:29 AM
122,10:12 AM,10:33 AM,10:40 AM
124,11:12 AM,11:33 AM,11:40 AM
