In [1]:
import gtfstk as gt # https://mrcagney.github.io/gtfstk_docs/, https://github.com/mrcagney/gtfstk/blob/master/ipynb/examples.ipynb
import pandas as pd # https://pandas.pydata.org/pandas-docs/stable/index.html
import geopandas as gpd # http://geopandas.org/
import numpy as np # https://www.numpy.org/
import folium # https://python-visualization.github.io/folium/
from shapely.geometry import Point, Polygon # https://shapely.readthedocs.io/en/latest/
import matplotlib.pyplot as plt # https://matplotlib.org/
import osmnx as ox # https://osmnx.readthedocs.io/en/stable/index.html
import os, zipfile, requests, io, shutil, re
import tkinter as tk
from tkinter import filedialog
from IPython.display import clear_output
# Enables plotting in Jupyter notebook
%matplotlib inline

In [2]:
crs={'init':'epsg:4326'} # WGS84 projection

In [5]:
def gtfs_clicked():
    root.gtfs=filedialog.askopenfilename(initialdir="/",
                                            title="Select GTFS Zip File",
                                            filetypes=(("zip files","*.zip"),("all files","*.*")))
def employers_clicked():
    root.employers=filedialog.askopenfilename(initialdir="/",
                                                title="Select Employer Data",
                                                filetypes=(("excel files","*.xlsx"),("all files","*.*")))
def save_points_clicked():
    root.points_file=filedialog.asksaveasfilename(initialdir="/",
                                                  title="Save Points",
                                                  defaultextension=".shp",
                                                  filetypes = (("shapefiles","*.shp"),("all files","*.*")))
def save_employers_clicked():
    root.empoloyers_file=filedialog.asksaveasfilename(initialdir="/",
                                                  title="Save Employers",
                                                  defaultextension=".shp",
                                                  filetypes = (("shapefiles","*.shp"),("all files","*.*")))
def close_window():
    root.num_bins=bins.get()
    root.buffer_distance=buffer.get()
    root.grid_size=grid_size.get()

    root.destroy()
    
root=tk.Tk()
root.title("Pass Program Analyzer")

bins=tk.IntVar(root)
bins.set(5)
buffer=tk.DoubleVar(root)
buffer.set(.5)
grid_size=tk.DoubleVar(root)
grid_size.set(.125)

tk.Label(root,text="Select GTFS Zip File").grid(column=0,row=0)
tk.Button(root,text="Browse",command=gtfs_clicked).grid(column=1,row=0)

tk.Label(root,text="Select Employer Data").grid(column=0,row=1)
tk.Button(root,text="Browse",command=employers_clicked).grid(column=1,row=1)

tk.Label(root,text="Save Points File").grid(column=0,row=2)
tk.Button(root,text="Browse",command=save_points_clicked).grid(column=1,row=2)

tk.Label(root,text="Save Employers File").grid(column=0,row=3)
tk.Button(root,text="Browse",command=save_employers_clicked).grid(column=1,row=3)

tk.Label(root,text="Number of Bins").grid(column=0,row=4)
tk.Entry(root,textvariable=bins).grid(column=1,row=4)

tk.Label(root,text="Stop Buffer Distance (miles)").grid(column=0,row=5)
tk.Entry(root,textvariable=buffer).grid(column=1,row=5)

tk.Label(root,text="Grid Size (miles)").grid(column=0,row=6)
tk.Entry(root,textvariable=grid_size).grid(column=1,row=6)

tk.Button(root,text="Run",width=25,command=close_window).grid(column=0,row=7)
root.mainloop()

In [30]:
def pointScorer(feed,buffer_distance,grid_size,num_bins,save_file=False,buffer_type='euclidean'):
    feed=gt.read_gtfs(feed,dist_units='mi')
    
    service_dates=gt.calendar.get_dates(feed)
    service_dates=pd.DataFrame(service_dates,columns=['DateStr'])
    service_dates['Date']=pd.to_datetime(service_dates['DateStr'])
    service_dates['Weekday']=service_dates['Date'].dt.dayofweek
    weekdays=service_dates.loc[(service_dates['Weekday']==0)]['DateStr'].tolist()
    saturdays=service_dates.loc[(service_dates['Weekday']==5)]['DateStr'].tolist()
    sundays=service_dates.loc[(service_dates['Weekday']==6)]['DateStr'].tolist()
    weekday=gt.trips.compute_busiest_date(feed,weekdays)
    saturday=gt.trips.compute_busiest_date(feed,saturdays)
    sunday=gt.trips.compute_busiest_date(feed,sundays)
    service_dates=[weekday,saturday,sunday]
    
    alpha=[]
    for letter in range(65,(65+num_bins)):
        alpha.append(chr(letter))
    alpha.reverse()
        
    output=gpd.GeoDataFrame(crs=crs)
    output2=gpd.GeoDataFrame(crs=crs)
    one_deg_lat=69.05397727272727 # miles
    one_deg_lon=48.99318181818182 # miles
    conversion_deg=(np.mean([one_deg_lat,one_deg_lon]))
    buffer_distance_deg=buffer_distance/conversion_deg
    buffer_distance_met=buffer_distance*1609.34
    grid_size_deg=grid_size/conversion_deg

    stops=feed.stop_times.merge(gt.stops.geometrize_stops(feed.stops)).merge(gt.stops.compute_stop_stats(feed,dates=service_dates),sort=False).merge(feed.trips).merge(feed.routes)
    stops=stops.groupby(['stop_id','shape_id'],as_index=False).first()
    stops=gpd.GeoDataFrame(stops,geometry='geometry',crs=crs)

    xmin,ymin,xmax,ymax=gt.miscellany.compute_bounds(feed)
    xmin=xmin*1.001
    ymin=ymin*0.999
    xmax=xmax*0.999
    ymax=ymax*1.001
    rows=int(np.ceil((ymax-ymin)/grid_size_deg))
    cols=int(np.ceil((xmax-xmin)/grid_size_deg))
    grid_points=[]
    XleftOrigin = xmin
    XrightOrigin = xmin + grid_size_deg
    YtopOrigin = ymax
    YbottomOrigin = ymax- grid_size_deg
    for i in range(cols):
        Ytop=YtopOrigin
        Ybottom=YbottomOrigin
        for j in range(rows):
            grid_points.append(Point([XleftOrigin,Ytop]))
            Ytop=Ytop-grid_size_deg
            Ybottom=Ybottom-grid_size_deg
        XleftOrigin=XleftOrigin + grid_size_deg
        XrightOrigin=XrightOrigin + grid_size_deg
    grid=gpd.GeoDataFrame({'geometry':grid_points},crs=crs)
    grid['OBJECTID']=grid.index
    grid['Coords']=list(zip(grid.geometry.y,grid.geometry.x))
    
    grid_buff=gpd.GeoDataFrame(crs=crs)
    for index,row in grid.iterrows():
        clear_output(wait=True)
        if buffer_type=='euclidean':
            df=gpd.GeoDataFrame(grid.iloc[[index]],geometry='geometry',crs=crs)
            df['geometry']=df.geometry.buffer(buffer_distance_deg)
        if buffer_type=='walkshed':
            try:
                df=ox.graph_to_gdfs(ox.graph_from_point(row['Coords'], distance=800,distance_type='network',network_type='walk'),nodes=False,edges=True)
            except Exception:
                continue
        df['OBJECTID']=index
        df=df.dissolve(by='OBJECTID')
        df['geometry']=df['geometry'].convex_hull
        grid_buff=grid_buff.append(df,sort=True)
        print('{}% Walked ({} out of {})'.format(str(round((len(grid_buff)/len(grid)*100),1)),len(grid_buff),len(grid)))
    for i in range(len(grid_buff)):
        clear_output(wait=True)
        df=grid_buff.iloc[[i]]
        df=gpd.sjoin(df,stops,how='inner')
        df['OBJECTID']=df.index
        df=df[['OBJECTID','stop_id','stop_name','shape_id','route_id','route_short_name','num_routes','num_trips']]
        df=df.groupby(['OBJECTID','shape_id'],as_index=False).min()
        output=output.append(df,sort=True)
        print('{}% Scored ({} out of {})'.format(str(round((len(output)/len(grid_buff)*100),1)),len(output),len(grid_buff)))
    output=output.groupby('OBJECTID',as_index=False).sum()
    output=output.merge(grid)
    output=gpd.GeoDataFrame(output,geometry='geometry',crs=crs)
    output=output[['OBJECTID','num_trips','geometry']]
    output['Class']=pd.cut(output['num_trips'],bins=num_bins,labels=alpha)
#     for i in range(len(output['Class'])):
#         output.replace(output['Class'].iloc[i],alpha[i],inplace=True)
#     output['Class']=alpha[:len(output['Class'])]
#     output.replace(pd.Interval(2, 85, closed='right'),'D',inplace=True)
#     output.replace(pd.Interval(85, 400, closed='right'),'C',inplace=True)
#     output.replace(pd.Interval(400, 1000, closed='right'),'B',inplace=True)
#     output.replace(pd.Interval(1000, 7927, closed='right'),'A',inplace=True)
    
    for index, row in output.iterrows():
        clear_output(wait=True)
        df=gpd.GeoDataFrame(row,crs=crs).T
        df['geometry']=df.geometry.buffer(grid_size_deg)
        output2=output2.append(df)
        print('{}% Dissolved ({} out of {})'.format(str(round(((index+1)/len(output2)*100),1)),index+1,len(output2)))
    output2['num_trips']=output2['num_trips'].astype(float)
    output2=output2.dissolve(by='Class',aggfunc='mean').reset_index()
    output2.sort_values('Class',ascending=False,inplace=True)
    output2['geometry']=output2['geometry'].difference(output2.shift(-1))
    if save_file:
        output2.to_file(save_file)
    return output2

In [4]:
def scoreEmployers(employersData,pointData,save_file=None):
    df=pd.read_excel(employersData,sheet_name='Sheet2') # Employer and ridership information
    df['Coordinates']=list(zip(df.Longitude,df.Latitude))
    df['Coordinates']=df['Coordinates'].apply(Point)
    df=df.dropna(axis=0,subset=['Latitude','Longitude'])
    df=gpd.GeoDataFrame(df,geometry='Coordinates',crs=crs)
    df=gpd.sjoin(df,pointData,how='left')
    if save_file:
        df.to_file(save_file)
    return df

In [21]:
pointData=pointScorer(root.gtfs,
                      root.buffer_distance,
                      root.grid_size,
                      root.num_bins,
                      root.points_file,
                      buffer_type='euclidean')

62.0% Scored (387 out of 624)


TypeError: 'NoneType' object is not subscriptable

In [None]:
scoreEmployers(root.employers,pointData,root.employers_file)