In [15]:
import requests, zipfile, io, os
import gtfstk as gt # https://mrcagney.github.io/gtfstk_docs/, https://github.com/mrcagney/gtfstk/blob/master/ipynb/examples.ipynb
import pandas as pd # https://pandas.pydata.org/pandas-docs/stable/index.html
import numpy as np # https://www.numpy.org/
import geopandas as gpd # http://geopandas.org/
import osmnx as ox # https://osmnx.readthedocs.io/en/stable/index.html
import tkinter as tk
import multiprocessing as mp
from tkinter import filedialog
from shapely.geometry import Point, LineString # https://shapely.readthedocs.io/en/latest/
from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [16]:
crs = dict(init='epsg:4326')  # shared CRS definition: EPSG:4326 (WGS84)

In [3]:
def button_gtfs_clicked():
    """Ask for the GTFS zip file and store the chosen path on ``root.gtfs``."""
    zip_filter = ("zip files", "*.zip")
    any_filter = ("all files", "*.*")
    root.gtfs = filedialog.askopenfilename(
        initialdir="/",
        title="Select GTFS Zip file",
        filetypes=(zip_filter, any_filter),
    )
def button_outfolder_clicked():
    """Ask for the output directory and store the chosen path on ``root.outfolder``."""
    chosen_dir = filedialog.askdirectory(
        initialdir="/",
        title="Select Output folder",
    )
    root.outfolder = chosen_dir
def button_centerline_clicked():
    """Ask for the centerline shapefile and store the chosen path on ``root.centerline``."""
    chosen_file = filedialog.askopenfilename(
        initialdir="/",
        title="Select Centerline Shapefile",
    )
    root.centerline = chosen_file
# Build the small Tk window used to choose the notebook's inputs.
root=tk.Tk()
root.title('Brochure Builder')
root.geometry('350x200')

# Start every selection as '' so later cells can read root.gtfs,
# root.outfolder and root.centerline even when a Browse button was never
# clicked (without this, reading the attribute raises AttributeError).
root.gtfs=''
root.outfolder=''
root.centerline=''

label_gtfs=tk.Label(root,text="Select GTFS zip file.")
label_gtfs.grid(column=0,row=0)
button_gtfs=tk.Button(root,text="Browse",command=button_gtfs_clicked)
button_gtfs.grid(column=0,row=1)

label_centerline=tk.Label(root,text="Select centerline shapefile")
label_centerline.grid(column=0,row=2)
button_centerline=tk.Button(root, text="Browse", command=button_centerline_clicked)
button_centerline.grid(column=0,row=3)

label_out=tk.Label(root,text="Select an output folder")
label_out.grid(column=0,row=4)
button_out=tk.Button(root,text="Browse",command=button_outfolder_clicked)
button_out.grid(column=0,row=5)

# "Run" destroys the window, which ends mainloop() and lets the notebook continue.
button_run=tk.Button(root,text='Run',width=25,command=root.destroy)
button_run.grid(column=0,row=6)
root.mainloop()

In [4]:
# Read in GTFS file

# Fall back to the hard-coded feed URL below when no zip file was chosen in
# the dialog (attribute never set, or an empty selection after cancelling).
gtfs_source=getattr(root,'gtfs','')
if not gtfs_source:
    zip_file_url=r"http://valleyregionaltransit.org/gtfs/VRT_Transit1.zip"
    r = requests.get(zip_file_url,timeout=60)
    # Fail on an HTTP error here instead of later with BadZipFile on an error page.
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall("gtfs")
    feed=gt.read_gtfs("gtfs",dist_units='mi')
    gtfs_source=zip_file_url
else:
    feed=gt.read_gtfs(gtfs_source,dist_units='mi')

# Feed name without directory or extension; used to name the output files.
gtfs_filename=os.path.splitext(os.path.basename(gtfs_source))[0]

In [5]:
# Use the folder picked in the dialog; otherwise the default folder named
# after the feed.
outfolder=getattr(root,'outfolder','')
if not outfolder:
    outfolder=r"N:\Planning - New File Structure\GIS\Data\BrochureData\{}".format(gtfs_filename)
# Create the folder that is actually used. The previous code called
# os.mkdir(root.outfolder), which is '' whenever the default folder applies.
os.makedirs(outfolder,exist_ok=True)
# Path template: outfolder.format(name) gives "<folder>/<name>.shp".
outfolder=os.path.join(outfolder,"{}.shp")

In [17]:
def getRoutes(save_file=None):
    """Return one dissolved geometry per ``route_short_name``.

    The feed's shapes are geometrized with gtfstk, merged with the trips and
    routes tables, and dissolved on ``route_short_name``.

    Parameters
    ----------
    save_file : str, optional
        When given, the result is also written to this path with ``to_file``.

    Returns
    -------
    The dissolved routes frame.
    """
    shape_lines=gt.shapes.geometrize_shapes(feed.shapes)
    dissolved=(
        shape_lines
        .merge(feed.trips)
        .merge(feed.routes)
        .dissolve('route_short_name',as_index=False)
    )
    if save_file:
        dissolved.to_file(save_file)
    return dissolved

In [18]:
# Function to get stops for each route
def getStopsRoutes(save_file=None):
    """Return the stops served by each route.

    Joins stop_times, stops, trips and routes, then keeps the first joined row
    for every (stop_id, route_short_name) pair.

    Parameters
    ----------
    save_file : str, optional
        When given, the per-route stops (stop_id, route_short_name, stop_name,
        geometry) are written to this path with ``to_file``.

    Returns
    -------
    geopandas.GeoDataFrame
        Without ``save_file``: one row per (stop_id, route_short_name),
        including ``Coordinates`` as a (lat, lon) tuple.
        With ``save_file``: the full joined stop_times table, including the
        ``stop_label`` column (unchanged from the previous behaviour).
    """
    stops=gt.stops.geometrize_stops(feed.stops).to_crs(crs)
    stops['Coordinates']=list(zip(feed.stops.stop_lat,feed.stops.stop_lon)) # OSMNX uses lat/long
    stops['geometry']=[Point(xy) for xy in zip(feed.stops.stop_lon,feed.stops.stop_lat)] # Most other stuff likes long/lat
    df=feed.stop_times.merge(stops)
    df=df.merge(feed.trips)
    df=df.merge(feed.routes)
    stops_routes=df.groupby(['stop_id','route_short_name'],as_index=False).first() # Get stops for each route
    # Use the shared crs; the former crs='init:4326' is not a valid CRS definition.
    stops_routes=gpd.GeoDataFrame(stops_routes,geometry='geometry',crs=crs)
    # Assign the cleaned labels directly; an in-place replace on a column
    # selection is not guaranteed to write back to df.
    df['stop_label']=df['stop_name'].replace(r"[NSEW][NSEW][CM]","",regex=True)
    df=gpd.GeoDataFrame(df,geometry='geometry',crs=crs)
    if save_file:
        stops_routes[['stop_id','route_short_name','stop_name','geometry']].to_file(save_file)
        return df
    return stops_routes[['stop_id','stop_sequence','departure_time','stop_name','Coordinates','direction_id','route_short_name','route_long_name','trip_id','geometry','route_color']]

In [19]:
# Function to get numbered timepoints
def getTimepoints(save_file=None):
    """Number the timepoints of every route.

    Stop times are joined with trips, routes and geometrized stops. Rows are
    kept when ``timepoint == 1`` or, if the feed has no ``timepoint`` column,
    when ``departure_time`` is not null. Within each route the rows are ordered
    by direction_id and stop_sequence, reduced to the first row per
    ``stop_label``, and numbered 1..n in that order.

    Parameters
    ----------
    save_file : str, optional
        When given, the result is converted to a GeoDataFrame and written once
        to this path with ``to_file``.

    Returns
    -------
    DataFrame (GeoDataFrame when ``save_file`` is given) with the columns
    stop_id, stop_label, route_short_name, number and geometry. An empty frame
    with those columns is returned when the feed has no routes.
    """
    out_columns=['stop_id','stop_label','route_short_name','number','geometry']
    df=feed.stop_times
    df=df.merge(feed.trips)
    df=df.merge(feed.routes)
    df=df.merge(gt.stops.geometrize_stops(feed.stops))
    # Assign the cleaned labels directly; an in-place replace on a column
    # selection is not guaranteed to write back to df.
    df['stop_label']=df['stop_name'].replace(r"[NSEW][NSEW][CM]","",regex=True)
    if 'timepoint' in df.columns:
        timepoints=df.loc[df['timepoint']==1]
    else:
        timepoints=df.loc[df['departure_time'].notna()]
    timepoints=timepoints.sort_values(['route_short_name','trip_id','direction_id','stop_sequence'])
    # Collect the per-route frames and concatenate once, rather than growing a
    # frame (and rewriting the output file) on every loop iteration.
    per_route=[]
    for i in feed.routes.route_short_name.unique():
        route=timepoints.loc[timepoints['route_short_name']==i]
        route=route.sort_values(['direction_id','stop_sequence'])
        route=route.groupby('stop_label',sort=False).first().reset_index()
        route['number']=route.index+1
        per_route.append(route[out_columns])
    if not per_route:
        return pd.DataFrame(columns=out_columns)
    numbered_routes=pd.concat(per_route)
    if save_file:
        numbered_routes=gpd.GeoDataFrame(numbered_routes,geometry='geometry',crs=crs)
        numbered_routes.to_file(save_file)
    return numbered_routes

In [20]:
# Create a layer for transfer locations
def getTranfers(save_file=None):
    """Return one transfer location per (stop_id, route_short_name) pair.

    Transfers are joined to the geometrized stops through ``from_stop_id``,
    then to stop_times and trips (left joins) and to routes (right join). The
    first joined row per pair is kept.

    Parameters
    ----------
    save_file : str, optional
        When given, the layer is also written to this path with ``to_file``.

    Returns
    -------
    geopandas.GeoDataFrame with the columns route_short_name, stop_id,
    geometry and stop_name.
    """
    stop_points=gt.stops.geometrize_stops(feed.stops)
    joined=(
        feed.transfers
        .merge(stop_points,left_on='from_stop_id',right_on='stop_id',how='left')
        .merge(feed.stop_times,how='left')
        .merge(feed.trips,how='left')
        .merge(feed.routes,how='right')
    )
    first_per_pair=joined.groupby(['stop_id','route_short_name'],as_index=False).first()
    keep_columns=['route_short_name','stop_id','geometry','stop_name']
    transfer_layer=gpd.GeoDataFrame(first_per_pair[keep_columns],geometry='geometry',crs=crs)
    if save_file:
        transfer_layer.to_file(save_file)
    return transfer_layer

In [21]:
def getWater(save_file=None,water_shp=r"N:\Planning - New File Structure\GIS\Data\Environmental\WaterFeatures.shp",buffer_distance=1):
    """Return the water features that fall within a buffer around each route.

    Parameters
    ----------
    save_file : str, optional
        When given, only route_short_name and geometry are kept and the result
        is written to this path with ``to_file``.
    water_shp : str
        Path of the water features layer, read with ``gpd.read_file``.
    buffer_distance : float
        Buffer around each route, in miles. It is converted to degrees with the
        mean of the two miles-per-degree constants below, so the buffer is only
        approximate.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per route whose buffer overlaps water; the geometry is the
        (multi)polygon of water inside that buffer.
    """
    one_deg_lat=69.05397727272727 # miles
    one_deg_lon=48.99318181818182 # miles
    conversion_deg=np.mean([one_deg_lat,one_deg_lon])
    buffer_distance_deg=buffer_distance/conversion_deg
    water=gpd.read_file(water_shp).to_crs(crs).unary_union
    water_out=getRoutes()
    # Clip all route buffers against the merged water geometry in one
    # vectorised call instead of building and appending one-row frames.
    water_out.geometry=water_out.geometry.buffer(buffer_distance_deg).intersection(water)
    clipped=water_out.geometry
    # Keep only non-empty areal results; routes with no water nearby drop out.
    has_water=clipped.geom_type.isin(['Polygon','MultiPolygon'])&~clipped.is_empty
    water_out=water_out.loc[has_water]
    if save_file:
        water_out=water_out[['route_short_name','geometry']]
        water_out.to_file(save_file)
    return water_out

In [22]:
# Function to query OpenStreetMap for street networks near stops with a stop_id and route_short_name for definition querying.
def getStreets(save_file=None,distance=1600,centerline=None,stops_routes=None):
    """Collect the walkable street network around every stop of every route.

    For each stop the OSM walk network around its ``Coordinates`` is downloaded
    with osmnx and tagged with the stop's ``stop_id`` and ``route_short_name``.
    When ``centerline`` is given, the OSM edges are replaced by the centerline
    features that intersect the convex hull of that walk network.

    Parameters
    ----------
    save_file : str, optional
        When given, the result is reduced to the export columns and written to
        this path with ``to_file``. A DataFrame passed in this position is
        treated as ``stops_routes`` instead, so ``pool.map(getStreets, chunks)``
        works.
    distance : number
        Passed to ``ox.graph_from_point`` as the network distance
        (presumably metres; confirm against the osmnx documentation).
    centerline : str, optional
        Path of a centerline layer. The save step expects it to provide the
        columns StName, StSuffix and FuncClass.
    stops_routes : DataFrame, optional
        Stops to process, needing the columns Coordinates, stop_id,
        route_short_name and stop_name. Defaults to ``getStopsRoutes()``.

    Returns
    -------
    geopandas.GeoDataFrame
        Line features tagged with stop_id and route_short_name; empty when no
        stop produced a network.
    """
    # pool.map(getStreets, chunks) passes each chunk as the first positional
    # argument; a DataFrame there is the stop subset, never a file path.
    if isinstance(save_file,pd.DataFrame):
        stops_routes,save_file=save_file,None
    stops_routes2=getStopsRoutes() if stops_routes is None else stops_routes
    # Read the centerline layer once, not once per stop.
    cl=gpd.read_file(centerline).to_crs(crs) if centerline else None
    total=len(stops_routes2)
    street_frames=[]
    skipped=[]
    # Count by position so progress is correct for frames that do not have a
    # 0..n-1 index (e.g. chunks of a larger frame).
    for position,(index,row) in enumerate(stops_routes2.iterrows(),start=1):
        clear_output(wait=True)
        try:
            g=ox.graph_from_point(row['Coordinates'],distance=distance,distance_type='network',network_type='walk',truncate_by_edge=True)
            df=ox.graph_to_gdfs((g),nodes=False,edges=True)
            df['route_short_name']=row['route_short_name']
            df['stop_id']=row['stop_id']
            if cl is not None:
                df=df.dissolve(by='stop_id')
                df.geometry=df.geometry.convex_hull
                df=gpd.sjoin(cl,df,how='inner')
                df['route_short_name']=row['route_short_name']
                df['stop_id']=row['stop_id']
            street_frames.append(df)
            ox.plot_graph(g,node_size=0)
        except Exception as err:
            # Best effort: one failing stop must not abort the run, but record
            # it so the failure is reported instead of silently dropped.
            skipped.append((row.get('stop_id',index),repr(err)))
            continue
        print(row['stop_name'])
        print('{}% Walked ({} out of {})'.format(str(round(((position/total)*100),1)),position,total))
    # Report after the loop; clear_output would wipe messages printed inside it.
    if skipped:
        print('{} stop(s) skipped because of errors:'.format(len(skipped)))
        for stop_id,reason in skipped:
            print('  {}: {}'.format(stop_id,reason))
    if not street_frames:
        return gpd.GeoDataFrame(crs=crs)
    streets_gdf=gpd.GeoDataFrame(pd.concat(street_frames,sort=False),geometry='geometry',crs=crs)
    is_line=streets_gdf.geometry.geom_type.isin(['LineString','MultiLineString'])
    streets_gdf=streets_gdf.loc[is_line].reset_index()
    if save_file:
        if centerline:
            streets_gdf=streets_gdf[['stop_id','route_short_name','StName','StSuffix','FuncClass','geometry']]
        else:
            streets_gdf=streets_gdf[['stop_id','route_short_name','geometry','highway','osmid']]
        streets_gdf=gpd.GeoDataFrame(streets_gdf,geometry='geometry',crs=crs)
        streets_gdf.to_file(save_file)
    return streets_gdf

In [None]:
# getRoutes(outfolder.format(gtfs_filename+"_routes"))
# getStopsRoutes(outfolder.format(gtfs_filename+"_stops_routes"))
# getTimepoints(outfolder.format(gtfs_filename+"_timepoints"))
# if feed.transfers is not None and not feed.transfers.empty:
#     getTranfers(outfolder.format(gtfs_filename+"_transfers"))
# getWater(outfolder.format(gtfs_filename+"_water"))
# getStreets(save_file=outfolder.format(gtfs_filename+"_streets"),centerline=root.centerline)

In [24]:
# Split the per-route stops into roughly one chunk per CPU core.
stops_routes=getStopsRoutes()
num_processes=mp.cpu_count()
# Round up and never go below 1: a chunk size of 0 (fewer stops than cores)
# would make range() raise ValueError.
chunk_size=max(1,(stops_routes.shape[0]+num_processes-1)//num_processes)
# Slice by position; looking rows up through index labels only works when the
# index happens to be 0..n-1.
chunks=[stops_routes.iloc[i:i+chunk_size] for i in range(0,stops_routes.shape[0],chunk_size)]

In [None]:
# Run getStreets over the chunks in parallel. pool.map calls getStreets(chunk),
# so each chunk arrives as getStreets' first positional parameter.
# The with-block shuts the worker processes down once map() has returned,
# instead of leaving the pool open for the life of the kernel.
with mp.Pool(processes=num_processes) as pool:
    result=pool.map(getStreets,chunks)

In [None]:
# Display the list returned by pool.map (one entry per chunk).
result