In [1]:
import pandas as pd
import os, sys

In [2]:
## Insert the path or directory for the GTFS folder
#gtfs_folder = "examples/IowaCity/iowacityGTFS20220822"
gtfs_folder = "examples/TwinCities"

# The converted files are written to a sibling folder named "<gtfs_folder>_model".
# model_folder is only defined when the GTFS folder exists.
if os.path.isdir(gtfs_folder):
    model_folder = gtfs_folder+"_model"
    try:
        os.mkdir(model_folder)
        print ('Directory', model_folder,'is created by the program.')
    except FileExistsError:
        # Re-running the notebook is expected, so an existing folder is not an error.
        print ('Directory', model_folder,'already exists. The program will continue.')
    except OSError as err:
        # Any other failure (for example missing permissions) means the later cells
        # cannot write their output, so report the real cause instead of hiding it.
        print ('Directory', model_folder,'could not be created:', err, 'The program will terminate.')
        quit(keep_kernel=True)
else:
    print ('Directory', gtfs_folder, 'DOES NOT exist. The program will terminate.')
    quit(keep_kernel=True)


Directory examples/TwinCities_model is created by the program.


# Trips

In [3]:
# select the service IDs corresponding to a weekday
# change this later to specify the date and automatically select the service IDs
selected_service_ids = ['c_24568_b_55347_d_31', 'c_24568_b_55347_d_23', 'c_24565_b_55346_d_31', 'c_21961_b_30936_d_31', 'c_21742_b_30476_d_31']

# Columns written to ft_input_trips.dat, in this order.
trip_file_columns = ['trip_id', 'route_id', 'type', 'start_time', 'capacity', 'shape_id', 'direction_id']
# Read the trips file
# Select trips with the selected service IDs
# Remove spaces from all text values
# Add any missing output column with the placeholder value -1
# Save the selected trips as a tab-delimited .dat file
try:
    gtfs_trips = pd.read_csv(gtfs_folder+'/trips.txt')
    selected_trips = gtfs_trips[gtfs_trips['service_id'].isin(selected_service_ids)]
    # replace() returns a new frame, so the column assignments below do not touch gtfs_trips.
    selected_trips = selected_trips.replace(' ', '', regex=True)
    for cl in trip_file_columns:
        if cl not in selected_trips.columns:
            print ('The column', cl,'is not available in the GTFS file trips.txt. A default value will be added.')
            selected_trips[cl] = -1
    selected_trips[trip_file_columns].to_csv(model_folder+'/ft_input_trips.dat', sep='\t', index=False)
    print (len(selected_trips), "trips are selected.")
    if selected_trips.empty:
        # Every later cell filters on these trips, so an empty selection empties all outputs.
        print ('Warning: none of the selected service IDs were found in trips.txt.')
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('GTFS file trips.txt does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)


The column type is not available in the GTFS file trips.txt. A default value will be added.
The column start_time is not available in the GTFS file trips.txt. A default value will be added.
The column capacity is not available in the GTFS file trips.txt. A default value will be added.
356 trips are selected.


# Routes

In [4]:
# Columns written to ft_input_routes.dat, in this order.
route_file_columns = ['route_id', 'route_short_name', 'route_long_name', 'route_type']
# Read the routes file
# Select routes if in the selected route set
# Remove spaces from all text values
# Add any missing output column with the placeholder value -1
# Save the selected routes as a tab-delimited .dat file
route_set = set(selected_trips['route_id'].tolist())
try:
    gtfs_routes = pd.read_csv(gtfs_folder+'/routes.txt')
    # The trips cell removes spaces from the text values of selected_trips, so route_set
    # holds space-free IDs. Compare against route IDs cleaned the same way; otherwise a
    # route whose ID contains a space would never match and would be dropped silently.
    route_keys = gtfs_routes['route_id'].replace(' ', '', regex=True)
    selected_routes = gtfs_routes[route_keys.isin(route_set)]
    # replace() returns a new frame, so the column assignments below do not touch gtfs_routes.
    selected_routes = selected_routes.replace(' ', '', regex=True)
    for cl in route_file_columns:
        if cl not in selected_routes.columns:
            print ('The column', cl,'is not available in the GTFS file routes.txt. A default value will be added.')
            selected_routes[cl] = -1
    selected_routes[route_file_columns].to_csv(model_folder+'/ft_input_routes.dat', sep='\t', index=False)
    print (len(selected_routes), "routes are selected.")
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('GTFS file routes.txt does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)

14 routes are selected.


# stop-times

In [5]:
# Columns written to ft_input_stopTimes.dat, in this order.
stop_time_file_columns = ['trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence']
# Read the stop_times file
# Select stop_times with the selected trip IDs
# Remove spaces from all text values and colons from the time columns
# Add any missing output column with the placeholder value -1
# Save the selected stop-times as a tab-delimited .dat file
trip_set = set(selected_trips['trip_id'].tolist())
try:
    gtfs_stop_times = pd.read_csv(gtfs_folder+'/stop_times.txt')
    # The trips cell removes spaces from the text values of selected_trips, so trip_set
    # holds space-free IDs. Compare against trip IDs cleaned the same way.
    trip_keys = gtfs_stop_times['trip_id'].replace(' ', '', regex=True)
    selected_stop_times = gtfs_stop_times[trip_keys.isin(trip_set)]
    # replace() returns a new frame, so the column assignments below do not touch gtfs_stop_times.
    selected_stop_times = selected_stop_times.replace(' ', '', regex=True)
    # Colons are removed from the clock times only (HH:MM:SS becomes HHMMSS). Removing
    # them from every column would also rewrite trip_id / stop_id values that contain a
    # colon, and those would then no longer match the IDs written to ft_input_trips.dat.
    for cl in ('arrival_time', 'departure_time'):
        if cl in selected_stop_times.columns:
            selected_stop_times[cl] = selected_stop_times[cl].replace(':', '', regex=True)
    for cl in stop_time_file_columns:
        if cl not in selected_stop_times.columns:
            print ('The column', cl,'is not available in the GTFS file stop_times.txt. A default value will be added.')
            selected_stop_times[cl] = -1
    selected_stop_times[stop_time_file_columns].to_csv(model_folder+'/ft_input_stopTimes.dat', sep='\t', index=False)
    print (len(selected_stop_times), "stop-times are selected.")
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('GTFS file stop_times.txt does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)

11136 stop-times are selected.


# Stops

In [6]:
# Columns written to ft_input_stops.dat, in this order.
stop_file_columns = ['stop_id',  'stop_name', 'stop_desc', 'stop_lat', 'stop_lon', 'capacity']
# Read the stops file
# Select the stops used by the selected stop-times
# Replace spaces with underscores (stop_id instead keeps the form used in the stop-times)
# Add any missing output column with the placeholder value -1
# Save the selected stops as a tab-delimited .dat file
stop_set = set(selected_stop_times['stop_id'].tolist())

def _stop_match_key(stop_id):
    """Return stop_id with spaces and colons removed; non-text IDs are returned unchanged."""
    if isinstance(stop_id, str):
        return stop_id.replace(' ', '').replace(':', '')
    return stop_id

# selected_stop_times has had characters removed from its text values, so an ID in
# stop_set can differ from the raw ID in stops.txt. Match on a key with spaces and
# colons removed from both sides, and remember which stop-times ID each key stands for.
stop_id_by_key = {_stop_match_key(stop_id): stop_id for stop_id in stop_set}
try:
    gtfs_stops = pd.read_csv(gtfs_folder+'/stops.txt')
    stop_keys = gtfs_stops['stop_id'].map(_stop_match_key)
    selected_stops = gtfs_stops[stop_keys.isin(list(stop_id_by_key))]
    # replace() returns a new frame, so the column assignments below do not touch gtfs_stops.
    selected_stops = selected_stops.replace(' ', '_', regex=True)
    # Write every stop under exactly the ID used in selected_stop_times so that the
    # stops file and the stop-times file can be joined on stop_id.
    selected_stops['stop_id'] = stop_keys.loc[selected_stops.index].map(stop_id_by_key)
    for cl in stop_file_columns:
        if cl not in selected_stops.columns:
            print ('The column', cl,'is not available in the GTFS file stops.txt. A default value will be added.')
            selected_stops[cl] = -1
    selected_stops[stop_file_columns].to_csv(model_folder+'/ft_input_stops.dat', sep='\t', index=False)
    print (len(selected_stops), "stops are selected.")
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('GTFS file stops.txt does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)

The column capacity is not available in the GTFS file stops.txt. A default value will be added.
358 stops are selected.


# Transfers

In [3]:
# NOTE(review): this replaces the model_folder derived from gtfs_folder in the setup
# cell ("<gtfs_folder>_model"), so the stops read here are not the ones written by the
# Stops cell above. Confirm this override is intended before a Restart & Run All.
model_folder = "examples/TwinCities"

# Parallel lists: stop[n] is the stop ID, stopLat[n] / stopLon[n] its coordinates
# taken from columns 3 and 4 (0-based) of the tab-delimited stops file.
stop = []
stopLat = []
stopLon = []
try:
    # The with-block closes the file even when a row fails to parse.
    with open(model_folder+'/ft_input_stops.dat', 'r') as inFile:
        next(inFile, None)    # skip the header row
        for strIn in inFile:
            if not strIn.strip():
                continue    # tolerate blank lines instead of aborting the whole read
            strSplt = strIn.split("\t")
            stop.append(strSplt[0])
            stopLat.append(float(strSplt[3]))
            stopLon.append(float(strSplt[4]))
    print (len(stop), "stops")
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('Input file ft_input_stops.dat does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)

13701 stops


generating transfers

In [5]:
import math
## insert transfer distance threshold in miles
transfer_threshold = 0.25 #miles

# Sphere radius used to turn the central angle into a distance; the threshold above is
# in miles, so this is presumably the Earth's radius in miles.
sphereRadiusMiles = 3960
# Speed used to turn distance into time: miles per hour, reported in minutes.
transferSpeedMph = 3.0
degreesToRradians = math.pi/180.0

# Per-stop terms of the spherical law of cosines, computed once instead of once per
# pair. Each expression is the one the pair formula needs, so the distances are unchanged.
stopSinColat = [math.sin(math.pi/2.0 - lat * degreesToRradians) for lat in stopLat]
stopCosColat = [math.cos(math.pi/2.0 - lat * degreesToRradians) for lat in stopLat]
stopLonRad = [lon * degreesToRradians for lon in stopLon]

k=0    # number of stop pairs within the threshold (each pair is written in both directions)
# The with-block closes (and therefore flushes) the file even if the loop is interrupted.
with open(model_folder+'/ft_input_transfers.dat', "w") as outFile:
    # NOTE(review): the header names four columns but every row below has a fifth field
    # ("1"). Left unchanged because the reader of this file is not visible here - confirm.
    outFile.write("from_stop\tto_stop\tdistance\ttime\n")
    for i in range(len(stop)):
        if i%1000 == 0: print (i, k)
        # j starts after i, so each unordered pair is visited once and never i == j.
        for j in range(i+1, len(stop)):
            tmpDist = (stopSinColat[i] * stopSinColat[j] * math.cos(stopLonRad[i] - stopLonRad[j]) + stopCosColat[i] * stopCosColat[j])
            # Rounding can push the cosine slightly outside [-1, 1]; clamping keeps
            # math.acos inside its domain, so no exception handling is needed around it.
            tmpDist = max(-1.0,min(tmpDist,1.0))
            tmpDist = sphereRadiusMiles * math.acos(tmpDist)
            tmpDist = max(tmpDist,0.001)    # co-located stops still get a small positive distance
            if tmpDist <= transfer_threshold:
                k = k + 1
                tmpTime = tmpDist / transferSpeedMph * 60
                strDist = str(round(tmpDist,3))
                strTime = str(round(tmpTime,2))
                outFile.write("\t".join([str(stop[i]), str(stop[j]), strDist, strTime, "1"]) + "\n")
                outFile.write("\t".join([str(stop[j]), str(stop[i]), strDist, strTime, "1"]) + "\n")
print (k, "transfers")


0 0
1000 6889
2000 13566
3000 19838
4000 25497
5000 33995
6000 39133
7000 44133
8000 48797
9000 52947
10000 56835
11000 59689
12000 62838
13000 65976
68165 transfers


# Access Links

In [6]:
# Parallel lists: node[n] is the zone ID, nodeLat[n] / nodeLon[n] its coordinates
# taken from columns 1 and 2 (0-based) of the tab-delimited zones file.
node = []
nodeLat = []
nodeLon = []
try:
    # The with-block closes the file even when a row fails to parse.
    with open(model_folder+'/ft_input_zones.dat', 'r') as inFile:
        next(inFile, None)    # skip the header row
        for strIn in inFile:
            if not strIn.strip():
                continue    # tolerate blank lines instead of aborting the whole read
            strSplt = strIn.split("\t")
            node.append(strSplt[0])
            nodeLat.append(float(strSplt[1]))
            nodeLon.append(float(strSplt[2]))
    print (len(node), "zones!")
except Exception as err:
    # Exception (not a bare except) so that interrupting the kernel still works;
    # the underlying error is printed because the generic message alone hides the cause.
    print ('Input file ft_input_zones.dat does not exist or is corrupted. The program will terminate.')
    print ('Reason:', repr(err))
    quit(keep_kernel=True)

1599 zones!


generating access links

In [7]:
import math
## insert access distance threshold in miles
access_threshold = 0.5 #miles

# Sphere radius used to turn the central angle into a distance; the threshold above is
# in miles, so this is presumably the Earth's radius in miles.
sphereRadiusMiles = 3960
# Walking speed used to turn distance into time: miles per hour, reported in minutes.
walkSpeedMph = 3.0
degreesToRradians = math.pi/180.0

# Per-stop terms of the spherical law of cosines, computed once instead of once per
# zone/stop pair. Each expression is the one the pair formula needs, so the distances
# are unchanged.
stopSinColat = [math.sin(math.pi/2.0 - lat * degreesToRradians) for lat in stopLat]
stopCosColat = [math.cos(math.pi/2.0 - lat * degreesToRradians) for lat in stopLat]
stopLonRad = [lon * degreesToRradians for lon in stopLon]

k=0    # number of zone-to-stop links written
# The with-block closes the file when the loop ends; without a close, rows still held
# in the write buffer may never reach the file.
with open(model_folder+'/ft_input_accessLinks.dat', "w") as outFile:
    # NOTE(review): the header names four columns but every row below has a fifth field
    # ("1"). Left unchanged because the reader of this file is not visible here - confirm.
    outFile.write("TAZ\tstop\tdist\ttime\n")
    for i in range(len(node)):  ####walking access links
        if i%1000 == 0: print (i, k)
        nodeSinColat = math.sin(math.pi/2.0 - nodeLat[i] * degreesToRradians)
        nodeCosColat = math.cos(math.pi/2.0 - nodeLat[i] * degreesToRradians)
        nodeLonRad = nodeLon[i] * degreesToRradians
        for j in range(len(stop)):
            tmpDist = (nodeSinColat * stopSinColat[j] * math.cos(nodeLonRad - stopLonRad[j]) + nodeCosColat * stopCosColat[j])
            # Rounding can push the cosine slightly outside [-1, 1]; clamping keeps
            # math.acos inside its domain, so no exception handling is needed around it.
            tmpDist = max(-1.0,min(tmpDist,1.0))
            tmpDist = sphereRadiusMiles * math.acos(tmpDist)
            tmpDist = max(tmpDist,0.001)    # a stop at the zone's own coordinates still gets a small positive distance
            if tmpDist <= access_threshold:
                k = k + 1
                tmpTime = tmpDist / walkSpeedMph * 60
                outFile.write("\t".join([str(node[i]), str(stop[j]), str(round(tmpDist,3)), str(round(tmpTime,2)), "1"]) + "\n")
print (k, "walking access links!")


0 0
1000 45698
72867 walking access links!
