In [7]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import random
import numpy as np
import pandana as pdna

In [8]:
# --- Input data ---

# Road network tables (described in this notebook as the East Bay network).
# Later cells read 'node_id', 'lon', 'lat' from the node table and
# 'start_nid', 'end_nid', 'length' from the edge table.
nodes_df, edges_df = (pd.read_csv(csv_name) for csv_name in ('nodes.csv', 'edges.csv'))

# Travel Analysis Zones (TAZ1454) feature layer published by MTC/ABAG:
# https://opendata.mtc.ca.gov/datasets/b85ba4d43f9843128d3542260d9a2f1f_0/about
taz_gdf = gpd.read_file('Travel_Analysis_Zones_(TAZ1454)/Travel Analysis Zones.shp')

# Assign ODs to highway nodes in the corresponding TAZ based on the MTC travel data

In [9]:
# Reproject the TAZ polygons to EPSG:4326 (lon/lat), the same CRS that is
# attached to the node points when nodes_gdf is built.
taz_gdf = taz_gdf.to_crs(crs="EPSG:4326")

In [10]:
# Build one shapely Point per network node (x = lon, y = lat) and wrap the node
# table in a GeoDataFrame tagged as EPSG:4326.
geometry = [Point(lon, lat) for lon, lat in zip(nodes_df['lon'], nodes_df['lat'])]
nodes_gdf = gpd.GeoDataFrame(data=nodes_df, geometry=geometry, crs=4326)

In [11]:
# Map every TAZ id (column 'TAZ1454') to the list of network node ids that lie
# within that zone's polygon. Zones with no node inside get an empty list.
contained = {}

for taz_id, taz_polygon in zip(taz_gdf['TAZ1454'], taz_gdf['geometry']):
    nodes_inside = nodes_gdf.loc[nodes_gdf.within(taz_polygon), 'node_id']
    contained[taz_id] = nodes_inside.tolist()

In [12]:
# Load the MTC/ABAG travel model data (http://data.mtc.ca.gov/data-repository/)
# The one used in this example is from the model run 2015_06_002 (https://mtcdrive.app.box.com/v/pba2040-2015-06-002)
# Later cells read the 'depart_hour', 'orig_taz' and 'dest_taz' columns of this table.
trip = pd.read_csv('indivTripData_3.csv')

In [13]:
# Keep only trips departing in the morning peak, i.e. depart_hour of 6, 7 or 8.
in_morning_peak = trip['depart_hour'].isin([6, 7, 8])

# A trip is usable only when both its origin TAZ and its destination TAZ
# contain at least one network node.
non_empty_keys = [taz_id for taz_id, node_ids in contained.items() if node_ids]
origin_has_nodes = trip['orig_taz'].isin(non_empty_keys)
destination_has_nodes = trip['dest_taz'].isin(non_empty_keys)

trips = trip[in_morning_peak & origin_has_nodes & destination_has_nodes]

In [15]:
# Renumber the rows from 0, use the new row number as agent_id, add placeholder
# node-id columns (0 until nodes are assigned), and keep only the columns that
# the following cells use.
trips = (
    trips
    .reset_index()
    .assign(agent_id=lambda frame: frame.index, origin_nid=0, destin_nid=0)
    [['agent_id', 'orig_taz', 'dest_taz', 'origin_nid', 'destin_nid', 'depart_hour']]
)

In [17]:
# Randomly pick one network node inside the origin TAZ and one inside the
# destination TAZ for every trip.
#
# The picks are collected in a list and written back as whole columns.
# Element-wise chained assignment (trips['origin_nid'][i] = ...) is avoided
# because it raises SettingWithCopyWarning, is slow row by row, and leaves the
# DataFrame unchanged when pandas Copy-on-Write is enabled.
#
# For each row random.choice is called for the origin first and then for the
# destination, so the sequence of random draws matches a row-by-row loop.
# NOTE: seed `random` beforehand if the assignment must be reproducible.
od_node_pairs = [
    (random.choice(contained[orig_taz]), random.choice(contained[dest_taz]))
    for orig_taz, dest_taz in zip(trips['orig_taz'], trips['dest_taz'])
]
trips = trips.assign(
    origin_nid=[origin_node for origin_node, _ in od_node_pairs],
    destin_nid=[destin_node for _, destin_node in od_node_pairs],
)

In [18]:
# Drop the trips whose randomly chosen origin and destination node coincide.
trips = trips.loc[trips['origin_nid'].ne(trips['destin_nid'])]

# Reshape into the OD table: one row per agent with its node pair and its
# departure hour (renamed from 'depart_hour' to 'hour').
mtc_traffic = (
    trips[['agent_id', 'origin_nid', 'destin_nid', 'depart_hour']]
    .rename(columns={'depart_hour': 'hour'})
)

# Give every trip a random integer departure slot drawn uniformly from 0..5.
# NOTE(review): the column is named 'quarter' but six distinct values are
# drawn - confirm the number of slots per hour the consumer of od.csv expects.
mtc_traffic = mtc_traffic.assign(
    quarter=np.random.randint(0, 6, size=len(mtc_traffic))
)

In [19]:
# Discard every row that has a missing value in any column.
mtc_traffic = mtc_traffic.dropna(axis=0, how='any')

# Remove the trips that have no route between their origin and destination

In [20]:
# Index the node table by node_id (the node_id column itself is kept).
nodes_df.index = nodes_df['node_id']

# Cast the edge columns used to build the network: integer node ids for both
# endpoints and a floating-point length. lon/lat are left exactly as loaded.
for edge_column, edge_dtype in (
    ('start_nid', 'int64'),
    ('end_nid', 'int64'),
    ('length', 'float64'),
):
    edges_df[edge_column] = edges_df[edge_column].astype(edge_dtype)

In [21]:
# Build the pandana routing network from the node coordinates (indexed by
# node_id), the edge endpoints, and 'length' as the only edge weight.
# twoway=False: each edge is treated as one-way, from start_nid to end_nid.
net = pdna.Network(
    nodes_df['lon'],
    nodes_df['lat'],
    edges_df['start_nid'],
    edges_df['end_nid'],
    edges_df[['length']],
    twoway=False,
)

Generating contraction hierarchies with 10 threads.
Setting CH node vector of size 93529
Setting CH edge vector of size 222406
Range graph removed 198802 edges of 444812
. 10% . 20% . 30% . 40% . 50% . 60% . 70% . 80% . 90% . 100%


In [22]:
# Flag every trip with 1 when the network has a path from its origin node to
# its destination node, and with 0 when net.shortest_path returns an empty
# path.
#
# The flags are collected in a list and stored as a whole column. Element-wise
# chained assignment (mtc_traffic['has_path'].iloc[i] = ...) is avoided because
# it raises SettingWithCopyWarning and leaves the DataFrame unchanged when
# pandas Copy-on-Write is enabled.
# NOTE(review): one routing query is issued per trip; if this cell is too slow,
# check whether the installed pandana version offers a batched query.
has_path_flags = [
    int(len(net.shortest_path(origin_nid, destin_nid)) > 0)
    for origin_nid, destin_nid in zip(mtc_traffic['origin_nid'], mtc_traffic['destin_nid'])
]
mtc_traffic = mtc_traffic.assign(has_path=has_path_flags)

In [23]:
# Keep only the trips flagged as routable and drop the helper 'has_path'
# column, leaving the final OD columns in their output order.
is_routable = mtc_traffic['has_path'].eq(1)
mtc_traffic = mtc_traffic.loc[
    is_routable,
    ['agent_id', 'origin_nid', 'destin_nid', 'hour', 'quarter'],
]

In [24]:
# Save the dataframe to csv file
# NOTE(review): index=False is not passed, so the DataFrame index is written as
# an extra unnamed first column - confirm that readers of od.csv expect it.
mtc_traffic.to_csv('od.csv')