# Prepare ODs (TAZs)

In [None]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import pickle

import src.bikewaysim_functions as bikewaysim_functions

import sys
# Put the parent of the current working directory first on sys.path,
# presumably so the file_structure_setup import below resolves from there.
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
# config is indexed by key in later cells (e.g. 'studyarea_fp', 'network_fp',
# 'calibration_fp', 'bikewaysim_fp'); the path-like values are joined with "/".
config = file_structure_setup.filepaths()

In [None]:
# Raw ARC inputs: the TAZ layer and the three-part trip table.
# Every Windows path is a raw string so that backslashes are never parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
taz_fp = Path(r"D:\RAW\Atlanta_Regional_Comission\Model_Traffic_Analysis_Zones_2020.geojson")
trip_fp = [
    r"D:\RAW\Atlanta_Regional_Comission\rtp_amd6_2030_tripData/tripData.csv",
    r"D:\RAW\Atlanta_Regional_Comission\rtp_amd6_2030_tripData/tripData_1.csv",
    r"D:\RAW\Atlanta_Regional_Comission\rtp_amd6_2030_tripData/tripData_2.csv",
]

# One row of summary counts is appended to this list after each filtering step.
outputs = []

# Initial Count

In [None]:
# Number of TAZs in the source layer (attributes only; geometry is not read).
all_tazs = gpd.read_file(taz_fp,ignore_geometry=True).shape[0]

### Import ARC trip data from the Amendment 6 2030 model run
### https://abmfiles.atlantaregional.com/downloads/zPrevious%20Model%20-%20Loaded%20Highway%20Networks%20-%20GIS%20-%20RTP%20Amd%206%20-%202030.zip
# Only the columns needed for the OD counts are loaded. Each file listed in
# trip_fp is read once and the parts are stacked; the intermediate frames are
# not bound to names, so nothing has to be deleted afterwards.
trip_columns = ['orig_taz','dest_taz','distance','hh_id','person_id']
tripData = pd.concat([pd.read_csv(fp,usecols=trip_columns) for fp in trip_fp])

# Row layout: TAZs, OD pairs, trips, households, persons.
outputs.append([
    all_tazs,
    tripData.groupby(['orig_taz','dest_taz']).ngroups,
    tripData.shape[0],
    tripData['hh_id'].nunique(),
    tripData['person_id'].nunique()
])

# Within Study Area

In [None]:
# Load the study area and read only the TAZs selected by it as a spatial mask.
studyarea = gpd.read_file(config['studyarea_fp'])
tazs = (
    gpd.read_file(taz_fp, mask=studyarea)[['MTAZ10', 'geometry']]
    .rename(columns={'MTAZ10': 'tazid'})
)

# Keep a trip only when both its origin and its destination TAZ were selected.
origin_inside = tripData['orig_taz'].isin(tazs['tazid'])
destination_inside = tripData['dest_taz'].isin(tazs['tazid'])
tripData = tripData[origin_inside & destination_inside]

# Row layout: TAZs, OD pairs, trips, households, persons.
outputs.append([
    len(tazs),
    tripData.groupby(['orig_taz', 'dest_taz']).ngroups,
    len(tripData),
    tripData['hh_id'].nunique(),
    tripData['person_id'].nunique(),
])

# Remove if matching distance exceeds 500 feet
A TAZ is kept only if its Euclidean matching distance is less than or equal to 500 feet (~2.8 minutes of walking at 2 mph)

In [None]:
## Snap TAZs to network
nodes = gpd.read_file(config['network_fp']/"final_network.gpkg",layer="nodes")
# NOTE: pickle.load can execute arbitrary code; only load calibration files
# that were produced by this project.
with (config['calibration_fp']/"calibration_network.pkl").open('rb') as fh:
    links,turns = pickle.load(fh)
del turns
# Restrict the candidate nodes to those that are an endpoint (A or B) of a link
# in the calibration network. Series.append was removed in pandas 2.0, so the
# two endpoint columns are stacked with pd.concat instead.
nodes0 = pd.concat([links['A'],links['B']])
nodes = nodes[nodes['N'].isin(nodes0)]
# The snapped frame is expected to carry the 'N' and 'dist' columns that are
# read further down (TODO confirm against snap_ods_to_network).
tazs = bikewaysim_functions.snap_ods_to_network(tazs,nodes)

#filter
# Keep TAZs whose snap distance is at most 500 (feet per the section heading;
# assumes 'dist' is in the units of a feet-based CRS - TODO confirm).
tazs = tazs[tazs['dist']<=500]
tripData = tripData[tripData['orig_taz'].isin(tazs['tazid']) & tripData['dest_taz'].isin(tazs['tazid'])]

# Row layout: TAZs, OD pairs, trips, households, persons.
outputs.append([
    tazs.shape[0],
    tripData.groupby(['orig_taz','dest_taz']).ngroups,
    tripData.shape[0],
    tripData['hh_id'].nunique(),
    tripData['person_id'].nunique()
])

# Remove Trips Above 3 Miles


In [None]:
# Drop trips whose 'distance' value is above 3 (miles, per the section heading).
within_limit = tripData['distance'] <= 3
tripData = tripData[within_limit]

# Keep only TAZs that still appear as an origin or a destination.
used_as_origin = tazs['tazid'].isin(tripData['orig_taz'])
used_as_destination = tazs['tazid'].isin(tripData['dest_taz'])
tazs = tazs[used_as_origin | used_as_destination]

# Row layout: TAZs, OD pairs, trips, households, persons.
outputs.append([
    len(tazs),
    tripData.groupby(['orig_taz', 'dest_taz']).ngroups,
    len(tripData),
    tripData['hh_id'].nunique(),
    tripData['person_id'].nunique(),
])

# Remove Intrazonal Trips


In [None]:
# Drop trips that start and end in the same TAZ.
crosses_zones = tripData['orig_taz'].ne(tripData['dest_taz'])
tripData = tripData[crosses_zones]

# Keep only TAZs that still appear as an origin or a destination.
used_as_origin = tazs['tazid'].isin(tripData['orig_taz'])
used_as_destination = tazs['tazid'].isin(tripData['dest_taz'])
tazs = tazs[used_as_origin | used_as_destination]

# Row layout: TAZs, OD pairs, trips, households, persons.
outputs.append([
    len(tazs),
    tripData.groupby(['orig_taz', 'dest_taz']).ngroups,
    len(tripData),
    tripData['hh_id'].nunique(),
    tripData['person_id'].nunique(),
])

In [None]:
# Collapse the trip records to one row per OD pair; 'trips' is the number of
# records that shared that (orig_taz, dest_taz) pair.
od_pair_sizes = tripData.groupby(['orig_taz', 'dest_taz']).size()
tripData = od_pair_sizes.reset_index(name='trips')

# Final Count

In [None]:
# Tabulate the counts recorded after each filtering step and display only the
# last two columns (households and persons).
count_columns = [
    'Number of TAZs',
    'Number of OD Pairs',
    'Number of Trips',
    'Number of Households',
    'Number of Persons',
]
counts_by_step = pd.DataFrame(outputs, columns=count_columns)
counts_by_step.iloc[:, -2:]

# Add Snapped Nodes to Trip Data


In [None]:
# Lookup from TAZ id to the network node ('N') that the TAZ was snapped to.
snapped_ods = dict(zip(tazs['tazid'], tazs['N']))

# Attach the snapped node of each trip end; unmatched TAZ ids map to NaN.
tripData = tripData.assign(
    orig_N=tripData['orig_taz'].map(snapped_ods),
    dest_N=tripData['dest_taz'].map(snapped_ods),
)

Create a visual of the number of trips per origin/destination TAZ

In [None]:
# NOTE: disabled cell. It would sum trip origins and destinations per TAZ and
# plot the combined total as a choropleth. It uses `plt`, so
# matplotlib.pyplot must be imported as plt before re-enabling it.
# trip_origins_per_taz = tripData.groupby('orig_taz')['trips'].sum().reset_index()
# trip_origins_per_taz.columns = ['tazid','trip_origins']
# trip_destinations_per_taz = tripData.groupby('dest_taz')['trips'].sum().reset_index()
# trip_destinations_per_taz.columns = ['tazid','trip_destinations']

# trips_per_taz = tazs.copy()
# trips_per_taz = trips_per_taz.merge(trip_origins_per_taz,on='tazid',how='left')
# trips_per_taz = trips_per_taz.merge(trip_destinations_per_taz,on='tazid',how='left')
# trips_per_taz['both'] = trips_per_taz['trip_origins'] + trips_per_taz['trip_destinations']
# trips_per_taz.fillna(0,inplace=True)
# ax = trips_per_taz.plot('both',cmap='YlGnBu',edgecolor='grey')
# plt.axis('off')
# plt.show()

In [None]:
# Total trips originating in each TAZ, keyed by 'tazid'.
origin_totals = (
    tripData.groupby('orig_taz')['trips'].sum()
    .reset_index()
    .rename(columns={'orig_taz': 'tazid', 'trips': 'trip_origins'})
)

# Inner merge: only TAZs with at least one origin row are kept.
trip_origins_per_taz = tazs.merge(origin_totals, on='tazid')
trip_origins_per_taz.plot('trip_origins')

# Save the result as the 'trip_origins' layer of ods.gpkg.
ods_gpkg = config['bikewaysim_fp'] / 'ods.gpkg'
trip_origins_per_taz.to_file(ods_gpkg, layer='trip_origins')

In [None]:
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'Times New Roman'

# Work from the origin counts and compute their mean once.
origin_counts = trip_origins_per_taz['trip_origins']
mean_origins = origin_counts.mean()

# Histogram of origins per TAZ in bins of 1,000 from 0 up to 49,000.
plt.hist(origin_counts, bins=range(0, 50000, 1000), color='grey')
plt.grid(False)
plt.xlabel(f"Trip Origins Per TAZ (N = {origin_counts.sum():,})")
plt.ylabel(f"Number of TAZs (N = {trip_origins_per_taz.shape[0]:,})")

# Dashed red line at the mean, with its label placed to the right of the line
# at 90% of the current y-axis height.
plt.axvline(mean_origins, color='red', linestyle='dashed', linewidth=1)
plt.text(
    mean_origins * 1.5,
    plt.gca().get_ylim()[1] * 0.9,
    f"Mean: {mean_origins:,.0f} origins",
    color='red',
    ha='left',
)
plt.show()

In [None]:
# Display the mean number of trip origins per TAZ (the value marked on the histogram).
trip_origins_per_taz['trip_origins'].mean()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Stand-alone styling example built on made-up numbers. The example frame has
# its own name so that running this cell does not overwrite the real
# trip_origins_per_taz built earlier in the notebook.
example_origins = pd.DataFrame({'trip_origins': [10000, 20000, 15000, 5000, 25000]})

# Set the font to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'

# Create histogram (bins of 2,000 from 0 up to 48,000)
plt.hist(example_origins['trip_origins'], bins=range(0, 50000, 2000), color='grey')

# Remove grid lines
plt.grid(False)

# Calculate the sum of trip origins with comma formatting
total_trip_origins = f"{example_origins['trip_origins'].sum():,}"

# Add axis labels with formatted number
plt.xlabel(f"Trip Origins Per TAZ (N = {total_trip_origins})")
plt.ylabel('Frequency')

# Calculate the mean value
mean_value = example_origins['trip_origins'].mean()

# Add vertical red line at the mean value
plt.axvline(mean_value, color='red', linestyle='dashed', linewidth=1)

# Add label for the mean value, centered on the line at 90% of the y-axis height
plt.text(mean_value, plt.gca().get_ylim()[1] * 0.9, f'Mean: {mean_value:,.0f}', color='red', ha='center')

# Show plot
plt.show()


In [None]:
# NOTE: disabled cell. Destination-side counterpart of the trip-origins cell:
# it would sum trips per destination TAZ, merge onto tazs, and plot the result.
# trip_destinations_per_taz = tripData.groupby('dest_taz')['trips'].sum().reset_index()
# trip_destinations_per_taz.columns = ['tazid','trip_destinations']
# trip_destinations_per_taz = tazs.merge(trip_destinations_per_taz,on='tazid')
# trip_destinations_per_taz.plot('trip_destinations')

## Get the TAZs within the study area that were filtered out or had zero trips

In [None]:
# Re-read every TAZ selected by the study-area mask, before any filtering.
studyarea = gpd.read_file(config['studyarea_fp'])
tazs0 = (
    gpd.read_file(taz_fp, mask=studyarea)[['MTAZ10', 'geometry']]
    .rename(columns={'MTAZ10': 'tazid'})
)

# Keep only the TAZs that are absent from the final `tazs` set.
still_present = tazs0['tazid'].isin(tazs['tazid'])
tazs0 = tazs0[~still_present]
tazs0.plot()

# Save them as the 'filtered_out_or_zero' layer of ods.gpkg.
ods_gpkg = config['bikewaysim_fp'] / 'ods.gpkg'
tazs0.to_file(ods_gpkg, layer='filtered_out_or_zero')

In [None]:
# NOTE: disabled cell. It would export the TAZ polygons and their centroids,
# reprojected to config['projected_crs_epsg'], as layers of map_layers.gpkg.
# # trip origins per TAZ

# #export the taz areas
# tazs.to_crs(config['projected_crs_epsg']).to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='taz_polygons')
# #export the taz centroids
# taz_centroids = tazs.copy().to_crs(config['projected_crs_epsg'])
# taz_centroids.geometry = taz_centroids.geometry.centroid
# taz_centroids.to_crs(config['projected_crs_epsg']).to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='taz_centroids')

In [None]:
# Write the condensed OD table (with snapped node ids) to od_matrix.csv,
# leaving out the DataFrame index.
od_matrix_fp = config['bikewaysim_fp'] / 'od_matrix.csv'
tripData.to_csv(od_matrix_fp, index=False)

## Exploration Past Here

In [None]:
# Reload the first trip file with every column (the earlier load kept only five).
tripData0 = pd.read_csv(trip_fp[0])

In [None]:
# Number of distinct hh_id and person_id values in the first trip file.
tripData0[['hh_id','person_id']].nunique()

In [None]:
# Number of trip records for each value of the trip_mode column.
tripData0['trip_mode'].value_counts()

In [None]:
# Distribution of how many distinct person_id values occur per hh_id.
tripData0.groupby('hh_id')['person_id'].nunique().value_counts()

In [None]:
# Inspect the first 20 rows, ordered by hh_id, where person_num is above 4.
tripData0[tripData0['person_num']>4].sort_values('hh_id').head(20)

In [None]:
# Distribution of how many distinct tour_id values occur per person_id.
tripData0.groupby('person_id')['tour_id'].nunique().value_counts()

In [None]:
# List every column available in the raw trip file.
tripData0.columns

In [None]:
# NOTE: disabled export of the final TAZ set as the 'tazs' layer of ods.gpkg.
# tazs.to_file(config['bikewaysim_fp']/'ods.gpkg',layer='tazs')
