## Origin-Destination Big Data Analysis

In [1]:
import gcsfs as fs
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp_data_analysis import get_fs, utils
from calitp_data_analysis.sql import to_snakecase
from siuba import *

# NOTE(review): this rebinds `fs`. The same cell imports the `gcsfs` module under
# the alias `fs` (`import gcsfs as fs`); after this line `fs` refers to whatever
# `get_fs()` returns, and the module is no longer reachable through that name.
fs = get_fs()

In [2]:
import altair as alt


In [3]:
import replica_utils

In [4]:
# Lift pandas' column display limit so wide frames are shown without truncation.
pd.set_option("display.max_columns", None)

In [5]:
# File names handed to `replica_utils.read_in_and_prep_replica_data` in the next cell.
# Second argument of that helper; presumably the zipped origin-layer geometry - TODO confirm.
shape_data_name = "replica-hta_transit_calpolyhumboldt-10_22_25-origin_layer_Humb.zip"

# Trip-level CSV exports; the names suggest CPH (Cal Poly Humboldt) is the
# destination in the first file and the origin in the second - verify against the export.
to_cp_file_name = "replica-hta_transit_calpolyhumboldt-10_22_25-trips_dataset_dest_as_CPH.csv"
from_cp_file_name = "replica-hta_transit_calpolyhumboldt-10_22_25-trips_dataset_origin_as_CPH.csv"

In [6]:
# Run both trip exports through the same prep helper, "to" first and "from" second.
# `file_type` is passed through unchanged; its meaning is defined in `replica_utils`.
to_cp, from_cp = (
    replica_utils.read_in_and_prep_replica_data(csv_name, shape_data_name, file_type=direction)
    for csv_name, direction in (
        (to_cp_file_name, "to_cp"),
        (from_cp_file_name, "from_cp"),
    )
)

  df = to_snakecase( pd.read_csv(f"{gcs_path}{file_name}"))
  df = to_snakecase( pd.read_csv(f"{gcs_path}{file_name}"))


In [7]:
# Size check on the "to Cal Poly" data (the recorded output shows 16657).
len(to_cp)

16657

In [8]:
# Size check on the "from Cal Poly" data (the recorded output shows 16623).
len(from_cp)

16623

In [9]:
# Collect both trip tables for `replica_utils.return_score_summary`. The list holds
# references, so columns added to `to_cp` / `from_cp` in place afterwards are
# visible through `df_list` as well.
df_list = [to_cp, from_cp]

In [10]:
# Tag each table with its direction. The assignment is deliberately in place:
# `df_list` already points at these same objects, so rebinding the names
# (e.g. via `.assign`) would leave the list without the new column.
to_cp["trip_type"], from_cp["trip_type"] = (
    "Traveling To Cal Poly",
    "Traveling From Cal Poly",
)

In [20]:
# Build the per-direction summary table from both trip tables. The recorded output
# shows one row per trip type with trip counts, mode shares and time/distance stats.
summary = replica_utils.return_score_summary(df_list)

In [26]:
# Display the summary table.
# NOTE(review): the recorded output was captured after the later title-case and
# rename cells had run (execution counts are out of order), so it shows the final labels.
summary

Unnamed: 0,Trip Type,Total Trips,Number of Auto Trips,Pct Auto Trips,Number of Tranist Trips,Pct Transit Trips,Auto Mean Min,Auto Median Min,Auto Max Min,Auto Mean Miles,Auto Median Miles,Auto Max Miles,Transit Mean Min,Transit Median Min,Transit Max Min,Transit Mean Miles,Transit Median Miles,Transit Max Miles
0,Traveling To Cal Poly,16657,9544,0.572972,37,0.002221,12.67938,5.0,413,9.585425,2.0,393.5,30.054054,22.0,75,5.051351,2.6,14.1
1,Traveling From Cal Poly,16623,9179,0.552187,22,0.001323,14.812834,5.0,530,11.49537,1.9,549.1,25.409091,21.0,68,5.75,4.3,15.7


In [22]:
# Title-case every column label so they match the keys used by the rename cell
# (e.g. "Trip_Type"). Run this once, before the rename: applying it to the
# already-renamed labels would also capitalise small words such as "of".
summary.columns = summary.columns.map(str.title)

In [27]:
# Inspect the current column labels (the recorded output shows the post-rename names).
summary.columns

Index(['Trip Type', 'Total Trips', 'Number of Auto Trips', 'Pct Auto Trips',
       'Number of Tranist Trips', 'Pct Transit Trips', 'Auto Mean Min',
       'Auto Median Min', 'Auto Max Min', 'Auto Mean Miles',
       'Auto Median Miles', 'Auto Max Miles', 'Transit Mean Min',
       'Transit Median Min', 'Transit Max Min', 'Transit Mean Miles',
       'Transit Median Miles', 'Transit Max Miles'],
      dtype='object')

In [24]:
# Swap the title-cased labels for human-readable display names. `rename` skips
# keys that are not present, so this cell is safe to re-run.
summary = summary.rename(
    columns={
        # Trip counts and mode shares
        "Trip_Type": "Trip Type",
        "Total_Trips": "Total Trips",
        "N_Auto_Trips": "Number of Auto Trips",
        "Pct_Auto_Trips": "Pct Auto Trips",
        # NOTE(review): "Tranist" is misspelled in both the key and the display name.
        # The key has to match the incoming column, and the melt cell below refers to
        # the display name, so any correction must be made in all places together.
        "N_Tranist_Trips": "Number of Tranist Trips",
        "Pct_Transit_Trips": "Pct Transit Trips",
        # Travel time: "Minutes" is shortened to "Min"
        "Auto_Mean_Minutes": "Auto Mean Min",
        "Auto_Median_Minutes": "Auto Median Min",
        "Auto_Max_Minutes": "Auto Max Min",
        "Transit_Mean_Minutes": "Transit Mean Min",
        "Transit_Median_Minutes": "Transit Median Min",
        "Transit_Max_Minutes": "Transit Max Min",
        # Travel distance
        "Auto_Mean_Miles": "Auto Mean Miles",
        "Auto_Median_Miles": "Auto Median Miles",
        "Auto_Max_Miles": "Auto Max Miles",
        "Transit_Mean_Miles": "Transit Mean Miles",
        "Transit_Median_Miles": "Transit Median Miles",
        "Transit_Max_Miles": "Transit Max Miles",
    }
)

In [35]:
# Reshape `summary` from wide to long: one row per (Trip Type, Metric) pair, with
# the measurement in pandas' default "value" column.
# "Trip Type" is the identifier column, so it appears only in `id_vars`; it is not
# repeated in `value_vars`, where it is not a metric and would make the column
# selection ambiguous.
# NOTE: the value column mixes counts, shares, minutes and miles; the unit-specific
# melts in the following cells are the ones used for plotting.
summary_melt = pd.melt(
    summary,
    id_vars=["Trip Type"],
    value_vars=[
        "Total Trips",
        "Number of Auto Trips",
        "Pct Auto Trips",
        "Number of Tranist Trips",  # spelling matches the label set by the rename cell
        "Pct Transit Trips",
        "Auto Mean Min",
        "Auto Median Min",
        "Auto Max Min",
        "Auto Mean Miles",
        "Auto Median Miles",
        "Auto Max Miles",
        "Transit Mean Min",
        "Transit Median Min",
        "Transit Max Min",
        "Transit Mean Miles",
        "Transit Median Miles",
        "Transit Max Miles",
    ],
    var_name="Metric",  # column that holds the original column names
)

In [63]:
# Long-format view of the distance statistics: one row per (Trip Type, Metric),
# with the measurement in a "Miles" column.
summary_long_all_miles = summary.melt(
    id_vars=["Trip Type"],
    value_vars=[
        "Auto Mean Miles",
        "Auto Median Miles",
        "Auto Max Miles",
        "Transit Mean Miles",
        "Transit Median Miles",
        "Transit Max Miles",
    ],
    var_name="Metric",  # original column name of each measurement
    value_name="Miles",
)

In [62]:
# Long-format view of the travel-time statistics: one row per (Trip Type, Metric).
# NOTE(review): the value column is spelled "Mintues"; the minutes chart reads the
# field by that exact name, so the spelling is kept here.
summary_long_all_min = summary.melt(
    id_vars=["Trip Type"],
    value_vars=[
        "Auto Mean Min",
        "Auto Median Min",
        "Auto Max Min",
        "Transit Mean Min",
        "Transit Median Min",
        "Transit Max Min",
    ],
    var_name="Metric",  # original column name of each measurement
    value_name="Mintues",
)

In [59]:
# Horizontal bars of the travel-time statistics, one facet row per trip type.
# The data field is named "Mintues" (as created by the melt cell), so the field
# reference keeps that spelling while the axis is given a correct display title.
alt.Chart(summary_long_all_min).mark_bar().encode(
    x=alt.X("Mintues:Q", title="Minutes"),
    y="Metric:O",
    color="Metric:N",
    row="Trip Type:O",
).properties(title="Travel Length by Trip Type & Mode")  # stray brackets removed from the title

In [52]:
# Horizontal bars of the travel-distance statistics, one facet row per trip type.
alt.Chart(summary_long_all_miles).mark_bar().encode(
    x=alt.X("Miles:Q"),
    y=alt.Y("Metric:O"),
    color=alt.Color("Metric:N"),
    row=alt.Row("Trip Type:O"),
).properties(
    title="Travel Distance by Trip Type & Mode",
)

In [18]:
# Compute the mode split for each direction ("from" first, then "to") and stack
# the two results, with the "to" rows placed first.
from_cp_mode, to_cp_mode = (
    replica_utils.get_mode_split(trips) for trips in (from_cp, to_cp)
)

modes_breakdown = pd.concat((to_cp_mode, from_cp_mode))

In [19]:
# Stacked bars: total trips per trip type, coloured by travel mode.
# A chart title is set so the figure stands alone, like the time/distance charts above.
alt.Chart(modes_breakdown).mark_bar(size=150).encode(
    x=alt.X("df_name:O", title="Trip Type"),
    y=alt.Y("total_trips:Q", title="Total Number of Trips"),
    color="mode:N",
).properties(
    title="Total Trips by Trip Type & Mode",
    width=600,
    height=300,
)


In [20]:
# Small multiples: one panel per travel mode, comparing each trip type's share of trips.
# A chart title is set so the figure stands alone, like the time/distance charts above.
alt.Chart(modes_breakdown).mark_bar().encode(
    x=alt.X("df_name:O", title="Trip Type"),
    y=alt.Y("pct_trips:Q", title="Pct of Total Trips"),
    color=alt.Color("df_name:N", legend=alt.Legend(title="Trip Type")),
    column=alt.Column("mode:N", title="Mode"),
).properties(
    title="Share of Total Trips by Mode & Trip Type",
    width=100,
    height=400,
)