## Summary of Study Results

Using the data downloaded from Replica, create a single table with one row for each origin station, a column for “car trips to areas around other destination stations on the other lines”, and a column for “train trips”.

In [1]:
import pandas as pd
import geopandas as gpd
from siuba import *
import numpy as np

from calitp_data_analysis.sql import to_snakecase
from calitp_data_analysis import utils
from calitp_data_analysis import get_fs
import gcsfs as fs
# NOTE: this rebinds `fs`, which the line above imported as an alias for the
# gcsfs module; from here on `fs` is whatever object get_fs() returns.
fs = get_fs()

In [2]:
import replica_utils

In [3]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [4]:
# Path prefix (ends with "/") that the loading loop below prepends to each
# Replica download's file name.
gcs_path = "calitp-analytics-data/data-analyses/big_data/MetroLink_LinkUS/"

In [12]:
# One entry per Replica download. Each name has the form
# "<line>_origin_<station>"; the loading loop splits on "_origin_" to recover
# the line prefix and the origin station.
analysis_result_names = [
    # "line" prefix
    "line_origin_union_station",
    # "riversideline" prefix
    "riversideline_origin_montebello",
    "riversideline_origin_industry",
    "riversideline_origin_pomona",
    "riversideline_origin_ontario",
    "riversideline_origin_pedley",
    "riversideline_origin_riverside",
    # "sbline" prefix
    "sbline_origin_calstate_la",
    "sbline_origin_el_monte",
    "sbline_origin_baldwin_park",
    "sbline_origin_covina",
    "sbline_origin_pomona_north",
    "sbline_origin_fontana",
    "sbline_origin_claremont",
    "sbline_origin_montclair",
    "sbline_origin_upland",
    "sbline_origin_rancho_cucamonga",
    "sbline_origin_rialto",
    "sbline_origin_san_bernardino_depot",
    "sbline_origin_san_bernardino_dt",
]

In [13]:
# analysis_result_names_subset_test = ["sbline_origin_covina", "sbline_origin_calstate_la", 
#                                 "riversideline_origin_industry",  "riversideline_origin_montebello"]

In [None]:
def _mode_count_and_share(trips, mode):
    """Count the rows of ``trips`` whose ``primary_mode`` equals ``mode``.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip table with a ``primary_mode`` column.
    mode : str
        Mode label to count, e.g. ``"private_auto"``.

    Returns
    -------
    tuple of (int, float)
        The number of matching rows and that number divided by ``len(trips)``.
        The share is NaN when ``trips`` has no rows, so one empty download
        does not abort the whole run with a ZeroDivisionError.
    """
    n_total = len(trips)
    n_mode = int((trips.primary_mode == mode).sum())
    share = n_mode / n_total if n_total else float("nan")
    return n_mode, share


# Build one summary record per Replica download, then assemble the records
# into a single table with one row per origin station.
gcs_fs = get_fs()  # loop-invariant: fetch the filesystem handle once
results = []
for analysis in analysis_result_names:
    trips_path = f"{gcs_path}replica_data_downloads/replica-socal_travel_analysis_{analysis}-trips_dataset.csv"

    # Keep the remote file open only for the read itself.
    with gcs_fs.open(trips_path) as f:
        # Columns 25 and 26 are read as str; presumably to avoid mixed-type
        # inference on those columns — TODO confirm against the raw CSV.
        df = to_snakecase(pd.read_csv(f, dtype={25: str, 26: str}))

    # Names look like "<line>_origin_<station>".
    name_parts = analysis.split('_origin_')
    line = name_parts[0]
    station = name_parts[1]
    station_name = station.replace('_', ' ').upper()

    metrolink_line = replica_utils.define_transit_line(line)

    n_total_trips = len(df)
    n_private_auto_trips, pct_private_auto_trips = _mode_count_and_share(df, "private_auto")
    n_public_transit_trips, pct_public_transit_trips = _mode_count_and_share(df, "public_transit")

    auto_mean_min, auto_median_min, auto_mean_miles, auto_median_miles = replica_utils.calc_auto_travel_info(df)
    transit_mean_min, transit_median_min, transit_mean_miles, transit_median_miles = replica_utils.calc_transit_travel_info(df)

    # One row of the final table.
    results.append({
        'station_name': station_name,
        'metrolink_line': metrolink_line,
        'total_trips': n_total_trips,
        'n_auto_trips_to_other_station_areas': n_private_auto_trips,
        'pct_auto_trips_to_other_station_areas': pct_private_auto_trips,
        # Key previously misspelled as 'n_tranist_...'; now matches the
        # 'pct_transit_...' column next to it.
        'n_transit_trips_to_other_station_areas': n_public_transit_trips,
        'pct_transit_trips_to_other_station_areas': pct_public_transit_trips,
        'auto_mean_minutes': auto_mean_min,
        'auto_median_minutes': auto_median_min,
        'auto_mean_miles': auto_mean_miles,
        'auto_median_miles': auto_median_miles,
        'transit_mean_minutes': transit_mean_min,
        'transit_median_minutes': transit_median_min,
        'transit_mean_miles': transit_mean_miles,
        'transit_median_miles': transit_median_miles,
    })

# Build the DataFrame once from the list of records.
result_summary = pd.DataFrame(results)

        

In [None]:
# Display the per-station summary table assembled in the previous cell.
result_summary

In [9]:
## export
# result_summary.to_csv("MetroLink_Stations_Replica_Summaries.csv")

In [10]:
# df_auto.columns

In [11]:
# df.primary_mode.value_counts()