# Corridor Comparison: I-680

In [1]:
import pandas as pd
from siuba import *
from calitp_data_analysis.sql import to_snakecase

In [2]:
from calitp_data_analysis import get_fs
# NOTE(review): the `fs` alias bound by this import is overwritten by the
# assignment on the next line, so the gcsfs module is never reachable as `fs`.
import gcsfs as fs
# Rebinds `fs` to whatever get_fs() returns.
fs = get_fs()
import geopandas as gpd

### Importing the data
For this analysis, we need to read in data from:
* Caltrans Traffic Census Program: Using 2022 AADT Data
* Streetlight: Using Segment Analysis, Network Performance, and AADT for 2022
* Replica: Using AADT Count Application

In [3]:
# Caltrans Traffic Census 2022 workbook; only the "2022 AADT DATA" sheet is read.
ct_aadt_url = "https://dot.ca.gov/-/media/dot-media/programs/traffic-operations/documents/census/2022/2022-traffic-volumes-ca.xlsx"
ct_aadt_all_routes = pd.read_excel(ct_aadt_url, sheet_name="2022 AADT DATA")

# Normalise the column names with to_snakecase, then keep only rows whose route is 680.
ct_aadt = to_snakecase(ct_aadt_all_routes) >> filter(_.route == 680)

In [4]:
# Spot-check a single random row of the route-680 records. No random_state is
# passed, so a different row may be shown on each run.
ct_aadt.sample()

Unnamed: 0,district,route,route_sfx,county,pm_pfx,pm,pm_sfx,location_description,back_peak_hour,back_peak_madt,back_aadt,ahead_peak_hour,ahead_peak_madt,ahead_aadt
6915,4,680,,ALA,M,0.0,,SANTA CLARA/ALAMEDA COUNTY LINE,,,,8900.0,124000.0,108000.0


In [5]:
# Row count after filtering to route 680.
len(ct_aadt)

65

In [6]:
## Streetlight AADT
# Estimated-AADT export stored on GCS; column names are normalised with to_snakecase.
sl_aadt_csv = "gs://calitp-analytics-data/data-analyses/big_data/i680/1751074_I680_AADT_Test_estimated_aadt.csv"
sl_aadt = to_snakecase(pd.read_csv(sl_aadt_csv))

In [7]:
## Streetlight Network Performance Test
# Network-performance export stored on GCS; column names are normalised with to_snakecase.
sl_npt_csv = "gs://calitp-analytics-data/data-analyses/big_data/i680/1751071_I680_network_performance_test_network_performance.csv"
sl_npt = to_snakecase(pd.read_csv(sl_npt_csv))

In [8]:
## Streetlight Segment Analysis
# Segment-analysis export stored on GCS; column names are normalised with to_snakecase.
sl_sa_csv = "gs://calitp-analytics-data/data-analyses/big_data/i680/1751075_i680_Segment_Analysis_test_sa_all.csv"
sl_sa = to_snakecase(pd.read_csv(sl_sa_csv))

In [9]:
## Joining the Streetlight segments so that all Streetlight analyses are together

In [10]:
# Keep the data period, the zone name and the 2022 AADT estimate.
# The 95% prediction-interval bounds (lower_95_percent_prediction_interval,
# upper_95_percent_prediction_interval) are not selected.
sl_aadt = sl_aadt >> select(
    _.data_periods,
    _.zone_name,
    _.estimated_2022_aadt,
)

In [11]:
# Tag every column except the join key with the "aadt_" source prefix so the
# metrics stay distinguishable once the Streetlight tables are merged.
# The join key ("zone_name") and names that already carry the prefix are left
# untouched, so re-running this cell does not stack prefixes ("aadt_aadt_...").
# The later rename of "aadt_zone_name" then simply finds nothing to rename.
sl_aadt.columns = [
    str(col)
    if str(col) == "zone_name" or str(col).startswith("aadt_")
    else "aadt_" + str(col)
    for col in sl_aadt.columns
]


In [12]:
# Give the join key back its unprefixed name so it can be merged on "zone_name".
sl_aadt = sl_aadt.rename({"aadt_zone_name": "zone_name"}, axis="columns")

In [13]:
# Keep only the all-day rows, then the data period, zone name and the
# average daily segment traffic volume.
sl_npt = (
    sl_npt
    >> filter(_.day_part == "0: All Day (12am-12am)")
    >> select(
        _.data_periods,
        _.zone_name,
        _.average_daily_segment_traffic__stl_volume_,
    )
)

In [14]:
# Tag every column except the join key with the "npt_" source prefix so the
# metrics stay distinguishable once the Streetlight tables are merged.
# The join key ("zone_name") and names that already carry the prefix are left
# untouched, so re-running this cell does not stack prefixes ("npt_npt_...").
# The later rename of "npt_zone_name" then simply finds nothing to rename.
sl_npt.columns = [
    str(col)
    if str(col) == "zone_name" or str(col).startswith("npt_")
    else "npt_" + str(col)
    for col in sl_npt.columns
]

In [15]:
# Give the join key back its unprefixed name so it can be merged on "zone_name".
sl_npt = sl_npt.rename({"npt_zone_name": "zone_name"}, axis="columns")

In [16]:
# Keep only the all-day rows, then the data period, zone name and the
# average daily segment traffic volume.
sl_sa = (
    sl_sa
    >> filter(_.day_part == "0: All Day (12am-12am)")
    >> select(
        _.data_periods,
        _.zone_name,
        _.average_daily_segment_traffic__stl_volume_,
    )
)

In [17]:
# Tag every column except the join key with the "sa_" source prefix so the
# metrics stay distinguishable once the Streetlight tables are merged.
# The join key ("zone_name") and names that already carry the prefix are left
# untouched, so re-running this cell does not stack prefixes ("sa_sa_...").
# The later rename of "sa_zone_name" then simply finds nothing to rename.
sl_sa.columns = [
    str(col)
    if str(col) == "zone_name" or str(col).startswith("sa_")
    else "sa_" + str(col)
    for col in sl_sa.columns
]

In [18]:
# Give the join key back its unprefixed name so it can be merged on "zone_name".
sl_sa = sl_sa.rename({"sa_zone_name": "zone_name"}, axis="columns")

In [19]:
# Inner-join (the merge default) the AADT and network-performance tables on the zone name.
sl = sl_aadt.merge(sl_npt, how="inner", on="zone_name")


In [20]:
# Add the segment-analysis columns with a second inner join on the zone name.
sl = sl.merge(sl_sa, how="inner", on="zone_name")

In [21]:
# Preview the first row of the combined Streetlight table.
sl.head(1)

Unnamed: 0,aadt_data_periods,zone_name,aadt_estimated_2022_aadt,npt_data_periods,npt_average_daily_segment_traffic__stl_volume_,sa_data_periods,sa_average_daily_segment_traffic__stl_volume_
0,"Jan 01, 2022 - Dec 31, 2022",Benicia-Martinez Bridge / 123867334 / 1,39493,"Oct 01, 2021 - Sep 30, 2022",35593,"Oct 01, 2021 - Apr 30, 2022",38336


In [22]:
## reading in shapefiles

In [23]:
# Open the zipped analysis-line shapefile on GCS and load it with geopandas.
analysis_line_zip = "gs://calitp-analytics-data/data-analyses/big_data/i680/1751074_I680_AADT_Test_analysis_line.zip"
with get_fs().open(analysis_line_zip) as zipped_lines:
    shp = gpd.read_file(zipped_lines)

In [24]:
# Reproject the analysis lines to EPSG:3857.
shp_extent = shp.to_crs(epsg=3857)

In [25]:
## Checking whether the shapefile `name` values will merge cleanly with the
## Streetlight zone names: look up one segment by an id substring of its name.
shp_extent>>filter(_.name.str.contains("394112478"))

Unnamed: 0,id,name,direction,is_pass,is_bidi,road_type,geom_len,gate_lat,gate_lon,gate_width,geometry
210,,I 680 / 394112478 / 1,163.931,1,0,,0.04,,,,"LINESTRING (-13571929.272 4535791.034, -135719..."


In [26]:
# Outer-join the segment geometries to the combined Streetlight table
# (shapefile `name` against Streetlight `zone_name`). indicator=True adds a
# `_merge` column recording which side each row came from.
df = shp_extent.merge(
    sl,
    how="outer",
    left_on="name",
    right_on="zone_name",
    indicator=True,
)
gdf = gpd.GeoDataFrame(df)

In [27]:
# Spot-check one random merged row, including the `_merge` indicator column.
# No random_state is passed, so a different row may be shown on each run.
gdf.sample()

Unnamed: 0,id,name,direction,is_pass,is_bidi,road_type,geom_len,gate_lat,gate_lon,gate_width,geometry,aadt_data_periods,zone_name,aadt_estimated_2022_aadt,npt_data_periods,npt_average_daily_segment_traffic__stl_volume_,sa_data_periods,sa_average_daily_segment_traffic__stl_volume_,_merge
378,,Senator Daniel E Boatwright Highway / 31300303...,317.601,1,0,,0.563,,,,"LINESTRING (-13588520.986 4578255.854, -135885...","Jan 01, 2022 - Dec 31, 2022",Senator Daniel E Boatwright Highway / 31300303...,45338,"Oct 01, 2021 - Sep 30, 2022",46522,"Oct 01, 2021 - Apr 30, 2022",44288,both


In [28]:
# Interactive map of the Streetlight 2022 AADT estimate per segment.
# The column is passed by name rather than as a Series, so explore() looks the
# values up on the frame it actually plots. vmin/vmax pin the colour scale to
# 0-140,000, the same limits used by the other Streetlight maps in this notebook.
gdf.explore(
    column="aadt_estimated_2022_aadt",
    legend=True,
    tiles="CartoDB positron",
    cmap="YlOrBr",
    vmax=140000,
    vmin=0,
)

In [29]:
# Interactive map of the network-performance average daily segment volume.
# The column is passed by name rather than as a Series, so explore() looks the
# values up on the frame it actually plots. vmin/vmax pin the colour scale to
# 0-140,000, the same limits used by the other Streetlight maps in this notebook.
gdf.explore(
    column="npt_average_daily_segment_traffic__stl_volume_",
    legend=True,
    tiles="CartoDB positron",
    cmap="YlOrBr",
    vmax=140000,
    vmin=0,
)

In [30]:
# Interactive map of the segment-analysis average daily segment volume.
# The column is passed by name rather than as a Series, so explore() looks the
# values up on the frame it actually plots. vmin/vmax pin the colour scale to
# 0-140,000, the same limits used by the other Streetlight maps in this notebook.
gdf.explore(
    column="sa_average_daily_segment_traffic__stl_volume_",
    legend=True,
    tiles="CartoDB positron",
    cmap="YlOrBr",
    vmax=140000,
    vmin=0,
)

In [31]:
## Want to read in CT Geodata instead of the regular AADT that we read in earlier. 


In [32]:
# Load the zipped Caltrans AADT shapefile from GCS, then reproject it to EPSG:3857.
ct_aadt_zip = "gs://calitp-analytics-data/data-analyses/big_data/CT_AADT/Traffic_Volumes_AADT.zip"
with get_fs().open(ct_aadt_zip) as zipped_points:
    shp_ct = gpd.read_file(zipped_points)

shp_ct = shp_ct.to_crs(epsg=3857)


In [33]:
# Keep only route 680. ROUTE is compared to the string '680' here, whereas the
# Excel-derived ct_aadt table earlier in the notebook was filtered with the integer 680.
shp_ct = shp_ct>>filter(_.ROUTE=='680')

In [34]:
# Preview the first six route-680 rows.
shp_ct.head(6)

Unnamed: 0,OBJECTID,DISTRICT,ROUTE,ROUTE_SFX,COUNTY,PM_PFX,PM,PM_SFX,LOCATION_D,BACK_PEAK_,BACK_PEA_1,BACK_AADT,AHEAD_PEAK,AHEAD_PE_1,AHEAD_AADT,geometry
13126,13127,4,680,,SCL,M,0.0,,"SAN JOSE, JCT. RTES. 101/280",,,,11100,155000,138000,POINT (-13564494.747 4486541.707)
13127,13128,4,680,,SCL,M,0.0,,"SAN JOSE, JCT. RTES. 101/280",,,,11100,155000,138000,POINT (-13564515.446 4486568.226)
13128,13129,4,680,,SCL,M,0.385,,"SAN JOSE, KING ROAD",11100.0,155000.0,138000.0,17300,242000,215000,POINT (-13563895.595 4487039.306)
13129,13130,4,680,,SCL,M,0.385,,"SAN JOSE, KING ROAD",11100.0,155000.0,138000.0,17300,242000,215000,POINT (-13563916.953 4487063.313)
13130,13131,4,680,,SCL,M,1.189,,"SAN JOSE, JACKSON AVENUE",17300.0,242000.0,215000.0,15800,222000,197000,POINT (-13562927.950 4488304.116)
13131,13132,4,680,,SCL,M,1.189,,"SAN JOSE, JACKSON AVENUE",17300.0,242000.0,215000.0,15800,222000,197000,POINT (-13562949.232 4488334.354)


In [35]:
## Looks like each location is duplicated: one point for each side of the road, yet with the same numbers?
## Will keep one row per location.

In [36]:
# Drop rows that repeat the same values in all of the listed columns,
# keeping the first occurrence of each combination.
dedupe_columns = [
    'PM',
    'COUNTY',
    'LOCATION_D',
    'BACK_PEAK_',
    'BACK_AADT',
    'AHEAD_PEAK',
    'AHEAD_AADT',
]
shp_ct = shp_ct.drop_duplicates(subset=dedupe_columns)

In [37]:
# Collapse to one row per location description.
# NOTE(review): GroupBy.first() takes the first non-null value of each column
# within a group, so when a LOCATION_D value covers more than one row the
# result can combine values from different rows. Grouping on LOCATION_D alone
# also merges records that share a description across counties - confirm this
# is intended.
# NOTE(review): groupby sorts by the group key by default, so the row order
# changes here.
shp_ct= shp_ct.groupby(['LOCATION_D'], as_index=False).first()

In [38]:
# Inspect the collapsed row for the county-line location.
shp_ct>>filter(_.LOCATION_D=="SANTA CLARA/ALAMEDA COUNTY LINE")

Unnamed: 0,LOCATION_D,OBJECTID,DISTRICT,ROUTE,ROUTE_SFX,COUNTY,PM_PFX,PM,PM_SFX,BACK_PEAK_,BACK_PEA_1,BACK_AADT,AHEAD_PEAK,AHEAD_PE_1,AHEAD_AADT,geometry
52,SANTA CLARA/ALAMEDA COUNTY LINE,13151,4,680,,SCL,M,9.935,,10200,143000,124000,8900,124000,108000,POINT (-13570286.578 4504062.281)
