## Long Range Transportation Plan

In [1]:
import _categorizing_utils as categorizing_utils
import _harmonization_utils as harmonization_utils
import _lrtp_utils as lrtp_utils
import _specific_list_utils as list_utils
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp_data_analysis.sql import to_snakecase

In [2]:
import fsspec

In [3]:
import warnings
# Suppress all warnings (every category, from every module) for the rest of the
# kernel session. Note that this also hides deprecation warnings raised by the
# libraries imported above.
warnings.filterwarnings("ignore")

In [4]:
# Notebook-wide pandas display settings.
# Show up to 100 columns and never truncate the number of rows or the width of
# long text cells when a frame is rendered.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
# Render floats with two decimal places.
pd.set_option("display.float_format", "{:.2f}".format)

### AMBAG

In [5]:
ambag = lrtp_utils.ambag_lrtp()

In [6]:
lrtp_utils.give_info(ambag, "project", "project_description")

Citywide Bike Lanes                         2
The Alameda - Salinas Road Bike Route       2
Citywide Sidewalk Program                   2
Carmel to Pebble Beach Bike/Ped Facility    1
Second Street Bike Lane                     1
Name: project, dtype: int64
# of unique project titles: 277
After dropping duplicates using project and project_description: 280
Df shape: (280, 6)
Index(['ambag_id', 'project', 'project_description', 'total_cost_\n_$_000s_',
       'county', 'total_cost_millions'],
      dtype='object', name=0)


In [7]:
ambag.loc[ambag.project == "Citywide Bike Lanes"]

Unnamed: 0,ambag_id,project,project_description,total_cost_\n_$_000s_,county,total_cost_millions
133,MON-SOL075-SO,Citywide Bike Lanes,"Bike Lanes (2007 TIF M2, 2013 TIF M2); construct bike lanes citywide",1440,Santa Cruz,1440000
52,SC-SV-P41-SCV,Citywide Bike Lanes,Construction of additional bike lanes and paths citywide (including Green Hills).,2060,San Benito,2060000


### BCAG 

In [8]:
bcag = lrtp_utils.bcag_lrtp()

In [9]:
# Spot-check three random rows, comparing the raw fund estimate text with the
# numeric total cost column.
bcag.loc[:, ["fund_estimate", "total_project_cost", "rtp_id"]].sample(3)

Unnamed: 0,fund_estimate,total_project_cost,rtp_id
235,$ 0.077 million,4812.5,236
67,$1.5 million,750000.0,69
216,$0.006 million,5760.0,217


In [10]:
lrtp_utils.give_info(bcag, "project_descr", "total_project_cost")

New Traffic Signal                                                                                                                                   6
Turn lane capacity expansion, storage length expansion, channelization improvements, pedestrian safety due to increased traffic volumes.             4
East Gridley Rd. At Feather River, 1.0 mile east of Larkin Rd. Scope is to address cracks with a Methacrylate Deck treatment. Bridge No. 12C0022.    2
Roundabout (within existing ROW)                                                                                                                     2
operational flow improvments (traffic signals or roundabouts)                                                                                        2
Name: project_descr, dtype: int64
# of unique project titles: 239
After dropping duplicates using project_descr and total_project_cost: 247
Df shape: (250, 23)
Index(['rtp_id', 'agency', 'ftip', 'rtp', 'project_type', 'title',
       'project_de

In [11]:
# bcag.loc[bcag.project_descr == "Turn lane capacity expansion, storage length expansion, channelization improvements, pedestrian safety due to increased traffic volumes."]

### Fresno COG

In [12]:
fresno_cog = lrtp_utils.fresnocog_lrtp()

In [13]:
lrtp_utils.give_info(fresno_cog, "project_title", "estimated\ntotal_cost\n_$1,000_")

None                                               3
SR 41 at Elkhorn Ave Roundabout                    2
SR 41 at Mount Whitney Ave Roundabout              2
Ventura, SR 41 to SR 99  Widen from 4LU to 4 LD    2
Herndon-Polk to Weber : 4Ld to 6LD                 2
Name: project_title, dtype: int64
# of unique project titles: 3129
After dropping duplicates using project_title and estimated
total_cost
_$1,000_: 3143
Df shape: (3147, 10)
Index(['agency', 'project_id', 'project_title', 'project_description',
       'project_type', 'estimated_open_to_traffic',
       'estimated\ntotal_cost\n_$1,000_', 'est__o2t_year',
       'est_total_project_cost', 'financial_constraint'],
      dtype='object')


In [14]:
# fresno_cog.loc[fresno_cog.project_type.str.contains("Ac")]

In [15]:
lrtp_utils.group_duplicates(
    fresno_cog,
    "project_title",
    "estimated\ntotal_cost\n_$1,000_",
    "project_description",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,project_description
project_title,"estimated total_cost _$1,000_",Unnamed: 2_level_1
"Ventura, SR 41 to SR 99 Widen from 4LU to 4 LD",3427.0,2
Herndon-Polk to Weber : 4Ld to 6LD,2931.0,2
10th Ave / Kamm Ave Traffic Signal,400.0,1
Millerton Road From Auberry Rd to SR 168 Shoulder Widening,5910.0,1
Midtown Trail: Millbrook - Michigan to McKinley,680.0,1
Milburn & Spruce: New Traffic Signal,620.0,1
Mill and Orange Park & Ride Lot,505.0,1
Millbrook & Nees: Left Turn Phasing,380.0,1
Millbrook & Olympic: New Traffic Signal,500.0,1
Millbrook - Alluvial to Shepherd: AC Overlay,656.0,1


In [16]:
fresno_cog[fresno_cog.project_title.str.contains("Ventura, SR 41")]

Unnamed: 0,agency,project_id,project_title,project_description,project_type,estimated_open_to_traffic,"estimated\ntotal_cost\n_$1,000_",est__o2t_year,est_total_project_cost,financial_constraint
989,Fresno,FRE111312,"Ventura, SR 41 to SR 99 Widen from 4LU to 4 LD",Widen to 4 LN Divided Arterial,Streets & Roads - Capacity Increasing,2027-2031,3427.0,2028-01-01,3427000.0,constrained
2071,Fresno,FRE111312,"Ventura, SR 41 to SR 99 Widen from 4LU to 4 LD",Widen to 4 LN Divided Arterial(Measure C Project F in the Urban Regional Program),Streets & Roads-Capacity Increasing,2028,3427.0,NaT,,constrained


In [17]:
3427*1_000

3427000

### KCAG

In [18]:
kcag = lrtp_utils.kcag_lrtp()

In [19]:
kcag.sample()

Unnamed: 0,category,jurisdiction,state_route,post_mile,location,project_limits,description,title,total_cost
18,Proposed Improvements,Lemoore,,,Spring Lane,100 ft. east of Beverly Dr.,Overlay,,0


In [20]:
# kcag.title.nunique()

In [21]:
lrtp_utils.give_info(kcag, "description", "category")

Overlay                                                                                        33
Widen from 2 to 4 lanes with left turns                                                         4
Reconstruct and improve curb/ramps                                                              3
Intersection Improvements                                                                       2
Overlay of various thicknesses.  Some shoulder work and dig-out and patch prior to overlay.     2
Name: description, dtype: int64
# of unique project titles: 18
After dropping duplicates using description and category: 20
Df shape: (84, 9)
Index(['category', 'jurisdiction', 'state_route', 'post_mile', 'location',
       'project_limits', 'description', 'title', 'total_cost'],
      dtype='object')


In [22]:
lrtp_utils.group_duplicates(kcag, "location", "description", "category")

Unnamed: 0_level_0,Unnamed: 1_level_0,category
location,description,Unnamed: 2_level_1
10th Avenue,Widen from 2 to 4 lanes with left turns,1
Purchase CNG Refuse Vehicle,Vehicle Purchase,1
Lemoore Avenue,"Widen to 5 lanes, add 2 lanes with continuous left turn lane",1
Linda Lee Circle,Overlay,1
Madera Street,Reconstruct and improve curb/ramps,1
Magnolia Street,Overlay,1
Major Roads,Overlay of various thicknesses. Some shoulder work and dig-out and patch prior to overlay.,1
Merced Street,Reconstruct and improve curb/ramps,1
Minor Roads,Overlay of various thicknesses. Some shoulder work and dig-out and patch prior to overlay.,1
Montecito Court,Overlay,1


In [23]:
kcag.total_cost.describe()

count   84.00
mean     0.00
std      0.00
min      0.00
25%      0.00
50%      0.00
75%      0.00
max      0.00
Name: total_cost, dtype: float64

In [24]:
kcag.sample()

Unnamed: 0,category,jurisdiction,state_route,post_mile,location,project_limits,description,title,total_cost
42,Proposed Improvements,Lemoore,,,Magnolia Street,Lemoore Ave. to Smith Ave.,Overlay,,0


### Kern COG

In [25]:
kern = lrtp_utils.kern_lrtp()

In [26]:
kern.sample(2)

Unnamed: 0,project_title,scope,yoe_w__new_revenue,yoe_w_o_new_reven,maint__inflation_savings,cost
360,Metro Passenger Rail,"Amtrak Station ‐ Phase II $ 13,000",13000000,13000000,,13000000
1126,Mendiburu Path / California City Blvd‐88,Class I Shared Use Path ‐ 1.6 mile ‐ Add new off‐St class I shared use path,1445000,1445000,,1445000


In [27]:
lrtp_utils.give_info(kern, "project_title", "cost")

Kern County ‐ Class II Bike Ln        85
Bakersfield ‐ Class III Bike Blvd     85
Bakersfield ‐ Class II Bike Ln        78
Kern County ‐ Sidewalk Improvement    71
Kern County ‐ Class III Bike Blvd     35
Name: project_title, dtype: int64
# of unique project titles: 371
After dropping duplicates using project_title and cost: 1180
Df shape: (1411, 6)
Index(['project_title', 'scope', 'yoe_w__new_revenue', 'yoe_w_o_new_reven',
       'maint__inflation_savings', 'cost'],
      dtype='object')


In [28]:
# kern.loc[kern.project_title == "M St / Belshaw St‐Park St"]

In [29]:
lrtp_utils.group_duplicates(kern, "project_title", "scope", "cost")

Unnamed: 0_level_0,Unnamed: 1_level_0,cost
project_title,scope,Unnamed: 2_level_1
County Areas ‐ transit,"Senior/disabled & advanced technology transit, vanpools, shared ride, aviation",9
County Areas ‐ active transportation,"Safe complete streets, pedestrian enhancements",5
County Areas ‐ bike facilities,"Construct Class I (trails), II (lanes) or Class III (routes) Bike Paths; striping; signage",3
Kern County ‐ Complete Streets/ITS I,Other Future developments funded by a transportation impact fee and mitigation,3
Kern County ‐ Corridor Improvement,Hall Rd (San Emidio St to Habecker Rd) .5 mi.,2
Route 58,2Future Rt 58 from I‐5 to Heath Rd at Stockdale Hwy ‐ construct new freeway,2
Kern County ‐ Corridor Improvement,Myrtle Av (Panama Ln to Wharton Av) .9 mi.,2
Caltrans ‐ Class II Bike Ln,E Bear Mtn Blvd SR 223 (Comanche Dr to Union St) 12.6 mi.,2
Santa Fe Way,1Hageman to Los Angeles Ave ‐ widen to four lanes,2
Kern County ‐ Corridor Improvement,San Diego St (Burgundy Av to Wharton Av) .7 mi.,2


In [30]:
kern[kern.project_title == "US 395"].sort_values(["cost"])

Unnamed: 0,project_title,scope,yoe_w__new_revenue,yoe_w_o_new_reven,maint__inflation_savings,cost
155,US 395,Between Rt 178 and China Lake Blvd ‐ construct passing lanes,15117000,191000000,4883000,15117000
161,US 395,San Bdo County Line to Rt 14 ‐ widen to four lanes,184422000,451035000,59578000,184422000


In [31]:
kern[kern.project_title == "Teh. Willow Springs Rd"].sort_values(["cost"])

Unnamed: 0,project_title,scope,yoe_w__new_revenue,yoe_w_o_new_reven,maint__inflation_savings,cost
1115,Teh. Willow Springs Rd,Rt 58 to Rosamond Blvd ‐ widen to four lanes,114101000,150961000,36861000,114101000
1322,Teh. Willow Springs Rd,Rt 58 to Rosamond Blvd ‐ widen to four lanes,377914000,377914000,122086000,377914000


### Madera CTC

In [32]:
madera = lrtp_utils.madera_lrtp()

In [33]:
lrtp_utils.group_duplicates(madera, "project_name", "total_cost", "description")

Unnamed: 0_level_0,Unnamed: 1_level_0,description
project_name,total_cost,Unnamed: 2_level_1
Purchase Transit Vehicle,300000,3
Road 36,563000,3
Avenue 12,10000000,3
Various,1000000,3
Sunset Avenue & N Westberry Blvd,$,2
Pecan Avenue,665000,2
Avenue 9,567000,2
James Madison Elementary School SRTS Pedestrian Improvements,$,2
Northwest Downtown Pedestrian Access,$,2
SR 41,$ -,2


In [34]:
lrtp_utils.give_info(madera, "project_name", "total_cost")

Various                            20
Avenue 12                          10
Avenue 9                            8
State Route 41                      6
Purchase Transit Van (Electric)     5
Name: project_name, dtype: int64
# of unique project titles: 515
After dropping duplicates using project_name and total_cost: 735
Df shape: (765, 8)
Index(['category', 'agency', 'project_name', 'location', 'project_id',
       'description', 'opening_year', 'total_cost'],
      dtype='object')


In [35]:
madera[madera.project_name == "Purchase Transit Vehicle"]

Unnamed: 0,category,agency,project_name,location,project_id,description,opening_year,total_cost
2,Public Transit,City of Madera,Purchase Transit Vehicle,,MAD213201,Purchase new transit vehicle,,300000
3,Public Transit,City of Madera,Purchase Transit Vehicle,,MAD213202,Purchase new transit vehicle,,300000
4,Public Transit,City of Madera,Purchase Transit Vehicle,,MAD213203,Purchase new transit vehicle,,300000
23,Public Transit,Madera County,Purchase Transit Vehicle,,MAD115006,Purchase new transit vehicle,,825000
24,Public Transit,Madera County,Purchase Transit Vehicle,,MAD115010,Purchase new transit vehicle,,57000


In [36]:
madera[madera.project_name == "Avenue 12"].head()

Unnamed: 0,category,agency,project_name,location,project_id,description,opening_year,total_cost
46,Streets and Roads,Madera County,Avenue 12,Road 38 to Road 40,,2 Lanes to 4 Lanes,2027.0,10000000
47,Streets and Roads,Madera County,Avenue 12,Road 40 to Riverwalk\nBlvd,,2 Lanes to 6 Lanes,2027.0,10000000
48,Streets and Roads,Madera County,Avenue 12,Riverwalk Blvd to SR\n41,,4 Lanes to 8 Lanes,2027.0,10000000
49,Streets and Roads,Madera County,Avenue 12,SR 41 To Flagbarn Rd,,2 Lanes to 4 Lanes,2030.0,4250000
51,Streets and Roads,Madera County,Avenue 12,Road 30 1/2 to Road 36,,2 Lanes to 4 Lanes,2030.0,21000000


### MCAGOV

In [37]:
mcagov = lrtp_utils.mcagov_lrtp()

In [38]:
lrtp_utils.give_info(mcagov, "title", "total_cost\n_$1,000s_")

Bellevue Rd Realignment/Reconstruction    1
Minturn Road Rehabilitation               1
MCAG Vanpool Program (dibs)               1
Measure V Transit Free Ride Program       1
Planada Route Improvements                1
Name: title, dtype: int64
# of unique project titles: 108
After dropping duplicates using title and total_cost
_$1,000s_: 108
Df shape: (108, 8)
Index(['agency', 'title', 'limits_description', 'type', 'completion\nyear',
       'total_cost\n_$1,000s_', 'funding_sources', 'total_cost_millions'],
      dtype='object')


In [39]:
mcagov.sample()

Unnamed: 0,agency,title,limits_description,type,completion\nyear,"total_cost\n_$1,000s_",funding_sources,total_cost_millions
54,Merced,SR-59 Merced Widening Phase 4,Widen 2 to 4 lanes from Cardella Rd to Bellevue Rd,Road Capacity,2045,30000,"SB-1, Measure V, Local, SHOPP",30000000


### MTC
* The tabular project list has 133 projects, but the point and line map layers return roughly 240 rows.
* The geospatial layers split each project into its phases, while the regular pandas DataFrame lists each project once, in its entirety.
* https://www.planbayarea.org/2050-plan/final-plan-bay-area-2050/final-supplemental-reports/interactive-transportation-project-list
* https://opendata.mtc.ca.gov/datasets/MTC::2023-transportation-improvement-program-projects-point/about
* https://opendata.mtc.ca.gov/datasets/MTC::plan-bay-area-2050-transportation-projects-line/about
* https://data.bayareametro.gov/Projects/Plan-Bay-Area-2050-Transportation-Project-List/y7ka-jksz

In [40]:
mtc_og = lrtp_utils.mtc_lrtp()

In [41]:
# mtc_og.drop(columns = ['geometry','scope']).sample()

In [42]:
#layer_list = ["planbayarea2050_rtp_line", "planbayarea2050_rtp_point"]
#url_pt_1 = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/ArcGIS/rest/services/"
#url_pt_2 = "/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&relationParam=&returnGeodetic=false&outFields=*+&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&defaultSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token="
#gdf = lrtp_utils.open_rest_server(url_pt_1, url_pt_2, layer_list)

In [43]:
#gdf.columns

In [44]:
#gdf.loc[gdf.rtp_id == "21-T10-088"]

In [45]:
#all_projects_url = "https://data.bayareametro.gov/resource/y7ka-jksz.json?$query=SELECT%0A%20%20%60plan_strategy%60%2C%0A%20%20%60rtpid%60%2C%0A%20%20%60title%60%2C%0A%20%20%60scope%60%2C%0A%20%20%60open_period%60%2C%0A%20%20%60funding_millions_yoe%60%2C%0A%20%20%60county%60"
#all_projects = pd.read_json(all_projects_url)

In [46]:
#all_projects.loc[all_projects.rtpid == "21-T10-088"]

In [47]:
#mtc_og.loc[mtc_og.rtpid == "21-T10-088"].drop(columns = ['scope','geometry'])

In [48]:
#mtc_og.loc[mtc_og.proj_title == "Light Rail | Service Expansion | VTA | Stevens Creek Blvd"].drop(columns = ['scope','geometry'])

### Sacramento Area Council of Governments (SACOG)
* https://www.sacog.org/sites/main/files/file-attachments/2020_mtp-scs.pdf?1580330993

In [49]:
sacog = lrtp_utils.sacog_lrtp()

96 rows are headers


In [50]:
lrtp_utils.give_info(sacog, "title", "description")

Safety Improvements                            2
Connect Card Implementation                    2
Aux Lane Project: EB Latrobe Road              1
Bus Maintenance Facility #1 Rehabilitation     1
Name: title, dtype: int64
# of unique project titles: 1599
After dropping duplicates using title and description: 1601
Df shape: (1601, 10)
Index(['id', 'status__planned,_programmed_or_project_development_only_',
       'county', 'lead_agency', 'budget_category', 'title', 'description',
       'total_project_cost__2018_dollars_',
       'year_of_expenditure_cost_for_planned_projects', 'completion_timing'],
      dtype='object')


In [51]:
sacog.sample()

Unnamed: 0,id,"status__planned,_programmed_or_project_development_only_",county,lead_agency,budget_category,title,description,total_project_cost__2018_dollars_,year_of_expenditure_cost_for_planned_projects,completion_timing
723,SAC24111,Programmed,SAC,City of Elk Grove,B- Road & Highway Capacity,Lotz Parkway,"In Elk Grove, Lotz Parkway from Whitelock Parkway to Poppy Ridge Road: Construct new 4-lane roadway; and Lotz Parkway from Poppy Ridge Road to\n0.5 miles south of Whitelock Pkwy at the northern boundary of the Sterling\nMeadows development area: Construct new 2-lane roadway.",8662500,,2020-2025


### SANDAG
* https://www.arcgis.com/home/item.html?id=ba8b2a6247ef463cb7411f039959f3ee
* https://www.arcgis.com/apps/mapviewer/index.html?webmap=ba8b2a6247ef463cb7411f039959f3ee

In [52]:
sandag = lrtp_utils.sandag_lrtp()

In [53]:
lrtp_utils.group_duplicates(sandag, "project_name", "description", "total_project_cost")

Unnamed: 0_level_0,Unnamed: 1_level_0,total_project_cost
project_name,description,Unnamed: 2_level_1
LRT 530,Green Line (Santee to Downtown; Double/Third-tracking and Grade Separations),2
Rapid 630,Iris Trolley/Palomar to Kearny Mesa via I-5/163 and City College,1
I-5 (Cassidy St to Harbor Dr.),6 Freeway Lanes and 4 Managed Lanes,1
Rapid 471,Downtown Escondido to East Escondido,1
Rapid 450 Phase 2,Oceanside to Escondido via Palomar Airport Rd and SR 78 (Full version of Rapid),1
Rapid 450 Phase 1,Oceanside to Escondido via Palomar Airport Rd and SR 78 (Light version of Rapid),1
Rapid 440,Carlsbad to Escondido Transit Center via Palomar Airport Rd,1
Rapid 41,Fashion Valley to UTC/UC San Diego via Linda Vista and Clairemont,1
Rapid 30,"Balboa Station to Sorrento Mesa via Pacific Beach, La Jolla, UTC",1
Rapid 295,Spring Valley to Clairemont via La Mesa & Kearny Mesa,1


In [54]:
# Pull the rows titled "LRT 530", keeping only the columns needed to compare them.
sandag.loc[
    sandag.project_name == "LRT 530",
    ["project_name", "total_project_cost", "description"],
]

Unnamed: 0,project_name,total_project_cost,description
159,LRT 530,192000000.0,Green Line (Santee to Downtown; Double/Third-tracking and Grade Separations)
160,LRT 530,192000000.0,Green Line (Santee to Downtown; Double/Third-tracking and Grade Separations)


### SBCAG

In [55]:
sbcag = lrtp_utils.sbcag_lrtp()

65 rows are headers


In [56]:
lrtp_utils.give_info(sbcag, "project_title", "total_cost__$000s_")

SB-PL-15: Upper De la Vina St Gap Closure and Safe Crossings              2
CT-1: SR 246 Passing Lanes – Planting Mitigation (FTIP CT93)(EA 0C641)    1
SM-PL-10: A Street Widening                                               1
Sol-PL-6: Fredensborg Canyon Rd/Adobe Creek Culvert Replacement           1
Sol-PL-5: Solvang School Sidewalk Project                                 1
Name: project_title, dtype: int64
# of unique project titles: 418
After dropping duplicates using project_title and total_cost__$000s_: 419
Df shape: (419, 8)
Index(['type', 'project_title', 'phase', 'description',
       'primary_funding_source_s_', 'year', 'total_cost__$000s_',
       'total_cost_millions'],
      dtype='object')


In [57]:
sbcag.sample()

Unnamed: 0,type,project_title,phase,description,primary_funding_source_s_,year,total_cost__$000s_,total_cost_millions
367,VMT Reducing,SB-PL-15: Upper De la Vina St Gap Closure and Safe Crossings,"PA&ED, PS&E,\nConstruction",Implement a road diet on De La Vina Street from Constance Avenue to Padre Street. Crossing enhancements included.,,2050,1988,1988000


### Shasta County Regional Transportation Planning Agency (SCRTPA)

In [58]:
scrtpa = lrtp_utils.scrtpa_lrtp()

In [59]:
lrtp_utils.give_info(scrtpa, "regional_transportation_projects", "cost")

E Cypress Ave-Alfreda Way-Victor Ave                      2
GROVE ST-B ST-WALNUT ST                                   2
Churn Creek Rd-Rancho Rd-Churn Creek Rd/S Bonnyview Rd    2
Hartnell Ave-Victor Ave-Shasta View Dr                    2
Hawley Rd-Hawley Rd (e/o Norwich Ct)-Collyer Dr           2
Name: regional_transportation_projects, dtype: int64
# of unique project titles: 1051
After dropping duplicates using regional_transportation_projects and cost: 1066
Df shape: (1066, 7)
Index(['regional_transportation_projects',
       'short_term_total_est_cost_of_project',
       'long_term_total_est_cost_of_project', 'project_band',
       'project_type\n_project_intent_', 'expected_funding_sources', 'cost'],
      dtype='object')


In [60]:
scrtpa[
    scrtpa.regional_transportation_projects == "E Cypress Ave-Alfreda Way-Victor Ave"
]

Unnamed: 0,regional_transportation_projects,short_term_total_est_cost_of_project,long_term_total_est_cost_of_project,project_band,project_type\n_project_intent_,expected_funding_sources,cost
831,E Cypress Ave-Alfreda Way-Victor Ave,,29913,(2026-2040),Buffered Bike Lane,unknown,29913
833,E Cypress Ave-Alfreda Way-Victor Ave,,529884,(2026-2040),Sidewalk,unknown,529884


### San Luis Obispo Council of Governments (SLOCOG)

In [61]:
slocog = lrtp_utils.slocog_lrtp()

In [62]:
lrtp_utils.give_info(slocog, "project_name", "total_cost")

Hazel Ln. SRTS Improvements                          2
Main St. Sidewalks                                   2
Los Berros Rd. Widening                              2
Octagon Barn Park-and-Ride Lot and shared-use lot    1
Augusta Neighborhood SRTS Improvements               1
Name: project_name, dtype: int64
# of unique project titles: 416
After dropping duplicates using project_name and total_cost: 420
Df shape: (420, 12)
Index(['index', 'project_name', 'project_description', 'sponsor', 'community',
       'time_period', 'project_type', '_2023_rtp_project_id', 'time_horizon',
       'total_cost', 'project_id', 'geometry'],
      dtype='object')


In [63]:
# slocog[slocog.project_name == "Hazel Ln. SRTS Improvements"].sample()

### Southern California Association of Governments (SCAG) 

In [64]:
scag = lrtp_utils.scag_lrtp()

360 rows are headers


In [65]:
lrtp_utils.give_info(scag, "description", "route_name")

WIDEN FROM 2 TO 4 LANES      170
WIDEN FROM 4 TO 6 LANES       64
WIDEN FROM 2 TO 6 LANES       43
CONSTRUCT 4 LANE ARTERIAL     21
WIDEN FROM 3 TO 4 LANES       10
Name: description, dtype: int64
# of unique project titles: 2607
After dropping duplicates using description and route_name: 2824
Df shape: (2952, 13)
Index(['system', 'lead_agency', 'rtp_id', 'route_#', 'route_name', 'from',
       'to', 'description', 'completion_year', 'project_cost__$1,000s_',
       'county', 'project_title', 'project_cost_millions'],
      dtype='object')


In [66]:
lrtp_utils.group_duplicates(
    scag, "description", "project_cost__$1,000s_", "project_title"
)

Unnamed: 0_level_0,Unnamed: 1_level_0,project_title
description,"project_cost__$1,000s_",Unnamed: 2_level_1
WIDEN FROM 2 TO 4 LANES,3000,5
WIDEN FROM 2 TO 4 LANES,6000,3
WIDEN FROM 2 TO 4 LANES,3210,2
WIDEN FROM 2 TO 4 LANES,1878,2
WIDEN FROM 4 TO 6 LANES,10000,2
WIDEN FROM 2 TO 4 LANES,6901,2
WIDEN FROM 2 TO 4 LANES,8000,2
WIDEN FROM 4 TO 6 LANES,5000,2
WIDEN FROM 4 TO 6 LANES,20000,2
WIDEN FROM 2 TO 4 LANES,9571,2


In [67]:
3000*1_000

3000000

In [68]:
# Rows described as a 4-to-6 lane widening whose "project_cost__$1,000s_" value is 20000.
scag[
    scag.description.eq("WIDEN FROM 4 TO 6 LANES")
    & scag["project_cost__$1,000s_"].eq(20000)
]

Unnamed: 0,system,lead_agency,rtp_id,route_#,route_name,from,to,description,completion_year,"project_cost__$1,000s_",county,project_title,project_cost_millions
1441,LOCAL HIGHWAY,PERRIS,3A01WT105,0,PERRIS BLVD,RAMONA EXPWY,NUEVO RD,WIDEN FROM 4 TO 6 LANES,2025,20000,,No Title,20000000
1595,LOCAL HIGHWAY,"RIVERSIDE, CITY OF",3A07315,0,VAN BUREN BLVD,AUDREY AVE,GARFIELD,WIDEN FROM 4 TO 6 LANES,2026,20000,,No Title,20000000


In [69]:
scag.loc[scag.description.str.contains("Rail Capital Projects", case=False)]


Unnamed: 0,system,lead_agency,rtp_id,route_#,route_name,from,to,description,completion_year,"project_cost__$1,000s_",county,project_title,project_cost_millions
812,TRANSIT,LOS ANGELES COUNTY MTA (METRO),1TL0703,0,METRO RAIL TRANSIT CAPITAL,COUNTYWIDE,,RAIL CAPITAL PROJECTS,2040,19151000,,No Title,19151000000


In [70]:
38960000*1_000

38960000000

### SJCOG

In [None]:
sjcog = lrtp_utils.sjcog_lrtp()

In [None]:
lrtp_utils.give_info(sjcog, "project_description", "total")

In [None]:
lrtp_utils.group_duplicates(sjcog, "project_description", "total", "project_name")

In [None]:
# First five rows whose description is exactly the 6-to-8 lane widening text.
sjcog.query(
    'project_description == "Widen from 6 to 8 lanes (inside/outside)"'
).head()

In [None]:
# One random row among those that have a completion date recorded.
sjcog.dropna(subset=["completion_date"]).sample()

### StanCOG

In [None]:
stancog = lrtp_utils.stancog_lrtp()

In [None]:
stancog.columns

In [None]:
lrtp_utils.give_info(stancog, "description", "total_cost")

In [None]:
lrtp_utils.group_duplicates(stancog, "project_limits", "description", "total_cost")

In [None]:
# All rows whose description is exactly "Install Traffic Signal".
stancog[stancog.description.eq("Install Traffic Signal")]

### TMPO (Tahoe)
* Open data portal: https://www.tahoeopendata.org/search?tags=transportation
* https://www.tahoeopendata.org/datasets/TRPA::2020-regional-transportation-plan-projects/about

In [None]:
tahoe = lrtp_utils.tmpo_lrtp()

### Tulare County Association of Governments (TCAG)

In [None]:
tcag = lrtp_utils.tcag_lrtp()

In [None]:
# Count rows per (project_title, cost) pair and show the five most repeated pairs.
(
    tcag.groupby(["project_title", "cost_x_1,000"])[["project_category"]]
    .count()
    .sort_values("project_category", ascending=False)
    .head()
)

In [None]:
lrtp_utils.group_duplicates(
    tcag, "project_title", "project_description", "cost_x_1,000"
)

In [None]:
# Rows where both the title and the description read "Santa Fe Trail Regional".
tcag[
    tcag[["project_title", "project_description"]]
    .eq("Santa Fe Trail Regional")
    .all(axis=1)
]

## Stack

### Add LOST

In [None]:
all_mpo_df, all_mpo_gdf = lrtp_utils.all_mpo(True)

In [None]:
# los_df = harmonization_utils.load_lost()

In [None]:
# los_df.sample()

In [None]:
len(all_mpo_df)

In [None]:
all_mpo_gdf.shape

In [None]:
all_mpo_gdf.data_source.unique()

In [None]:
all_mpo_df.shape

In [None]:
all_mpo_df.data_source.value_counts()

In [None]:
# all_mpo_df.drop(columns = ['notes']).sample(3)

In [None]:
gcs_test = pd.read_excel("gs://calitp-analytics-data/data-analyses/project_list/LRTP/all_LRTP_LOST.xlsx")

In [None]:
gcs_test.sample()

In [None]:
"""
all_mpo_gdf.explore(
    "project_title",
    cmap="tab20c",
    height=400,
    width=600,
    style_kwds={"weight": 4},
    legend=False,
)
"""

### Cordon Pricing

In [None]:
# Lower-case phrases used to flag congestion / cordon pricing projects.
# Presumably matched as substrings against the project title and description by
# `list_utils.filter_projects` — TODO confirm against that helper.
congest_keywords = [
    "congestion pricing",
    "variable tolls",
    "express lane",
    "value pricing",
    "rush hour",
    "cordon",
    "dynamic pricing",
    # Fixed: this phrase previously contained two consecutive spaces, so it
    # could not match normally spaced text.
    "dynamically priced",
    "high occupancy",
    "mobility pricing",
    "occupancy",
    "toll lane",
    "performance pricing",
    "peak travel",
    "managed lane",
    "tollway",
    "express toll",
    "fixed pricing",
    "hot lane",
    "hov lane",
    "expressed toll lane",
]

In [None]:
# Filter the stacked MPO project list with the congestion-pricing keywords,
# passing the title and description columns to the helper.
# NOTE(review): the result is unpacked GeoDataFrame-first here, whereas
# `lrtp_utils.all_mpo(True)` earlier in this notebook is unpacked
# DataFrame-first — confirm the return order of `list_utils.filter_projects`.
# NOTE(review): the meaning of the trailing "congestion_pricing_lrtp_lost" and
# `True` arguments is not visible in this notebook; presumably an output name
# and a save flag — verify against the helper.
cordon_gdf, cordon_df = list_utils.filter_projects(
    all_mpo_df,
    [
        "project_title",
        "project_description",
    ],
    congest_keywords,
    "congestion_pricing_lrtp_lost",
    True,
)

In [None]:
len(cordon_df), len(cordon_gdf)

In [None]:
len(cordon_df) / len(all_mpo_df)

In [None]:
cordon_df.project_title.nunique(), cordon_gdf.project_title.nunique()

In [None]:
cordon_df.groupby(["data_source"]).agg({"project_title": "nunique"})

In [None]:
gdf_map_subset = [
    "project_title",
    "lead_agency",
    "project_year",
    "total_project_cost",
    "geometry",
]

In [None]:
"""
cordon_gdf[gdf_map_subset].explore(
    cmap="tab20c",
    height=400,
    width=600,
    style_kwds={"weight": 4},
    legend=False,
)
"""

### Categorizing

In [None]:
# Replace every missing value in the stacked frame with the literal string
# "None" and rebind the name, so all later cells see the filled frame.
# NOTE(review): this applies to all columns, so numeric columns that contained
# NaN will now hold strings — confirm the categorizing helpers expect that.
all_mpo_df = all_mpo_df.fillna("None")

In [None]:
categorized_df = categorizing_utils.add_categories(all_mpo_df)

In [None]:
drop_cols = [
    "lower_case_project_title",
    "lower_case_project_description",
    "lower_case_project_title_keyword_search",
    "lower_case_project_description_keyword_search",
    "geometry",
    "city",
    "county",
]

In [None]:
categorized_df = categorized_df.drop(columns=drop_cols)

In [None]:
categorized_df.n_categories.value_counts()

In [None]:
all_projects_categorized_by_mpo = categorizing_utils.find_categories_by_mpo(categorized_df)

In [None]:
all_projects_categorized_by_mpo.head()

In [None]:
categorizing_utils.count_all_categories(categorized_df)