## TIRCP Calsta
* California State Transportation Agency emailed a request asking for TIRCP outcomes for cycles 3-5.  
* [Cycles 1-6](https://calsta.ca.gov/subject-areas/transit-intercity-rail-capital-prog)
* Cycle 1: 2015
* Cycle 2: 2016
* Cycle 3: 2018
* Cycle 4: 2020
* Cycle 5: 2022
* Cycle 6: 2023

In [1]:
import A1_data_prep
import A2_tableau
import A6_zev
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *



In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
# GCS File Path:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"

### Manipulate TIRCP
#### Filter out for cycles of interest

In [4]:
df_tircp = to_snakecase(A2_tableau.tableau_dashboard())



In [5]:
# Drop irrelevant years. 
df_tircp2 = df_tircp.loc[df_tircp["award_year"] >= 2018].reset_index(drop=True)

In [6]:
# Sort df by award year and number
df_tircp2 = df_tircp2.sort_values(["award_year", "#"])

In [7]:
# Double check that the right cycles are selected. 
df_tircp2.award_year.value_counts(),

(2018    28
 2022    23
 2020    17
 Name: award_year, dtype: int64,)

In [8]:
df_tircp2.ppno.nunique(), df_tircp2.title.nunique(), len(df_tircp2)

(66, 67, 68)

#### Duplicate Project Title Fix

In [9]:
# Find duplicate project title 
df_tircp2.title.value_counts().head()

North State Intercity Bus System                                                                    2
Purchase Zero Emission High Capacity Buses to Support Transbay Tomorrow and Clean Corridors Plan    1
Expansion of WETA Ferry Services                                                                    1
South Bay Microtransit Expansion                                                                    1
Sacramento Valley Station (SVS) Transit Center: Priority Project                                    1
Name: title, dtype: int64

In [10]:
# Create a detailed title column to avoid duplicates by combining  year,title, and recipient into one line.
df_tircp2["award_year"] = df_tircp2["award_year"].astype("object")

In [11]:
# https://stackoverflow.com/questions/39291499/how-to-concatenate-multiple-column-values-into-a-single-column-in-pandas-datafra'
detailed_title_cols = [
    "award_year",
    "title",
    "grant_recipient",
]

In [12]:

df_tircp2["detailed_title_col"] = df_tircp2[detailed_title_cols].apply(
    lambda row: "-".join(row.values.astype(str)), axis=1
)

In [13]:
df_tircp2.columns

Index(['award_year', '#', 'grant_recipient', 'title', 'ppno', 'district',
       'county', 'description', 'master_agreement_number',
       'master_agreement_expiration_date', 'manager', 'regional_coordinator',
       'technical_assistance_calitp__y_n', 'technical_assistance_fleet__y_n',
       'technical_assistance_network_integration__y_n',
       'technical_assistance_priority_population__y_n', 'total__cost', 'tircp',
       'allocated_amount', 'unallocated_amount', 'percentage_allocated',
       'expended_amount', 'other_funds_involved', 'award_cycle', 'is_sb1?',
       'is_ggrf?', 'is_iija?', 'on_shs?', 'calitp',
       'estimated_tircp_ghg_reductions', 'estimated_tircp_ghg_reductions2',
       'increased_ridership', 'service_integration', 'improve_safety',
       'readiness', 'funding_leverage',
       'multi_agency_coordination_integration', 'ab_1550_community_benefits',
       'housing_co_benefits', 'local_agency_address', 'local_agency_city',
       'local_agency_zip', 'local_

In [14]:
# Subset df to columns of interest
df_tircp2 = df_tircp2[
    [
        "award_year",
        "#",
        "ppno",
        "tircp",
        "title",
        "detailed_title_col",
        "grant_recipient",
        "district",
        "county",
        "description",
        "total__cost",
        "estimated_tircp_ghg_reductions",
        "increased_ridership",
        "service_integration",
        "improve_safety",
    ]
]

#### Add Project Number
* Year + Number

In [15]:
df_tircp2["project_number_use"] = (
    df_tircp2["award_year"].astype(str) + "-" + df_tircp2["#"].astype(str)
)

In [16]:
# df_tircp2[["project_number_use","award_year","#","title"]]

### Recreate SCCP's output example with TIRCP data.
Columns to include: 

> Project ID	Project Name	Implementing Agency	Program	Project Description	 Total Cost 	 SB 1 Funds 	Fiscal Year	Is SB 1?	Project Status	Assembly Districts	Senate Districts	Counties	Cities	Caltrans Districts	Is on SHS?	Date Updated	Cycle


#### GIS Template 
* Although Linda provided me with more updated/complete GIS information directly from the Tracking Sheet 2.0, using this to glean project statuses. 

In [17]:
# Read in sheet with Assembly info.
gis = to_snakecase(
    pd.read_excel(
        f"{GCS_FILE_PATH}TIRCP_GIS_Template_Requirements 6-1-2022.xlsx",
        sheet_name="Projects Table",
    )
)

In [18]:
# Clean some column names
gis = gis.rename(
    columns={
        "ppno_": "ppno",
    }
)

In [19]:
# Clean PPNO
gis = A1_data_prep.ppno_slice(gis)

In [20]:
# Subset for only cols of interest
gis2 = gis[
    [
        "project_number",
        "ppno",
        "projecttitle",
        "projectstatus",
    ]
]

In [21]:
gis2.ppno.nunique()

45

In [22]:
# There are mulitple entries for each ppno.
gis2.ppno.value_counts().head()

CP033    60
CP035    21
CP042    18
CP032    14
CP031    11
Name: ppno, dtype: int64

In [23]:
# Inglewood Transit Center coded as CP063, should be CP062 per TIRCP Tracking sheet.
gis2.loc[
    (gis2["projecttitle"] == "Inglewood Transit Center (2020:04)"), "ppno"
] = "CP062"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [24]:
# North State Intercity Bus System coded as CP063 per TIRCP Tracking sheet.
gis2.loc[
    (
        gis2["projecttitle"]
        == "North State Intercity Bus System-Lake County Interregional Transit Center (2020:05)"
    ),
    "ppno",
] = "CP063"

In [25]:
# Clean project_number, only keep year
gis2["project_number"] = gis2["project_number"].str.split(":").str[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [26]:
gis2["project_number"] = gis2["project_number"].fillna(0).astype("int64")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [27]:
project_status_gis = A6_zev.summarize_rows(gis2, "ppno", "projectstatus")

In [28]:
# Check that each row matches the number of unique ppno
len(project_status_gis) == gis2.ppno.nunique()

True

In [29]:
# Drop old project status from original dataframe
gis2 = gis2.drop(columns=["projectstatus"])

In [30]:
# Merge with original gis, so there is only one row for each PPNO
final_gis_template = (
    pd.merge(project_status_gis, gis2, how="left", on=["ppno"])
    .drop_duplicates("ppno")
    .reset_index(drop=True)
)

In [31]:
# Checks
len(final_gis_template), final_gis_template.ppno.nunique(), gis.ppno.nunique()

(45, 45, 45)

##### Merge GIS information 

In [32]:
# Merge with df_tircp2
merge1 = pd.merge(
    df_tircp2,
    final_gis_template,
    how="left",
    left_on=["ppno", "award_year"],
    right_on=["ppno", "project_number"],
    indicator=True,
)

In [33]:
merge1._merge.value_counts()

both          43
left_only     25
right_only     0
Name: _merge, dtype: int64

In [34]:
# Double Check that titles & years correspond with one another
merge1[["title", "projecttitle","projectstatus"]].sample(2)

Unnamed: 0,title,projecttitle,projectstatus
53,City of Wasco Improving Air Quality and Economic Growth with Bus Electrification,,
37,West Valley Connector Bus Rapid Transit Phase 1 & ZEB Initiative,West Valley Connector Bus Rapid Transit Phase 1 & Zero Emission Bus Initiative (2020:10),Ops./Procure


In [35]:
# Drop merge & GIS Template projects
merge1 = merge1.drop(columns=["_merge", "projecttitle"])

#### GIS Info from Tracking Sheet 2.0 

In [36]:
gis_tracking_sheet = to_snakecase(
    pd.read_excel(f"{GCS_FILE_PATH}{A1_data_prep.FILE_NAME }", sheet_name="GIS Info")
)

In [37]:
# Drop certain cols
gis_tracking_sheet = gis_tracking_sheet[
    [
        "award_year",
        "project_title",
        "caltransdistrict",
        "assembly\ndistricts",
        "senate\ndistricts",
        "city_code",
        "county_code",
        "_implementing_agency__id_",
    ]
]

In [38]:
# Only keep the years wanted
gis_tracking_sheet2 = gis_tracking_sheet.loc[
    gis_tracking_sheet["award_year"] >= 2018
].reset_index(drop=True)

In [39]:
# Merge with df_tircp2
merge2 = pd.merge(
    merge1,
    gis_tracking_sheet2,
    how="left",
    left_on=["award_year", "title"],
    right_on=["award_year", "project_title"],
    indicator=True,
)

In [40]:
# Merge
merge2._merge.value_counts()

both          67
left_only      1
right_only     0
Name: _merge, dtype: int64

In [41]:
merge2.shape, df_tircp2.shape

((68, 26), (68, 16))

In [42]:
merge2 = merge2.drop(
    columns=["project_number", "project_title", "_merge"]
)

### Project Sheet 
* Recreating "projects" tab of SCCP Excel workbook. 

In [43]:
# Copy merge 2
projects = merge2.copy()

In [44]:
# Fill in empty values with NA
projects = projects.fillna(
    projects.dtypes.replace({"float64": 0.0, "object": "None", "int64": 0})
)

In [45]:
# Format moentary cols
monetary_cols = ["total__cost", "tircp"]
for i in monetary_cols:
    projects[i] = projects[i].apply(
        lambda x: format_currency(x, currency="USD", locale="en_US")
    )

In [46]:
# Clean up column names 
projects = A1_data_prep.clean_up_columns(projects)

In [47]:
projects = projects.rename(
    columns={
        "Number Use": "Project Number",
        "Assembly\nDistricts": "Assembly Districts",
        "Senate\nDistricts": "Senate Districts",
        "Caltransdistrict": "CT Districts",
        "Assembly\nDistricts": "Assembly Districts",
    }
)

In [48]:
# Rearrange columns
right_order = [
    "Award Year",
    "#",
    "Project Number",
    "Ppno",
    "Title",
    "Grant Recipient",
    "Tircp",
    "Total  Cost",
    "Description",
    "District",
    "County",
    "Status",
    "CT Districts",
    "Assembly Districts",
    "Senate Districts",
    "City Code",
    "County Code",
    "Implementing Agency  Id",
]

In [49]:
projects = projects[right_order]

### Outcomes Sheet

In [50]:
# Measure columns
measure_cols = [
    "estimated_tircp_ghg_reductions",
    "cost_per_ghg_ton_reduced",
    "increased_ridership",
    "service_integration",
    "improve_safety",
]

In [51]:
# Turn estimated GHG reductions into a number
merge2["estimated_tircp_ghg_reductions"] = (
    merge2["estimated_tircp_ghg_reductions"]
    .str.replace("MTCO2e", "")
    .str.replace("None", "")
    .str.replace(",", "")
)

In [52]:
merge2["estimated_tircp_ghg_reductions"] = (
    merge2["estimated_tircp_ghg_reductions"]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
)

In [53]:
# Subset to cols similar to SCCP
outcomes = merge2[
    [
        "award_year",
        'detailed_title_col',
        "estimated_tircp_ghg_reductions",
        "increased_ridership",
        "service_integration",
        "improve_safety",
    ]
].sort_values(["award_year", 'detailed_title_col',])

In [54]:
outcomes = A1_data_prep.clean_up_columns(outcomes)

In [55]:
outcomes.head(1)

Unnamed: 0,Award Year,Detailed Title Col,Estimated Tircp Ghg Reductions,Increased Ridership,Service Integration,Improve Safety
1,2018,2018-#Electrify Anaheim: Changing the Transit Paradigm in Southern California-Anaheim Transportation Network,61000.0,Medium-High,Medium-High,Medium


##### Version 1

In [56]:
# Drop award year
outcomes_transformed = outcomes.drop(columns=["Award Year"]).T

In [57]:
# Make first row to column names
outcomes_transformed.columns = outcomes_transformed.iloc[0]

In [58]:
# Del first row
outcomes_transformed = outcomes_transformed.iloc[1:]

In [59]:
outcomes_transformed.head(1)

Detailed Title Col,2018-#Electrify Anaheim: Changing the Transit Paradigm in Southern California-Anaheim Transportation Network,2018-Accelerating Rail Modernization and Expansion in the Capital Region-Sacramento Regional Transit District,2018-All Aboard 2018: Transforming SoCal Rail Travel-Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,2018-Blue Line Rail Corridor Transit Enhancements-San Diego Metropolitan Transit System,2018-Building Up: LOSSAN North Improvement Program-Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,2018-Coastal Express/Pacific Surfliner Peak Hour Service Expansion and Integration Project-Santa Barbara County Association Of Governments,2018-Diesel Multiple Unit Vehicle to Zero- or Low-Emission Vehicle Conversion and West Valley Connector Bus Rapid Transit-San Bernardino County Transportation Authority,2018-Dublin/Pleasanton Capacity Improvement and Congestion Reduction Program-Livermore Amador Valley Transit Authority,2018-Electric Blue: Electrification of City of Santa Monica's Big Blue Bus-City Of Santa Monica,2018-Extend rail service to Monterey County-Transportation Agency For Monterey County,2018-From the Desert to the Sea: Antelope Valley Transit Authority and Long Beach Transit Zero Emission Bus Initiative-Antelope Valley Transit Authority,2018-Goleta Train Depot-Santa Barbara County Association Of Governments,2018-Los Angeles City: Leading the Transformation to Zero-Emission Electric Bus Transit Service-City Of Los Angeles,2018-Los Angeles Region Transit System Integration and Modernization Program of Projects-Los Angeles County Metropolitan Transportation Authority,2018-North State Intercity Bus System-Shasta Regional Transportation Agency,2018-Peninsula Corridor Electrification Expansion Project-Peninsula Corridor Joint Powers Board,2018-Purchase Zero Emission High Capacity Buses to Support Transbay Tomorrow and Clean Corridors Plan-Alameda Contra Costa Transit District,2018-Ride Between the Line: Enhancing Access to Transit in San Diego-San Diego Association Of Governments,2018-SMART Larkspur to Windsor Corridor-Sonoma-Marin Area Rail Transit District,2018-SamTrans Express Bus Pilot-San Mateo County Transit District,2018-Solano Regional Transit Improvements-Solano Transportation Authority,2018-Southern California Optimized Rail Expansion (SCORE)-Southern California Regional Rail Authority,2018-Southwest Fresno Community Connector-City Of Fresno,2018-The Northern California Corridor Enhancement Program-Capitol Corridor Joint Powers Authority,2018-The Transbay Corridor Core Capacity Program: Vehicle Acquistion and Communications-Based Train Control System-Bay Area Rapid Transit,2018-Transit Capacity Expansion Program-San Francisco Municipal Transportation Agency,"2018-VTA’s BART Silicon Valley Extension, Phase II-Santa Clara Valley Transportation Authority",2018-Valley Rail-San Joaquin Joint Powers Authority,2020-Building Up Control: LOSSAN Service Enhancement Program-Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,2020-Core Capacity Program-San Francisco Municipal Transportation Agency,2020-Expansion of WETA Ferry Services-San Francisco Bay Area Water Emergency Transportation Authority,"2020-For People, Place and Planet: Connecting Inglewood to Regional Opportunities-Santa Monica Big Blue Bus","2020-Improving Air Quality & Economic Growth with Electric Buses in Merced County, the Gateway to Yosemite-Transit Joint Powers Authority Of Merced County",2020-Inglewood Transit Connector Project-City Of Inglewood,2020-LBT/UCLA Electric Commuter Express-Long Beach Transit,2020-Light Rail Modernization and Expansion of Low-Floor Fleet-Sacramento Regional Transit District,2020-Metrolink Antelope Valley Line Capital and Service Improvements-Los Angeles County Metropolitan Transportation Authority,2020-North State Intercity Bus System-Lake Transit Authority,"2020-Reaching the Most Transit-Vulnerable: AVTA's Zero Emission ""Microtransit"" & Bus Expansion Proposal-Antelope Valley Transit Authority",2020-SDConnect: San Diego Rail Improvement Program-San Diego Association Of Governments,2020-Sacramento Valley Station (SVS) Transit Center-Capitol Corridor Joint Powers Authority,2020-Solano Regional Transit Improvements Phase 2-Solano Transportation Authority,2020-The Transbaby Corridor Core Capacity Program: Vehicle Acquisition-Bay Area Rapid Transit,2020-Torrance Transit Bus Service Enhancement Program-Torrance Transit Department,2020-West Valley Connector Bus Rapid Transit Phase 1 & ZEB Initiative-San Bernardino County Transportation Authority,2022-ATN FAST (Family of Advanced Solutions for Transit): Revolutionizing Transit for a Global Audience-Anaheim Transportation Network,2022-City of Wasco Improving Air Quality and Economic Growth with Bus Electrification-City Of Wasco,2022-East Bay Transit-Oriented Development Mobility Enhancement Project-Bay Area Rapid Transit,2022-Expanding Transit Services and Introducing Zero-Emission Fleets on California’s North Coast-Humboldt Transit Authority,2022-Fleet Modernization Project-Sacramento Regional Transit District,2022-Fresno County Rural Transit Agency Resiliency Hub-Fresno County Rural Transit Agency,2022-I-680 Express Bus Program-Contra Costa Transportation Authority,2022-Los Angeles Nextgen and Zero Emission Bus Implementation Project-Los Angeles County Metropolitan Transportation Authority,2022-Making a Beeline for Electrification - City of Glendale and Arroyo Verdugo Communities Zoom towards Cleaner Transportation-City Of Glendale,2022-Metrolink Perris Valley Line Capacity Improvements-Southern California Regional Rail Authority,2022-Next Wave: Expanding MTD's Electric Legacy on the South Coast-Santa Barbara Metropolitan Transit District,2022-Oakland Waterfront Mobility Hub-City Of Oakland,2022-SFMTA Core Capacity Program-San Francisco Municipal Transportation Agency,2022-SURF! Busway and Bus Rapid Transit-Monterey-Salinas Transit District,2022-Sacramento Valley Station (SVS) Transit Center: Priority Project-Capitol Corridor Joint Powers Authority,2022-San Francisco Zero Emissions High-Frequency Ferry Network-San Francisco Bay Area Water Emergency Transportation Authority,2022-Sonoma Regional Bus and Rail Connectivity Improvements-Sonoma County Transportation Authority,2022-South Bay Microtransit Expansion-City Of Cupertino,"2022-Sweet Home Antelope Valley, Where the Skies are so Blue-Antelope Valley Transit Authority",2022-The Regional Connectivity Improvement Bus Program-City Of Torrance,2022-Tulare Cross-Valley Corridor ZEB Expansion-Tulare County Regional Transit Agency,2022-Valley Rail Expansion: Altamont Corridor Express (ACE) Ceres to Turlock Extension-San Joaquin Regional Rail Commission,2022-Zero-Emission Transit Enhancement Project-San Diego Metropolitan Transit System
Estimated Tircp Ghg Reductions,61000.0,234000.0,957000.0,68000.0,1160000.0,7000.0,67000.0,0.0,17000.0,81000.0,23000.0,73000.0,196000.0,7966000.0,26000.0,737000.0,14000.0,7000.0,134000.0,47000.0,138000.0,5714000.0,9000.0,1348000.0,4272000.0,156000.0,4063000.0,4369000.0,325000.0,369000.0,41000.0,18000.0,31000.0,772000.0,9000.0,85000.0,584000.0,14000.0,12000.0,34000.0,39000.0,125000.0,2495000.0,30000.0,33000.0,159000.0,1000.0,242000.0,389000.0,44000.0,14000.0,53000.0,781000.0,71000.0,157000.0,14000.0,10000.0,1186000.0,31000.0,156000.0,125000.0,63000.0,76000.0,139000.0,51000.0,475000.0,61000.0,34000.0


##### Outputs: Measures except GHG Reductions.

In [90]:
outcomes_melt = pd.melt(
    outcomes,
    id_vars=[
        "Award Year",
        "Detailed Title Col",
    ],
    value_vars=[
        "Increased Ridership",
        "Service Integration",
        "Improve Safety",
    ],
)

In [91]:
outcomes_melt = A1_data_prep.clean_up_columns(outcomes_melt)

In [92]:
year_summary = (
    outcomes_melt.groupby(["Award Year", "Variable", "Value"])
    .agg({"Detailed Title Col": "nunique"})
    .rename(
        columns={"Detailed Title Col": "Number of Projects in this Value Category"}
    )
)

In [93]:
year_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Number of Projects in this Value Category
Award Year,Variable,Value,Unnamed: 3_level_1
2018,Improve Safety,High,9
2018,Improve Safety,Medium,12
2018,Improve Safety,Medium-High,7
2018,Increased Ridership,High,13
2018,Increased Ridership,Medium,10
2018,Increased Ridership,Medium-High,5
2018,Service Integration,High,5
2018,Service Integration,Medium,8
2018,Service Integration,Medium-High,15
2020,Improve Safety,High,4


##### GHG Reductions.

In [94]:
GHG_by_year = outcomes.groupby(["Award Year"]).agg(
    {"Estimated Tircp Ghg Reductions": "sum"}
)

In [95]:
GHG_by_year

Unnamed: 0_level_0,Estimated Tircp Ghg Reductions
Award Year,Unnamed: 1_level_1
2018,31944000.0
2020,5016000.0
2022,4332000.0


#### Save

In [96]:
"""
with pd.ExcelWriter(f"{GCS_FILE_PATH}calsta_draft.xlsx") as writer:
    outcomes.to_excel(writer, sheet_name="outcomes_unpivoted", index=True)
    outcomes_transformed.to_excel(writer, sheet_name="outcomes_transformed", index=True)
    projects.to_excel(writer, sheet_name="projects", index=True)
    year_summary.to_excel(writer, sheet_name="year_summary", index=True)
    GHG_by_year.to_excel(writer, sheet_name="GHG_reduction_year", index=True)
    """

'\nwith pd.ExcelWriter(f"{GCS_FILE_PATH}calsta_draft.xlsx") as writer:\n    outcomes.to_excel(writer, sheet_name="outcomes_unpivoted", index=True)\n    outcomes_transformed.to_excel(writer, sheet_name="outcomes_transformed", index=True)\n    projects.to_excel(writer, sheet_name="projects", index=True)\n    year_summary.to_excel(writer, sheet_name="year_summary", index=True)\n    GHG_by_year.to_excel(writer, sheet_name="GHG_reduction_year", index=True)\n    '