# ZEV purchased through LCTOP & TIRCP
* An agency here is a recipient of LCTOP/TIRCP grants.

In [59]:
import A1_data_prep
import A2_tableau
import A6_zev
import pandas as pd
from calitp import *

pd.options.display.max_columns = 100
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
pd.options.display.float_format = "{:.2%}".format

## LCTOP Clean Up 
* Subset for only zero emission adjacent work.

In [60]:
# Read in sheet
df_lctop = pd.read_excel(
    "gs://calitp-analytics-data/data-analyses/lctop/LCTOP_cleaned.xlsx",
    sheet_name="cleaned",
)

In [61]:
# Only keep certain cols
lctop_cols_to_keep = [
    "funding_year",
    "project_sub_type_ii",
    "project_description__short_",
    "puc_99313_funds",
    "puc_99314_funds",
    "total_project_request_99314_+_99313",
    "total_lctop_funds",
    "total_cci_funds",
    "total_project_cost",
    "status",
    "lead_agency",
]

In [62]:
# Subset df
df_lctop2 = df_lctop[lctop_cols_to_keep]

### Strangely enough LCTOP funds column is not completely populated. 
* total_project_request_99314_+_99313 is the "total awarded" column used in the LCTOP dashboards Courtney Williams emailed me

In [63]:
(
    df_lctop2.groupby(["funding_year"]).agg(
        {
            "total_project_request_99314_+_99313": "sum",
            "total_lctop_funds": "sum",
            "total_cci_funds": "sum",
        }
    )
)

Unnamed: 0_level_0,total_project_request_99314_+_99313,total_lctop_funds,total_cci_funds
funding_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
14-15,2416559300.00%,0.00%,0.00%
15-16,7470076000.00%,0.00%,0.00%
16-17,3453910500.00%,8745515600.00%,13881832000.00%
17-18,9686456450.00%,15177754474.00%,26666039174.00%
18-19,14694940600.00%,21386709100.00%,30378383100.00%
19-20,14605435400.00%,26437474800.00%,31345197100.00%


In [64]:
# Preview project types to see which ones are ZEV adjacent.
df_lctop2["project_sub_type_ii"].value_counts()

New expanded/enhanced transit service                                 283
Free or reduced fares                                                 186
New transit related amenities                                         141
New zero-emission vehicles                                            123
Infrastructure to support zero-emission vehicle(s)                     34
New vehicles for new expanded/enhanced transit service                 29
Network/fare integration                                               24
Infrastructure to support new expanded/enhanced transit service        16
Renewable energy/fuel for transit service                               9
New Transit related amenities                                           3
Alternative transportation services                                     2
Vehicles upgrades to support new expanded/enhanced transit service      1
Name: project_sub_type_ii, dtype: int64

In [65]:
# df_lctop2.loc[df_lctop2['project_sub_type_ii'] == 'New expanded/enhanced transit service']

In [66]:
# Keep only zero emission adjacent project sub type ii
list_lctop_zev = [
    "New vehicles for new expanded/enhanced transit service",
    "New zero-emission vehicles",
]

In [67]:
# filter them all out at once.
df_lctop3 = df_lctop2[df_lctop2.project_sub_type_ii.isin(list_lctop_zev)]

In [68]:
# Compare lengths
len(df_lctop2), len(df_lctop3)

(851, 152)

In [69]:
# Make sure I filtered it correctly
df_lctop3["project_sub_type_ii"].unique()

array(['New vehicles for new expanded/enhanced transit service',
       'New zero-emission vehicles'], dtype=object)

In [70]:
# def extract_zev

In [71]:
# Change short description to lower case
df_lctop3["project_description__short_"] = df_lctop3[
    "project_description__short_"
].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [72]:
# Replace numbers that are written out into integers
df_lctop3["project_description__short_"] = (
    df_lctop3["project_description__short_"]
    .str.replace("seven", "7")
    .str.replace("two", "2")
    .str.replace("eight", "8")
    .str.replace("five", "5")
    .str.replace("fifteen", "15")
    .str.replace("twenty", "20")
    .str.replace("three", "3")
    .str.replace("four", "4")
    .str.replace("eleven", "11")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [73]:
# Extract numbers from project desc into a new column, cast as float, fill in zeroes
df_lctop3["number_of_zev"] = (
    df_lctop3.project_description__short_.str.extract("(\d+)")
    .astype("float64")
    .fillna(0)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## TIRCP Clean Up

In [74]:
df_tircp = to_snakecase(A2_tableau.tableau_dashboard())



In [75]:
# Subset for only cols of interest
tircp_columns = [
    "award_year",
    "grant_recipient",
    "title",
    "description",
    "tircp",
    "expended_amount",
    "expended_percent",
    "progress",
]

In [76]:
df_tircp2 = df_tircp[tircp_columns]

In [77]:
# Lower project description to search for ZEV projects
df_tircp2["description"] = df_tircp2["description"].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [78]:
# Extract ZEV adjacent keywords
df_tircp2["zev_yes_no"] = (
    df_tircp2["description"]
    .str.extract(
        "(electric|cng|zero|emission|zero-emission|battery|hydrogen|hydrogen fuel cell|cell)",
        expand=False,
    )
    .fillna("Not ZEV")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [79]:
# Extract ZEV only projects
df_tircp_zev = df_tircp2.loc[df_tircp2["zev_yes_no"] != "Not ZEV"]

In [80]:
len(df_tircp_zev)

45

In [81]:
# Extract numbers out of description
df_tircp_zev["description"] = (
    df_tircp_zev["description"]
    .str.replace("seven", "7")
    .str.replace("two", "2")
    .str.replace("eight", "8")
    .str.replace("five", "5")
    .str.replace("fifteen", "15")
    .str.replace("twenty", "20")
    .str.replace("three", "3")
    .str.replace("four", "4")
    .str.replace("eleven", "11")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [82]:
# Extract numbers from project desc
df_tircp_zev["number_of_zev"] = (
    df_tircp_zev.description.str.extract("(\d+)").astype("float64").fillna(0)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [83]:
df_tircp_zev = df_tircp_zev.drop(columns=["zev_yes_no"])

## Manual Clean Up
* Although I extracted integers from the description columns to find the number of ZEV purchased, some of the values were not populated/correct.
* I had to manually eyeball through each description and populate the ZEV purchased column.
* [Results are here.](https://docs.google.com/spreadsheets/d/1pmSPrP7A4H1RChW7zknubqmzgodWxc62jPMYgBN3rhA/edit#gid=0)

In [84]:
# Export LCTOP and TIRCP to fill in the # of zev buses manually for any rows that didn't pick up the number
"""
with pd.ExcelWriter(
    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV.xlsx"
) as writer:
    df_lctop3.to_excel(writer, sheet_name="lctop", index=True)
    df_tircp_zev.to_excel(writer, sheet_name="tircp", index=True)
    """

'\nwith pd.ExcelWriter(\n    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV.xlsx"\n) as writer:\n    df_lctop3.to_excel(writer, sheet_name="lctop", index=True)\n    df_tircp_zev.to_excel(writer, sheet_name="tircp", index=True)\n    '

## Analysis

In [85]:
# Open up Google Sheets workbook that I manually filled in
# Sheets I want
sheets_list = ["lctop", "tircp"]

# Open the workbook in a dictionary
dict_df1 = pd.read_excel(
    "gs://calitp-analytics-data/data-analyses/tircp/zev_manual.xlsx",
    sheet_name=sheets_list,
)

In [86]:
# Grab each sheet
lctop_clean = to_snakecase(dict_df1.get("lctop"))
tircp_clean = to_snakecase(dict_df1.get("tircp"))

### TIRCP

In [87]:
tircp_sum = ["tircp", "number_of_zev"]
tircp_count = ["title"]
tircp_group = ["award_year"]
tircp_mon = ["tircp"]

In [88]:
# Create summary table for all ZEV projects
tircp_summary = A6_zev.zev_summary(
    tircp_clean, df_tircp, tircp_group, tircp_sum, tircp_count, tircp_mon
)



In [89]:
# Calculate out projects that are ZEV vs total projects
tircp_summary["Percent of Projects that are ZEV Adjacent"] = (
    tircp_summary["Title X"] / tircp_summary["Title Y"]
)

In [90]:
# Clean columns
tircp_summary = tircp_summary.rename(
    columns={
        "Title X": "Total ZEV Projects",
        "Number Of Zev": "Total ZEV",
        "Title Y": "Total Projects in Cycle",
        "Tircp": "Total Amount Awarded",
    }
)

In [91]:
# Rename Award Year to Cycles
tircp_summary["Award Year"] = tircp_summary["Award Year"].replace(
    {
        2015: "Cycle 1",
        2016: "Cycle 2",
        2018: "Cycle 3",
        2020: "Cycle 4",
        2022: "Cycle 5",
    }
)

In [92]:
# Edit so Grand total row is obvious 
tircp_summary["Award Year"].loc[5] = "Grand Total"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [93]:
# Manually calclate out total percent of ZEV projects
tircp_summary["Percent of Projects that are ZEV Adjacent"].loc[5] = (
    tircp_summary["Total ZEV Projects"].sum()
    / tircp_summary["Total Projects in Cycle"].sum()
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [94]:
tircp_summary

Unnamed: 0,Award Year,Total Amount Awarded,Total ZEV,Total ZEV Projects,Total Projects in Cycle,Percent of Projects that are ZEV Adjacent
0,Cycle 1,$74745000.0,54,4,14,28.57%
1,Cycle 2,$41930000.0,2,3,14,21.43%
2,Cycle 3,$386220000.0,209,13,28,46.43%
3,Cycle 4,$247425000.0,39,9,17,52.94%
4,Cycle 5,$439762000.0,450,15,23,65.22%
5,Grand Total,$1190082000.0,754,44,96,45.83%


In [95]:
A6_zev.basic_bar_chart(
    tircp_summary.iloc[:-1],
    "Award Year",
    "Total ZEV Projects",
    "Award Year",
    "Total ZEV Projects by FY (TIRCP)",
)

In [96]:
A6_zev.basic_bar_chart(
    tircp_summary.iloc[:-1],
    "Award Year",
    "Total ZEV",
    "Award Year",
    "Total ZEV to be/already Purchased by FY (TIRCP)",
)

#### How many  ZEV already purchased/in the pipeline?

In [97]:
# Find out how many buses already purchased by looking at only projects with 100% of allocated amount spent.
tircp_zev_done = tircp_clean.loc[
    tircp_clean["progress"] == "100% of allocated funds spent"
]
f"Approximately {tircp_zev_done.number_of_zev.sum()} ZEV already purchased and ${tircp_zev_done.tircp.sum()} spent"

'Approximately 91 ZEV already purchased and $108711000 spent'

In [98]:
# Look at projects that are done - says 0 when number of buses is unknown.
tircp_zev_done[["award_year", "grant_recipient", "number_of_zev", "tircp"]].sort_values(
    "number_of_zev", ascending=False
)

Unnamed: 0,award_year,grant_recipient,number_of_zev,tircp
8,2018,Anaheim Transportation Network,40,28617000
0,2015,Antelope Valley Transit Authority,29,24403000
3,2015,San Joaquin Regional Transit District,12,6841000
1,2015,Orange County Transportation Authority,5,2320000
17,2018,Santa Barbara County Association Of Governments,5,9600000
4,2016,Antelope Valley Transit Authority,0,8930000
6,2016,Orange County Transportation Authority,0,28000000


#### How many  ZEV in the pipeline?

In [99]:
tircp_zev_pipeline = tircp_clean.loc[
    tircp_clean["progress"] != "100% of allocated funds spent"
]
f"Approximately {tircp_zev_pipeline.number_of_zev.sum()} ZEV in the pipeline to be purchased and ${tircp_zev_pipeline.tircp.sum()} in projected funds."

'Approximately 663 ZEV in the pipeline to be purchased and $1081371000 in projected funds.'

In [100]:
tircp_zev_pipeline.groupby(["grant_recipient", "award_year"]).agg(
    {"number_of_zev": "sum", "tircp": "sum"}
).reset_index().sort_values("number_of_zev", ascending=False)

Unnamed: 0,grant_recipient,award_year,number_of_zev,tircp
20,Los Angeles County Metropolitan Transportation,2022,261,177500000
9,City Of Los Angeles,2018,112,36104000
1,Anaheim Transportation Network,2022,42,22778000
33,Sonoma County Transportation Authority,2022,30,24825000
36,Tulare County Regional Transit Agency,2022,30,33769000
7,City Of Glendale And Arroyo Verdugo Communities,2022,27,34648000
23,San Diego Metropolitan Transit System,2018,15,40098000
32,Solano Transportation Authority,2018,13,10788000
5,City Of Cupertino,2022,12,8465000
3,Antelope Valley Transit Authority,2020,11,6503000


### LCTOP

In [101]:
lctop_count = ["project_sub_type_ii"]
lctop_group = ["funding_year"]
lctop_sum = [
    "total_project_request_99314_+_99313",
    "total_lctop_funds",
    "number_of_zev",
]
lctop_monetary = [
    "total_project_request_99314_+_99313",
    "total_lctop_funds",
]

In [102]:
# Create summary table for all ZEV projects
lctop_summary = A6_zev.zev_summary(
    lctop_clean, df_lctop2, lctop_group, lctop_sum, lctop_count, lctop_monetary
)



In [103]:
# Calculate out projects that are ZEV vs total projects
lctop_summary["Percent of Projects that are ZEV Adjacent"] = (
    lctop_summary["Sub Type Ii X"] / lctop_summary["Sub Type Ii Y"]
)

In [104]:
# Clean columns
lctop_summary = lctop_summary.rename(
    columns={
        "Number Of Zev": "Total ZEV",
        "Sub Type Ii X": "Total ZEV Projects",
        "Sub Type Ii Y": "Total Projects in Cycle",
        "Total  Request 99314 + 99313": "Total Amount Awarded",
    }
)

In [105]:
# Change certain values from float to int
lctop_summary[
    ["Total ZEV", "Total ZEV Projects", "Total Projects in Cycle"]
] = lctop_summary[
    ["Total ZEV", "Total ZEV Projects", "Total Projects in Cycle"]
].astype(
    "int64"
)

In [106]:
# Make the grand total row more obvious
lctop_summary["Funding Year"].loc[6] = "Grand Total"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [107]:
# Manually calc out total ZEV projects
lctop_summary['Percent of Projects that are ZEV Adjacent'].loc[6] = (lctop_summary['Total ZEV Projects'].sum()/lctop_summary['Total Projects in Cycle'].sum())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [108]:
lctop_summary

Unnamed: 0,Funding Year,Total Amount Awarded,Total Lctop Funds,Total ZEV,Total ZEV Projects,Total Projects in Cycle,Percent of Projects that are ZEV Adjacent
0,14-15,$557747.0,$0.0,0,6,95,6.32%
1,15-16,$20338361.0,$0.0,73,20,132,15.15%
2,16-17,$8252467.0,$48464014.0,144,17,126,13.49%
3,17-18,$25577902.0,$68958118.0,166,33,152,21.71%
4,18-19,$38261454.0,$85768916.0,179,36,180,20.00%
5,19-20,$26236482.0,$105699301.0,148,40,166,24.10%
6,Grand Total,$119224412.0,$308890349.0,710,152,851,17.86%


In [109]:
A6_zev.basic_bar_chart(
    lctop_summary.iloc[:-1],
    "Funding Year",
    "Total ZEV",
    "Funding Year",
    "Total ZEV to be/already Purchased by FY (LCTOP)",
)

In [110]:
A6_zev.basic_bar_chart(
    lctop_summary.iloc[:-1],
    "Funding Year",
    "Total ZEV Projects",
    "Funding Year",
    "Total ZEV Projects by FY (LCTOP)",
)

#### ZEV in the pipeline
* Status column as shown below has only one "closed", 50 null values, and 101 open.

In [111]:
lctop_clean["status"].value_counts()

open      101
None       50
closed      1
Name: status, dtype: int64

In [112]:
lctop_done = lctop_clean.loc[lctop_clean["status"] == "closed"]
f"Approximately {lctop_done.number_of_zev.sum()} ZEV already purchased and ${lctop_done['total_project_request_99314_+_99313'].sum()} spent"

'Approximately 2 ZEV already purchased and $244602.0 spent'

In [113]:
lctop_pipeline = lctop_clean.loc[lctop_clean["status"] != "closed"]
f"Approximately {lctop_clean.number_of_zev.sum()} ZEV to be purchased and ${lctop_clean['total_project_request_99314_+_99313'].sum()} in projected funds"

'Approximately 710 ZEV to be purchased and $119224412.5 in projected funds'

In [114]:
lctop_clean.groupby(["funding_year", "lead_agency"]).agg(
    {"number_of_zev": "sum"}
).reset_index().sort_values("number_of_zev", ascending=False)

Unnamed: 0,funding_year,lead_agency,number_of_zev
81,18-19,Golden Gate Bridge Highway and Transportation District,65
50,17-18,Golden Gate Bridge Highway and Transportation District,64
31,16-17,Golden Gate Bridge Highway and Transportation District,64
41,16-17,Sonoma County Transit,25
134,19-20,San Diego Metropolitan Transit System,25
24,15-16,Sonoma County Transit,24
99,18-19,Santa Clara Valley Transportation Authority,20
60,17-18,San Diego Metropolitan Transit System,16
137,19-20,Santa Clara Valley Transportation Authority,15
129,19-20,North County Transit District,14
