# ZEV purchased through LCTOP & TIRCP
* An agency here is a recipient of LCTOP/TIRCP grants.

In [None]:
import A1_data_prep
import A2_tableau
import A6_zev
import pandas as pd
from babel.numbers import format_currency
from calitp import *

# Notebook-wide pandas display settings.
# NOTE: float_format applies to every float shown in this notebook, so any
# float column is rendered as a percentage.
display_settings = {
    "display.max_columns": 100,
    "display.max_rows": None,
    "display.max_colwidth": None,
    "display.float_format": "{:.2%}".format,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

## LCTOP Clean Up 
* Subset for only zero emission adjacent work.

In [None]:
# Load the "cleaned" sheet of the LCTOP workbook from cloud storage.
LCTOP_CLEANED_PATH = "gs://calitp-analytics-data/data-analyses/lctop/LCTOP_cleaned.xlsx"
df_lctop = pd.read_excel(LCTOP_CLEANED_PATH, sheet_name="cleaned")

In [None]:
# Columns retained from the LCTOP sheet, grouped by what they describe.
lctop_project_cols = [
    "funding_year",
    "project_sub_type_ii",
    "project_description__short_",
]
lctop_funding_cols = [
    "puc_99313_funds",
    "puc_99314_funds",
    "total_project_request_99314_+_99313",
    "total_lctop_funds",
    "total_cci_funds",
    "total_project_cost",
]
lctop_tracking_cols = ["status", "lead_agency"]

lctop_cols_to_keep = lctop_project_cols + lctop_funding_cols + lctop_tracking_cols

In [None]:
# Narrow the LCTOP frame down to the columns of interest.
df_lctop2 = df_lctop.loc[:, lctop_cols_to_keep]

### Strangely enough LCTOP funds column is not completely populated. 
* total_project_request_99314_+_99313 is the "total awarded" column used in the LCTOP dashboards Courtney Williams emailed me

In [None]:
# Yearly totals of the three candidate "funds" columns, side by side, to see
# which of them is fully populated.
lctop_fund_totals = dict.fromkeys(
    [
        "total_project_request_99314_+_99313",
        "total_lctop_funds",
        "total_cci_funds",
    ],
    "sum",
)
df_lctop2.groupby(["funding_year"]).agg(lctop_fund_totals)

In [None]:
# Count projects per sub type to decide which sub types are ZEV adjacent.
df_lctop2.project_sub_type_ii.value_counts()

In [None]:
# df_lctop2.loc[df_lctop2['project_sub_type_ii'] == 'New expanded/enhanced transit service']

In [None]:
# Project sub types (project_sub_type_ii) treated as zero-emission adjacent.
NEW_SERVICE_VEHICLES = "New vehicles for new expanded/enhanced transit service"
NEW_ZERO_EMISSION_VEHICLES = "New zero-emission vehicles"

list_lctop_zev = [NEW_SERVICE_VEHICLES, NEW_ZERO_EMISSION_VEHICLES]

In [None]:
# Keep only the rows whose project sub type is in the ZEV-adjacent list.
# .copy() gives df_lctop3 its own data, so the column assignments made in the
# following cells modify this frame without a SettingWithCopyWarning.
is_zev_sub_type = df_lctop2["project_sub_type_ii"].isin(list_lctop_zev)
df_lctop3 = df_lctop2[is_zev_sub_type].copy()

In [None]:
# Row counts before and after the ZEV sub type filter.
df_lctop2.shape[0], df_lctop3.shape[0]

In [None]:
# Sanity check: only the ZEV-adjacent sub types should remain.
df_lctop3.project_sub_type_ii.unique()

In [None]:
# def extract_zev

In [None]:
# Lower-case the short description so the number-word replacement and digit
# extraction in the next cells do not depend on capitalisation.
# assign() returns a new frame rather than writing into a filtered slice of
# df_lctop2, which avoids pandas' SettingWithCopyWarning.
df_lctop3 = df_lctop3.assign(
    project_description__short_=df_lctop3["project_description__short_"].str.lower()
)

In [None]:
# Replace spelled-out numbers with digits so they can be extracted as counts.
# Matching whole words only (\b) leaves words that merely contain a number word
# untouched: a plain substring replace turns "network" into "net2rk" and
# "freight" into "fr8", which would then be read as a vehicle count.
NUMBER_WORDS = {
    "seven": "7",
    "two": "2",
    "eight": "8",
    "five": "5",
    "fifteen": "15",
    "twenty": "20",
    "three": "3",
    "four": "4",
    "eleven": "11",
}
NUMBER_WORD_PATTERN = r"\b(?:" + "|".join(NUMBER_WORDS) + r")\b"

# regex=True is passed explicitly because the default differs between pandas
# versions; assign() avoids writing into a slice of df_lctop2.
df_lctop3 = df_lctop3.assign(
    project_description__short_=df_lctop3["project_description__short_"].str.replace(
        NUMBER_WORD_PATTERN,
        lambda match: NUMBER_WORDS[match.group(0)],
        regex=True,
    )
)

In [None]:
# Take the first run of digits in each description as the ZEV count.
# Descriptions without any digits get 0; the count is stored as float64.
# The pattern is a raw string because "\d" is not a valid Python string escape.
df_lctop3 = df_lctop3.assign(
    number_of_zev=(
        df_lctop3["project_description__short_"]
        .str.extract(r"(\d+)", expand=False)
        .astype("float64")
        .fillna(0)
    )
)

## TIRCP Clean Up

In [None]:
# Pull the TIRCP dashboard table and normalise its column names to snake_case.
tircp_dashboard = A2_tableau.tableau_dashboard()
df_tircp = to_snakecase(tircp_dashboard)

In [None]:
# Columns retained from the TIRCP table: project identity first, then funding
# and progress.
tircp_project_cols = ["award_year", "grant_recipient", "title", "description"]
tircp_funding_cols = ["tircp", "expended_amount", "expended_percent", "progress"]

tircp_columns = tircp_project_cols + tircp_funding_cols

In [None]:
# Keep only the columns of interest. .copy() makes df_tircp2 independent of
# df_tircp, so the column assignments in the following cells do not raise a
# SettingWithCopyWarning.
df_tircp2 = df_tircp[tircp_columns].copy()

In [None]:
# Lower-case the project description so the keyword search for ZEV projects
# does not depend on capitalisation.
# assign() returns a new frame rather than writing into a column subset of
# df_tircp, which avoids pandas' SettingWithCopyWarning.
df_tircp2 = df_tircp2.assign(description=df_tircp2["description"].str.lower())

In [None]:
# Flag ZEV-adjacent projects: zev_yes_no holds the first keyword found in the
# description, or "Not ZEV" when none is found (including missing descriptions).
# Multi-word alternatives are listed before their prefixes ("zero-emission"
# before "zero", "hydrogen fuel cell" before "hydrogen"); listed the other way
# round they can never be the reported match. The set of flagged rows does not
# depend on this order.
# NOTE(review): keywords match inside longer words too (e.g. "cell" in
# "cancelled") - confirm whether whole-word matching is wanted.
ZEV_KEYWORD_PATTERN = (
    "(zero-emission|hydrogen fuel cell|electric|cng|zero|emission|battery|hydrogen|cell)"
)

# assign() avoids writing into a column subset of df_tircp.
df_tircp2 = df_tircp2.assign(
    zev_yes_no=(
        df_tircp2["description"]
        .str.extract(ZEV_KEYWORD_PATTERN, expand=False)
        .fillna("Not ZEV")
    )
)

In [None]:
# Keep only the projects flagged with a ZEV keyword.
# .copy() gives df_tircp_zev its own data, so the column assignments in the
# following cells do not raise a SettingWithCopyWarning.
df_tircp_zev = df_tircp2.loc[df_tircp2["zev_yes_no"] != "Not ZEV"].copy()

In [None]:
# Number of TIRCP projects flagged as ZEV adjacent.
df_tircp_zev.shape[0]

In [None]:
# Replace spelled-out numbers in the description with digits so they can be
# extracted as counts.
# Matching whole words only (\b) leaves words that merely contain a number word
# untouched: a plain substring replace turns "network" into "net2rk" and
# "freight" into "fr8", which would then be read as a vehicle count.
NUMBER_WORDS = {
    "seven": "7",
    "two": "2",
    "eight": "8",
    "five": "5",
    "fifteen": "15",
    "twenty": "20",
    "three": "3",
    "four": "4",
    "eleven": "11",
}
NUMBER_WORD_PATTERN = r"\b(?:" + "|".join(NUMBER_WORDS) + r")\b"

# regex=True is passed explicitly because the default differs between pandas
# versions; assign() avoids writing into a slice of df_tircp2.
df_tircp_zev = df_tircp_zev.assign(
    description=df_tircp_zev["description"].str.replace(
        NUMBER_WORD_PATTERN,
        lambda match: NUMBER_WORDS[match.group(0)],
        regex=True,
    )
)

In [None]:
# Take the first run of digits in each description as the ZEV count.
# Descriptions without any digits get 0; the count is stored as float64.
# The pattern is a raw string because "\d" is not a valid Python string escape.
df_tircp_zev = df_tircp_zev.assign(
    number_of_zev=(
        df_tircp_zev["description"]
        .str.extract(r"(\d+)", expand=False)
        .astype("float64")
        .fillna(0)
    )
)

In [None]:
# The keyword flag was only needed for filtering; remove it before export.
df_tircp_zev = df_tircp_zev.drop("zev_yes_no", axis="columns")

## Manual Clean Up
* Although I extracted integers from the description columns to find the number of ZEVs purchased, some of the extracted values were missing or incorrect, so they were filled in by hand.

In [None]:
# Export LCTOP and TIRCP to fill in the # of zev buses manually for any rows that didn't pick up the number
# The export is wrapped in a string literal so that it does not run when the
# notebook is re-executed; remove the surrounding triple quotes to run it.
# It writes df_lctop3 and df_tircp_zev to the "lctop" and "tircp" sheets.
"""
with pd.ExcelWriter(
    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV.xlsx"
) as writer:
    df_lctop3.to_excel(writer, sheet_name="lctop", index=True)
    df_tircp_zev.to_excel(writer, sheet_name="tircp", index=True)
    """

## Analysis

In [None]:
# Load the manually corrected workbook. Passing a list of sheet names makes
# read_excel return a dict keyed by sheet name.
sheets_list = ["lctop", "tircp"]
MANUAL_WORKBOOK_PATH = (
    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_TIRCP_ZEV_manual.xlsx"
)
dict_df1 = pd.read_excel(MANUAL_WORKBOOK_PATH, sheet_name=sheets_list)

In [None]:
# Pull each sheet out of the workbook dict with snake_case column names.
lctop_clean, tircp_clean = (
    to_snakecase(dict_df1.get(sheet_name)) for sheet_name in ("lctop", "tircp")
)

### TIRCP

In [None]:
# Column lists passed to A6_zev.zev_summary for the TIRCP table.
tircp_group, tircp_count = ["award_year"], ["title"]
tircp_mon = ["tircp"]
tircp_sum = tircp_mon + ["number_of_zev"]

In [None]:
# Build the per-award-year summary: ZEV projects (tircp_clean) alongside all
# TIRCP projects (df_tircp).
tircp_summary_inputs = (
    tircp_clean,
    df_tircp,
    tircp_group,
    tircp_sum,
    tircp_count,
    tircp_mon,
)
tircp_summary = A6_zev.zev_summary(*tircp_summary_inputs)

In [None]:
# Share of each cycle's projects that are ZEV adjacent.
# Presumably "Title X" counts ZEV projects and "Title Y" counts all projects -
# confirm against A6_zev.zev_summary.
zev_project_counts = tircp_summary["Title X"]
all_project_counts = tircp_summary["Title Y"]
tircp_summary["Percent of Projects that are ZEV Adjacent"] = zev_project_counts.div(
    all_project_counts
)

In [None]:
# Give the summary columns presentation-friendly names.
tircp_display_names = {
    "Title X": "Total ZEV Projects",
    "Number Of Zev": "Total ZEV",
    "Title Y": "Total Projects in Cycle",
    "Tircp": "Total Amount Awarded",
}
tircp_summary = tircp_summary.rename(tircp_display_names, axis="columns")

In [None]:
# Label each award year with its TIRCP cycle.
award_years = (2015, 2016, 2018, 2020, 2022)
cycle_labels = ("Cycle 1", "Cycle 2", "Cycle 3", "Cycle 4", "Cycle 5")
cycle_by_award_year = dict(zip(award_years, cycle_labels))

tircp_summary["Award Year"] = tircp_summary["Award Year"].replace(cycle_by_award_year)

In [None]:
# Label the grand total row so it is obvious in the table.
# A single .loc[row, column] call writes into tircp_summary itself; the chained
# form df[col].loc[row] = ... assigns to an intermediate Series, which pandas
# warns about and which does not update the frame under copy-on-write.
# NOTE(review): row label 5 is assumed to be the grand total row - confirm
# against A6_zev.zev_summary if the number of cycles changes.
tircp_summary.loc[5, "Award Year"] = "Grand Total"

In [None]:
# Manually calculate the overall percent of projects that are ZEV adjacent and
# store it in the grand total row (row label 5).
# A single .loc[row, column] call writes into tircp_summary itself; the chained
# form df[col].loc[row] = ... assigns to an intermediate Series, which pandas
# warns about and which does not update the frame under copy-on-write.
tircp_summary.loc[5, "Percent of Projects that are ZEV Adjacent"] = (
    tircp_summary["Total ZEV Projects"].sum()
    / tircp_summary["Total Projects in Cycle"].sum()
)

In [None]:
# Display the finished TIRCP summary table.
tircp_summary

In [None]:
# Bar chart of ZEV project counts per cycle; the last summary row is left out
# of the chart.
tircp_cycle_rows = tircp_summary.iloc[:-1]
A6_zev.basic_bar_chart(
    tircp_cycle_rows,
    "Award Year",
    "Total ZEV Projects",
    "Award Year",
    "Total ZEV Projects by FY (TIRCP)",
)

In [None]:
# Bar chart of ZEV counts per cycle; the last summary row is left out of the
# chart.
tircp_cycle_rows = tircp_summary.iloc[:-1]
A6_zev.basic_bar_chart(
    tircp_cycle_rows,
    "Award Year",
    "Total ZEV",
    "Award Year",
    "Total ZEV to be or already Purchased by FY (TIRCP)",
)

#### How many ZEVs have already been purchased?

In [None]:
# Projects that have spent 100% of their allocated funds are treated as
# finished, so their ZEVs count as already purchased.
funds_fully_spent = tircp_clean["progress"].eq("100% of allocated funds spent")
tircp_zev_done = tircp_clean.loc[funds_fully_spent]
f"Approximately {tircp_zev_done.number_of_zev.sum()} ZEV already purchased and ${tircp_zev_done.tircp.sum()} spent"

In [None]:
# Finished projects, largest ZEV count first. A count of 0 means the number of
# buses is unknown.
done_table_columns = ["award_year", "grant_recipient", "number_of_zev", "tircp"]
tircp_zev_done = tircp_zev_done.loc[:, done_table_columns].sort_values(
    by="number_of_zev", ascending=False
)


def _as_usd(amount):
    """Format a numeric amount as US dollars."""
    return format_currency(amount, currency="USD", locale="en_US")


tircp_zev_done["tircp"] = tircp_zev_done["tircp"].apply(_as_usd)

In [None]:
# Display the finished TIRCP ZEV projects.
tircp_zev_done

#### How many ZEVs are in the pipeline?

In [None]:
# Everything that has not spent 100% of its allocated funds is still in the
# pipeline.
funds_not_fully_spent = tircp_clean["progress"].ne("100% of allocated funds spent")
tircp_zev_pipeline = tircp_clean.loc[funds_not_fully_spent]
f"Approximately {tircp_zev_pipeline.number_of_zev.sum()} ZEV in the pipeline to be purchased and ${tircp_zev_pipeline.tircp.sum()} in projected funds."

In [None]:
# Total ZEVs and TIRCP funds per recipient and award year, largest ZEV count
# first.
pipeline_totals = tircp_zev_pipeline.groupby(["grant_recipient", "award_year"]).agg(
    {"number_of_zev": "sum", "tircp": "sum"}
)
tircp_zev_pipeline = pipeline_totals.reset_index().sort_values(
    by="number_of_zev", ascending=False
)

In [None]:
def _as_usd(amount):
    """Format a numeric amount as US dollars."""
    return format_currency(amount, currency="USD", locale="en_US")


# Show the summed TIRCP funds as currency strings.
tircp_zev_pipeline["tircp"] = tircp_zev_pipeline["tircp"].apply(_as_usd)

In [None]:
# Display the TIRCP ZEV projects that are still in the pipeline.
tircp_zev_pipeline

### LCTOP

In [None]:
# Column lists passed to A6_zev.zev_summary for the LCTOP table.
lctop_group = ["funding_year"]
lctop_count = ["project_sub_type_ii"]
lctop_monetary = ["total_project_request_99314_+_99313", "total_lctop_funds"]
lctop_sum = [*lctop_monetary, "number_of_zev"]

In [None]:
# Build the per-funding-year summary: ZEV projects (lctop_clean) alongside all
# LCTOP projects (df_lctop2).
lctop_summary_inputs = (
    lctop_clean,
    df_lctop2,
    lctop_group,
    lctop_sum,
    lctop_count,
    lctop_monetary,
)
lctop_summary = A6_zev.zev_summary(*lctop_summary_inputs)

In [None]:
# Share of each funding year's projects that are ZEV adjacent.
# Presumably "Sub Type Ii X" counts ZEV projects and "Sub Type Ii Y" counts all
# projects - confirm against A6_zev.zev_summary.
zev_project_counts = lctop_summary["Sub Type Ii X"]
all_project_counts = lctop_summary["Sub Type Ii Y"]
lctop_summary["Percent of Projects that are ZEV Adjacent"] = zev_project_counts.div(
    all_project_counts
)

In [None]:
# Give the summary columns presentation-friendly names.
lctop_display_names = {
    "Number Of Zev": "Total ZEV",
    "Sub Type Ii X": "Total ZEV Projects",
    "Sub Type Ii Y": "Total Projects in Cycle",
    "Total  Request 99314 + 99313": "Total Amount Awarded",
}
lctop_summary = lctop_summary.rename(lctop_display_names, axis="columns")

In [None]:
# The count columns are whole numbers; store them as int64 instead of float.
lctop_count_columns = ["Total ZEV", "Total ZEV Projects", "Total Projects in Cycle"]
lctop_summary[lctop_count_columns] = lctop_summary[lctop_count_columns].astype("int64")

In [None]:
# Label the grand total row so it is obvious in the table.
# A single .loc[row, column] call writes into lctop_summary itself; the chained
# form df[col].loc[row] = ... assigns to an intermediate Series, which pandas
# warns about and which does not update the frame under copy-on-write.
# NOTE(review): row label 6 is assumed to be the grand total row - confirm
# against A6_zev.zev_summary if the number of funding years changes.
lctop_summary.loc[6, "Funding Year"] = "Grand Total"

In [None]:
# Manually calculate the overall percent of projects that are ZEV adjacent and
# store it in the grand total row (row label 6).
# A single .loc[row, column] call writes into lctop_summary itself; the chained
# form df[col].loc[row] = ... assigns to an intermediate Series, which pandas
# warns about and which does not update the frame under copy-on-write.
lctop_summary.loc[6, "Percent of Projects that are ZEV Adjacent"] = (
    lctop_summary["Total ZEV Projects"].sum()
    / lctop_summary["Total Projects in Cycle"].sum()
)

In [None]:
# lctop_summary

In [None]:
# Bar chart of ZEV counts per funding year; the last summary row is left out of
# the chart.
lctop_year_rows = lctop_summary.iloc[:-1]
A6_zev.basic_bar_chart(
    lctop_year_rows,
    "Funding Year",
    "Total ZEV",
    "Funding Year",
    "Total ZEV to be or already Purchased by FY (LCTOP)",
)

In [None]:
# Bar chart of ZEV project counts per funding year; the last summary row is
# left out of the chart.
lctop_year_rows = lctop_summary.iloc[:-1]
A6_zev.basic_bar_chart(
    lctop_year_rows,
    "Funding Year",
    "Total ZEV Projects",
    "Funding Year",
    "Total ZEV Projects by FY (LCTOP)",
)

#### ZEV in the pipeline
* Status column as shown below has only one "closed", 50 null values, and 101 open.

In [None]:
# Count LCTOP ZEV projects per status value.
lctop_clean.status.value_counts()

In [None]:
# Projects with status "closed" are treated as finished purchases.
is_closed = lctop_clean["status"].eq("closed")
lctop_done = lctop_clean.loc[is_closed]
f"Approximately {lctop_done.number_of_zev.sum()} ZEV already purchased and ${lctop_done['total_project_request_99314_+_99313'].sum()} spent"

In [None]:
# Everything that is not "closed" is still in the pipeline. Rows with a missing
# status are included, because a missing value compares as not equal to "closed".
lctop_pipeline = lctop_clean.loc[lctop_clean["status"] != "closed"]
# Summarise the pipeline rows only; summing over lctop_clean would also count
# the closed projects that are reported as already purchased.
f"Approximately {lctop_pipeline.number_of_zev.sum()} ZEV to be purchased and ${lctop_pipeline['total_project_request_99314_+_99313'].sum()} in projected funds"

#### All Applicants

In [None]:
# Store the requested amount as whole dollars (int64).
request_column = "total_project_request_99314_+_99313"
lctop_clean[request_column] = lctop_clean[request_column].astype("int64")

In [None]:
# Total ZEVs and requested funds per funding year and lead agency, largest ZEV
# count first.
applicant_totals = lctop_clean.groupby(["funding_year", "lead_agency"]).agg(
    {"number_of_zev": "sum", "total_project_request_99314_+_99313": "sum"}
)
lctop_applicants = applicant_totals.reset_index().sort_values(
    by="number_of_zev", ascending=False
)

In [None]:
def _as_usd(amount):
    """Format a numeric amount as US dollars."""
    return format_currency(amount, currency="USD", locale="en_US")


# Show the summed requested funds as currency strings.
request_column = "total_project_request_99314_+_99313"
lctop_applicants[request_column] = lctop_applicants[request_column].apply(_as_usd)

In [None]:
# lctop_applicants

## Export

In [None]:
# Final export of the summary and detail tables to one workbook, one sheet per
# table. It is wrapped in a string literal so that it does not run when the
# notebook is re-executed; remove the surrounding triple quotes to run it.
"""
with pd.ExcelWriter(
    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV_final.xlsx"
) as writer:
    tircp_summary.to_excel(writer, sheet_name="tircp_summary", index=False)
    tircp_zev_done.to_excel(writer, sheet_name="tircp_finished_projects", index=False)
    tircp_zev_pipeline.to_excel(writer, sheet_name="tircp_inprogress", index=False)
    lctop_summary.to_excel(writer, sheet_name="lctop_summary", index=False)
    lctop_applicants.to_excel(writer, sheet_name="lctop_applicants", index=False)
    """