In [1]:
import A1_data_prep
import A2_tableau
import pandas as pd
from calitp import *

pd.options.display.max_columns = 100
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
pd.options.display.float_format = "{:.2f}".format



## LCTOP

In [2]:
# Read in sheet
df_lctop = pd.read_excel(
    "gs://calitp-analytics-data/data-analyses/lctop/LCTOP_cleaned.xlsx",
    sheet_name="cleaned",
)

In [3]:
# Only keep certain cols
lctop_cols_to_keep = [
    "funding_year",
    "project_sub_type_ii",
    "project_description__short_",
    "puc_99313_funds",
    "puc_99314_funds",
    "total_project_request_99314_+_99313",
    "total_lctop_funds",
    "total_cci_funds",
    "total_project_cost",
    "status",
    "lead_agency",
]

In [4]:
# Subset df
df_lctop2 = df_lctop[lctop_cols_to_keep]

In [5]:
# Keep only zero emission adjacent project sub type ii
list_lctop_zev = [
    "New vehicles for new expanded/enhanced transit service",
    "New zero-emission vehicles",
]

In [6]:
# filter them all out at once.
df_lctop3 = df_lctop2[df_lctop2.project_sub_type_ii.isin(list_lctop_zev)]

In [7]:
# Compare lengths
len(df_lctop3), len(df_lctop2)

(152, 851)

In [8]:
# Make sure I filtered it correctly
df_lctop3["project_sub_type_ii"].unique()

array(['New vehicles for new expanded/enhanced transit service',
       'New zero-emission vehicles'], dtype=object)

In [9]:
# Change short description to lower case
df_lctop3["project_description__short_"] = df_lctop3[
    "project_description__short_"
].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [10]:
# Replace numbers
df_lctop3["project_description__short_"] = (
    df_lctop3["project_description__short_"]
    .str.replace("seven", "7")
    .str.replace("two", "2")
    .str.replace("eight", "8")
    .str.replace("five", "5")
    .str.replace("fifteen", "15")
    .str.replace("twenty", "20")
    .str.replace("three", "3")
    .str.replace("four", "4")
    .str.replace("eleven", "11")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [11]:
# Extract numbers from project desc
df_lctop3["number_of_zev"] = (
    df_lctop3.project_description__short_.str.extract("(\d+)")
    .astype("float64")
    .fillna(0)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [12]:
# df = to_snakecase(pd.read_csv(url))

## TIRCP

In [13]:
df_tircp = to_snakecase(A2_tableau.tableau_dashboard())



In [14]:
# Subset for only cols of interest
tircp_columns = [
    "award_year",
    "grant_recipient",
    "title",
    "description",
    "tircp",
    "expended_amount",
    "expended_percent",
    "progress",
]

In [15]:
df_tircp2 = df_tircp[tircp_columns]

In [16]:
# Lower project description to search for ZEV projects
df_tircp2["description"] = df_tircp2["description"].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [17]:
df_tircp2["zev_yes_no"] = (
    df_tircp2["description"]
    .str.extract(
        "(electric|cng|zero|emission|zero-emission|battery|hydrogen|hydrogen fuel cell)",
        expand=False,
    )
    .fillna("Not ZEV")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [18]:
# Extract ZEV only projects
df_tircp_zev = df_tircp2.loc[df_tircp2["zev_yes_no"] != "Not ZEV"]

In [19]:
len(df_tircp_zev)

45

In [20]:
# Extract numbers out of description
df_tircp_zev["description"] = (
    df_tircp_zev["description"]
    .str.replace("seven", "7")
    .str.replace("two", "2")
    .str.replace("eight", "8")
    .str.replace("five", "5")
    .str.replace("fifteen", "15")
    .str.replace("twenty", "20")
    .str.replace("three", "3")
    .str.replace("four", "4")
    .str.replace("eleven", "11")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [21]:
# Extract numbers from project desc
df_tircp_zev["number_of_zev"] = (
    df_tircp_zev.description.str.extract("(\d+)").astype("float64").fillna(0)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [22]:
df_tircp_zev = df_tircp_zev.drop(columns=["zev_yes_no"])

In [23]:
# Export LCTOP and TIRCP to fill in the # of zev buses manually for any rows that didn't pick up the number
"""
with pd.ExcelWriter(
    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV.xlsx"
) as writer:
    df_lctop3.to_excel(writer, sheet_name="lctop", index=True)
    df_tircp_zev.to_excel(writer, sheet_name="tircp", index=True)
    """

'\nwith pd.ExcelWriter(\n    "gs://calitp-analytics-data/data-analyses/tircp/LCTOP_ZEV.xlsx"\n) as writer:\n    df_lctop3.to_excel(writer, sheet_name="lctop", index=True)\n    df_tircp_zev.to_excel(writer, sheet_name="tircp", index=True)\n    '

## Analysis

In [24]:
# Open up a bunch of sheets all in the same workbook
# Sheets I want
sheets_list = ["lctop", "tircp"]

# Open the workbook in a dictionary
dict_df1 = pd.read_excel(
    "gs://calitp-analytics-data/data-analyses/tircp/zev_manual.xlsx",
    sheet_name=sheets_list,
)

In [25]:
# Grab each sheet
lctop_clean = to_snakecase(dict_df1.get("lctop"))
tircp_clean = to_snakecase(dict_df1.get("tircp"))

### TIRCP

In [26]:
tircp_sum = ["tircp", "number_of_zev"]
tircp_count = ["title"]
tircp_group = ["award_year"]
tircp_mon = ["tircp"]

In [36]:
def zev_summary(
    df_zev,
    df_all_projects,
    group_by_cols: list,
    sum_cols: list,
    count_cols: list,
    monetary_cols: list,
):
    # Group by
    zev_summary = df_zev.groupby(group_by_cols).agg(
        {**{e: "sum" for e in sum_cols}, **{e: "count" for e in count_cols}}
    )

    zev_summary = zev_summary.reset_index()

    # Aggregate ALL projects in grant program
    all_projects = (
        df_all_projects.groupby(group_by_cols)
        .agg({**{e: "count" for e in count_cols}})
        .reset_index()
    )

    # Merge the summaries together to calculate % of zev projects out of total projects
    m1 = pd.merge(zev_summary, all_projects, how="inner", on=group_by_cols)

    # Get grand totals
    m1 = m1.append(m1.sum(numeric_only=True), ignore_index=True)

    # Format to currency
    m1 = A1_data_prep.currency_format(m1, monetary_cols)

    # Clean cols
    m1 = A1_data_prep.clean_up_columns(m1)

    return m1

In [37]:
tircp_summary = zev_summary(tircp_clean, df_tircp, tircp_group, tircp_sum, tircp_count,tircp_mon)



In [38]:
tircp_summary["Percent of Projects that are ZEV Adjacent"] = (
    tircp_summary["Title X"] / tircp_summary["Title Y"]
)

In [39]:
tircp_summary = tircp_summary.drop(columns="Title Y").rename(
    columns={"Title X": "Total ZEV Projects", "Number Of Zev": "Total ZEV"}
)

In [40]:
tircp_summary

Unnamed: 0,Award Year,Tircp,Total ZEV,Total ZEV Projects,Percent of Projects that are ZEV Adjacent
0,2015,$74745000.0,54,4,0.29
1,2016,$41930000.0,2,3,0.21
2,2018,$386220000.0,209,13,0.46
3,2020,$247425000.0,39,9,0.53
4,2022,$439762000.0,450,15,0.65
5,10091,$1190082000.0,754,44,0.46


### LCTOP

In [41]:
lctop_count = ["project_sub_type_ii"]
lctop_group = ["funding_year"]
lctop_sum = [
    "puc_99313_funds",
    "puc_99314_funds",
    "total_project_request_99314_+_99313",
    "total_cci_funds",
    "total_lctop_funds",
    "total_project_cost",
    "number_of_zev",
]
lctop_monetary = [
    "puc_99313_funds",
    "puc_99314_funds",
    "total_project_request_99314_+_99313",
    "total_cci_funds",
    "total_lctop_funds",
    "total_project_cost",
]

In [42]:
lctop_summary = zev_summary(lctop_clean, df_lctop2, lctop_group, lctop_sum, lctop_count, lctop_monetary)



In [43]:
lctop_summary["Percent of Projects that are ZEV Adjacent"] = (
    lctop_summary["Sub Type Ii X"] / lctop_summary["Sub Type Ii Y"]
)

In [44]:
lctop_summary = lctop_summary.drop(columns=["Sub Type Ii Y"]).rename(
    columns={"Number Of Zev": "Total ZEV", "Sub Type Ii X": "Total ZEV Projects"}
)

In [45]:
# lctop_summary = A1_data_prep.currency_format(lctop_summary, lctop_monetary)

In [46]:
lctop_summary

Unnamed: 0,Funding Year,Puc 99313 Funds,Puc 99314 Funds,Total Request 99314 + 99313,Total Cci Funds,Total Lctop Funds,Total Cost,Total ZEV,Total ZEV Projects,Percent of Projects that are ZEV Adjacent
0,14-15,$471384.0,$86363.0,$557747.0,$0.0,$0.0,$0.0,0.0,6.0,0.06
1,15-16,$6369789.0,$13968572.0,$20338361.0,$0.0,$0.0,$127797962.0,73.0,20.0,0.15
2,16-17,$2356132.0,$5896335.0,$8252467.0,$96361014.0,$48464014.0,$2134159974.0,144.0,17.0,0.13
3,17-18,$9035292.0,$16542610.0,$25577902.0,$135629460.0,$68958118.0,$2251368555.0,166.0,33.0,0.22
4,18-19,$15791003.0,$22470451.0,$38261454.0,$125282499.0,$85768916.0,$2224989656.0,179.0,36.0,0.2
5,19-20,$17974901.0,$8261581.0,$26236482.0,$123509258.0,$105699301.0,$199976608.0,148.0,40.0,0.24
6,,$51998501.0,$67225912.0,$119224412.0,$480782231.0,$308890349.0,$6938292755.0,710.0,152.0,0.18


## Applicants

In [55]:
lctop_clean.groupby(['lead_agency']).agg({'number_of_zev':'sum'}).reset_index().sort_values('number_of_zev', ascending = False).head(10)

Unnamed: 0,lead_agency,number_of_zev
22,Golden Gate Bridge Highway and Transportation District,197
44,San Diego Metropolitan Transit System,68
50,Santa Clara Valley Transportation Authority,57
55,Sonoma County Transit,53
36,North County Transit District,28
48,San Mateo County Transit District,19
66,Yolo County Transportation District,16
34,Napa Valley Transportation Authority,14
26,Livermore Amador Valley Transit Authority,14
18,Fresno County Rural Transit,14


In [87]:
tircp_zev_done = (tircp_clean.loc[tircp_clean['progress'] == '100% of allocated funds spent'])
tircp_zev_done[['award_year','grant_recipient','number_of_zev','tircp']]

Unnamed: 0,award_year,grant_recipient,number_of_zev,tircp
0,2015,Antelope Valley Transit Authority,29,24403000
1,2015,Orange County Transportation Authority,5,2320000
3,2015,San Joaquin Regional Transit District,12,6841000
4,2016,Antelope Valley Transit Authority,0,8930000
6,2016,Orange County Transportation Authority,0,28000000
8,2018,Anaheim Transportation Network,40,28617000
17,2018,Santa Barbara County Association Of Governments,5,9600000


In [88]:
f'{tircp_zev_done.number_of_zev.sum()} ZEV already purchased and {tircp_zev_done.tircp.sum()} spent'

'91 ZEV already purchased and 108711000 spent'

In [95]:
tircp_clean['progress'].value_counts()

No expenditures recorded         26
100% of allocated funds spent     7
Behind                            5
On Track                          3
Ahead                             3
Name: progress, dtype: int64

In [96]:
tircp_zev_pipeline =  tircp_clean[tircp_clean["progress"].isin(["No expenditures recorded", "Behind"])]

In [97]:
tircp_zev_pipeline.groupby(['grant_recipient', 'award_year']).agg({'number_of_zev':'sum','tircp':'sum'})

Unnamed: 0_level_0,Unnamed: 1_level_0,number_of_zev,tircp
grant_recipient,award_year,Unnamed: 2_level_1,Unnamed: 3_level_1
Alameda Contra Costa Transit District,2018,0,14000000
Anaheim Transportation Network,2022,42,22778000
Antelope Valley Transit Authority,2020,11,6503000
Antelope Valley Transit Authority,2022,6,4829000
City Of Cupertino,2022,12,8465000
City Of Fresno,2018,6,7798000
City Of Glendale And Arroyo Verdugo Communities,2022,27,34648000
City Of Los Angeles,2018,112,36104000
City Of Santa Monica,2018,10,3050000
City Of Torrance,2022,10,9600000


In [98]:
f'{tircp_zev_pipeline.number_of_zev.sum()} ZEV in the pipeline to be purchased and {tircp_zev_pipeline.tircp.sum()} left to spend.'

'641 ZEV in the pipeline to be purchased and 754218000 left to spend.'