# SB125 Fund Split Analysis

## Question:
- How did RTAs split SB125 funds between operations and capital?

## Methodology:
- upload all avilable `SB125 fund request template` files to gcs
- examine all files for consistencies:
    - come with cleaning plan for inconsistent examples (files withot capital/operating columns)
- concat all rows across all files


## Notes:
- some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an quivilent file their allocation package

In [1]:
import pandas as pd
import os
import altair as alt

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

In [2]:
file_list = [
    "sierra_fund_request.xlsx",
    "slocog_fund_request.xlsx",
    "tehema_fund_request.xlsx",
    "tuolumne_fund_request.xlsx",
    "ventura_fund_request.xlsx",
    "alpine_fund_request.xlsx",
    "amador_fund_request.xlsx",
    "butte_fund_request.xlsx",
    "calaveras_fund_request.xlsx",
    "del_norte_fund_request.xlsx",
    "el_dorado_fund_request.xlsx",
    "humboldt_fund_request.xlsx",
    "kern_fund_request.xlsx",
    "kings_fund_request.xlsx",
    "la_metro_fund_request.xlsx",
    "lake_fund_request.xlsx",
    "lassen_fund_request.xlsx",
    "madera_fund_request.xlsx",
    "mariposa_fund_request.xlsx",
    "mendocino_fund_request.xlsx",
    "merced_fund_request.xlsx",
    "mtc_fund_request.xlsx",
    "nevada_fund_request.xlsx",
    "orange_fund_request.xlsx",
    "placer_fund_request.xlsx",
    "plumas_fund_request.xlsx",
    "riverside_fund_request.xlsx",
    "san_benito_fund_request.xlsx",
    "san_diego_mts_fund_request.xlsx",
    "santa_cruz_fund_request.xlsx",
    "shasta_fund_request.xlsx",
]

file_list.sort()

In [3]:
def clean_fund_request(file:str) -> pd.DataFrame:
    """
    reads in the file from GCS, maps col_names list to df columns, drops all the blank rows.
    returns df.
    """
    col_names = [
    "rtpa",
    "implementing agenc-y/-ies",
    "project",
    "fund source",
    "capital_FY23-24",
    "capital_FY24-25",
    "capital_FY25-26",
    "capital_FY26-27",
    "operating_FY23-24",
    "operating_FY24-25",
    "operating_FY25-26",
    "operating_FY26-27",
    "total",
]
    
    df = pd.read_excel(f"{GCS_PATH}{file}", header=2, nrows=40, names=col_names).drop(columns="total")
    row_drop = df["rtpa"].isin(["Grand Total", "RTPA"])
    df = df.drop(df[row_drop].index)
    df = df.dropna(how= "all")
    df[["rtpa", "implementing agenc-y/-ies","project"]] = df[["rtpa", "implementing agenc-y/-ies","project"]].ffill()
    
    return df


first iteration of fund_request_checker func.
>def read_in(file:str) -> pd.DataFrame:
>    df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>    df = df.dropna(how= "all")
>   
>    if len(df.columns) == 13:
>        print(f"{file} can use clean_fund_request func.")
>    else: 
>        print(f"needs manual check, {file}")
       

old iteration
>def fund_request_checker(file_list:list) -> pd.DataFrame:
>
>    for file in file_list:
>    
>        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>        df = df.dropna(how= "all")
>    
>        if len(df.columns) == 13:
>            print(f"{file} can use clean_fund_request func.")
>        else: 
>            print(f"needs manual check, {file}")
       

In [4]:
#fund_request_checker(file_list)

old iteration
>def fund_request_checker_v2(file_list:list):
>    gtg_files = []
>    manual_review = []
>    for file in file_list:
>    
>        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>        df = df.dropna(how= "all")
>    
>        if len(df.columns) == 13:
>            gtg_files.append(f"{file}")
>        else: 
>            manual_review.append(f"{file}")
>    return display(
>        "good to go files",
>        list(gtg_files), 
>        "needs manual check", 
>        list(manual_review)
>    )

In [5]:
#fund_request_checker_v2(file_list)

# Cleaning the easy DFs

In [6]:
def fund_request_checker_v3(file_list:list) -> tuple:
    """takes in list of fund_request excel file name. reads in each file, checks if DF has 13 columns.
    if yes, appends do good-to-go list. else, appends to needs-manual-review.
    output is a tuple of the 2 list.
    assign 2 variables to use this func.
    """
    gtg_files = []
    manual_review = []
    for file in file_list:
    
        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
        df = df.dropna(how= "all")
    
        if len(df.columns) == 13:
            gtg_files.append(f"{file}")
        else: 
            manual_review.append(f"{file}")
    return gtg_files, manual_review

In [7]:
good_list, review_list = fund_request_checker_v3(file_list)

In [8]:
display(
    len(good_list),
    len(review_list)
)

24

7

In [9]:
def cleaner_loop(gtg_list:list) -> dict: 
    """
    takes in good-to-go list from fund_request_checker.
    applies the clean_fund_request function to each item on the list, then appends to dictionary.
    key is name of the file, value is the cleaned dataframe.
    output is dictionary. 
    """
    cleaned_df ={}

    for name in gtg_list:
        cleaned_df[name] = clean_fund_request(name)
    return cleaned_df

In [10]:
cleaned_fund_request = cleaner_loop(good_list)

In [11]:
display(
    type(cleaned_fund_request),
    len(cleaned_fund_request),
    list(cleaned_fund_request.keys()),
)

dict

24

['alpine_fund_request.xlsx',
 'amador_fund_request.xlsx',
 'calaveras_fund_request.xlsx',
 'del_norte_fund_request.xlsx',
 'el_dorado_fund_request.xlsx',
 'humboldt_fund_request.xlsx',
 'kings_fund_request.xlsx',
 'la_metro_fund_request.xlsx',
 'lake_fund_request.xlsx',
 'madera_fund_request.xlsx',
 'mariposa_fund_request.xlsx',
 'mendocino_fund_request.xlsx',
 'merced_fund_request.xlsx',
 'nevada_fund_request.xlsx',
 'placer_fund_request.xlsx',
 'plumas_fund_request.xlsx',
 'riverside_fund_request.xlsx',
 'san_benito_fund_request.xlsx',
 'san_diego_mts_fund_request.xlsx',
 'shasta_fund_request.xlsx',
 'sierra_fund_request.xlsx',
 'slocog_fund_request.xlsx',
 'tehema_fund_request.xlsx',
 'tuolumne_fund_request.xlsx']

In [12]:
# view all the good-to-go df

from IPython.display import display

# .items() creates tuples of each element in the dict. key:value maps to key:df

for key, df in cleaned_fund_request.items():
    print(f"DataFrame: {key}")
    #display(df)


DataFrame: alpine_fund_request.xlsx
DataFrame: amador_fund_request.xlsx
DataFrame: calaveras_fund_request.xlsx
DataFrame: del_norte_fund_request.xlsx
DataFrame: el_dorado_fund_request.xlsx
DataFrame: humboldt_fund_request.xlsx
DataFrame: kings_fund_request.xlsx
DataFrame: la_metro_fund_request.xlsx
DataFrame: lake_fund_request.xlsx
DataFrame: madera_fund_request.xlsx
DataFrame: mariposa_fund_request.xlsx
DataFrame: mendocino_fund_request.xlsx
DataFrame: merced_fund_request.xlsx
DataFrame: nevada_fund_request.xlsx
DataFrame: placer_fund_request.xlsx
DataFrame: plumas_fund_request.xlsx
DataFrame: riverside_fund_request.xlsx
DataFrame: san_benito_fund_request.xlsx
DataFrame: san_diego_mts_fund_request.xlsx
DataFrame: shasta_fund_request.xlsx
DataFrame: sierra_fund_request.xlsx
DataFrame: slocog_fund_request.xlsx
DataFrame: tehema_fund_request.xlsx
DataFrame: tuolumne_fund_request.xlsx


# Cleaning individual good DFs

## Humboldt

In [13]:
cleaned_fund_request["humboldt_fund_request.xlsx"][[
    "operating_FY24-25",	
    "operating_FY25-26",	
    "operating_FY26-27"]
] = cleaned_fund_request["humboldt_fund_request.xlsx"][[
    "operating_FY24-25",	
    "operating_FY25-26",	
    "operating_FY26-27"]
].replace("-", 0)

## Amador

In [14]:
cleaned_fund_request["amador_fund_request.xlsx"] = cleaned_fund_request["amador_fund_request.xlsx"].drop([2])

## Merced

In [15]:
cleaned_fund_request["merced_fund_request.xlsx"] = cleaned_fund_request["merced_fund_request.xlsx"].drop([1,2,34,36,37])

In [16]:
cleaned_fund_request["merced_fund_request.xlsx"].at[13, "fund source"] = "`5339"
cleaned_fund_request["merced_fund_request.xlsx"].at[19, "fund source"] = "`5307"

In [17]:
display(
    cleaned_fund_request["merced_fund_request.xlsx"].at[13, "fund source"],
    cleaned_fund_request["merced_fund_request.xlsx"].at[19, "fund source"],
    cleaned_fund_request["merced_fund_request.xlsx"].dtypes
)

'`5339'

'`5307'

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital_FY23-24              object
capital_FY24-25              object
capital_FY25-26              object
capital_FY26-27              object
operating_FY23-24            object
operating_FY24-25            object
operating_FY25-26            object
operating_FY26-27            object
dtype: object

## San Benito

In [18]:
cleaned_fund_request["san_benito_fund_request.xlsx"] = cleaned_fund_request["san_benito_fund_request.xlsx"].drop([6,9])

## San Diego MTS

In [19]:
row_drops = [
    1,
    2,
    3,
    4,
    9,
    10,
    11,
]
cleaned_fund_request["san_diego_mts_fund_request.xlsx"] = cleaned_fund_request["san_diego_mts_fund_request.xlsx"].drop(row_drops)

## Sierra

In [20]:
cleaned_fund_request["sierra_fund_request.xlsx"] = cleaned_fund_request["sierra_fund_request.xlsx"].drop(list(range(24,32)))

## Nevada

In [21]:
cleaned_fund_request["nevada_fund_request.xlsx"] = cleaned_fund_request["nevada_fund_request.xlsx"].drop([8,9])

## Plumas

In [22]:
cleaned_fund_request["plumas_fund_request.xlsx"]= cleaned_fund_request["plumas_fund_request.xlsx"].drop(list(range(10,14)))

# Attempt to clean bad DFs

In [23]:
    col_names = [
    "rtpa",
    "implementing agenc-y/-ies",
    "project",
    "fund source",
    "capital_FY23-24",
    "capital_FY24-25",
    "capital_FY25-26",
    "capital_FY26-27",
    "operating_FY23-24",
    "operating_FY24-25",
    "operating_FY25-26",
    "operating_FY26-27",
    "total",
    ]

In [24]:
# list of bad DFs that didnt match the other layouts
review_list

['butte_fund_request.xlsx',
 'kern_fund_request.xlsx',
 'lassen_fund_request.xlsx',
 'mtc_fund_request.xlsx',
 'orange_fund_request.xlsx',
 'santa_cruz_fund_request.xlsx',
 'ventura_fund_request.xlsx']

## Lassen

In [25]:
#CLEAN COMPLETED

lassen = pd.read_excel(f"{GCS_PATH}lassen_fund_request.xlsx", 
                       sheet_name = "D.2. Detailed Fund Request",
                       skiprows = 6,
                       header=0, 
                       skipfooter=12,
                      ).drop(columns=["Unnamed: 0", "Project Type", "Operator"])

# can work with this. may be able to remove the top rows then use cleaner loop
lassen.columns=col_names
lassen_cleaned = lassen.drop(columns="total")
# lassen_cleaned

## Butte

In [26]:
# CLEANED COMPLETE
butte = pd.read_excel(f"{GCS_PATH}butte_fund_request.xlsx",
                      skiprows= 2,
                      header= 0,
                      skipfooter= 17,
                     )

butte_cleaned = butte.copy()

butte_cleaned[["RTPA","Implementing Agenc-y/-ies", "Project"]] = butte_cleaned[["RTPA","Implementing Agenc-y/-ies", "Project"]].ffill()

butte_cleaned.insert(6, "operations FY25-26",0)
butte_cleaned.insert(7, "operations FY26-27",0)
butte_cleaned.columns = [
    "rtpa",
    "implementing agenc-y/-ies",
    "project",
    "fund source",
    "operating_FY23-24",
    "operating_FY24-25",
    "operating_FY25-26",
    "operating_FY26-27",
    "capital_FY23-24",
    "capital_FY24-25",
    "capital_FY25-26",
    "capital_FY26-27",
    "total",
    
]
butte_cleaned = butte_cleaned.drop(columns="total")
# missing the operations columns. may not need if we melt just the capital columns
display(
    butte.shape,
    butte_cleaned.shape,
    #butte_cleaned,
)

(5, 11)

(5, 12)

## MTC

In [27]:
# CLEANED COMPLETE

mtc = pd.read_excel(f"{GCS_PATH}mtc_fund_request.xlsx",
                    skiprows = 2,
                    header= 0,
                    skipfooter= 21
                   ).drop(columns= ["Unnamed: 13","Unnamed: 14","Unnamed: 15"])

mtc_cleaned = mtc.copy()

mtc_cleaned.columns = col_names
mtc_cleaned = mtc_cleaned.drop(columns="total")
# can work with this. may be able to remove the top rows then use cleaner loop
display(
    mtc_cleaned.shape,
    mtc_cleaned.columns,
    #mtc_cleaned
)

(16, 12)

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital_FY23-24', 'capital_FY24-25', 'capital_FY25-26',
       'capital_FY26-27', 'operating_FY23-24', 'operating_FY24-25',
       'operating_FY25-26', 'operating_FY26-27'],
      dtype='object')

## Orange

In [28]:
#CLEANED COMPLETE 
orange = pd.read_excel(f"{GCS_PATH}orange_fund_request.xlsx",
                       skiprows=3,
                       header=0,
                       skipfooter=1
                      )

orange_cleaned = orange.copy()

orange_cleaned.rename(columns={"Unnamed: 0":"RTPA"}, inplace = True)
orange_cleaned["RTPA"] = "OCTA"
orange_cleaned = orange_cleaned.drop(columns= [
    "FY27-28",
    "FY28-29",
    "FY27-28.1",
    "FY28-29.1"
])

orange_cleaned.columns = col_names
orange_cleaned = orange_cleaned.drop(columns="total")
# FY goes to 28-29. dont think i need those columns, there are no numbers in them anyways
# can drop
#orange_cleaned

## Santa Cruz

In [29]:
# CLEANED COMPLETE
santa_cruz = pd.read_excel(f"{GCS_PATH}santa_cruz_fund_request.xlsx",
                           skiprows= 4,
                           header= 0,
                           skipfooter= 5
                          ).iloc[:,0:13]

santa_cruz_cleaned = santa_cruz.copy()

santa_cruz_cleaned.columns = col_names
santa_cruz_cleaned.drop(columns="total", inplace=True)
# can remove the extra columns past unnamed 12, then remove top and bottom rows, then use cleaner loop
display(
    santa_cruz_cleaned.shape,
    #santa_cruz_cleaned
)

(8, 12)

## Ventura

In [30]:
ventura_tircp_capital = pd.read_excel(f"{GCS_PATH}ventura_fund_request.xlsx", 
                                      sheet_name = "Project Breakdown",
                                      skiprows = 2,
                                      header = 0,
                                      skipfooter = 40
                                     )

# needs a lot of work, may have to do in excel
# try breaking up the DF into multiple seperate DFs, then add them all together at the end
# will have to add capital or operations columns as needed. 
ventura_tircp_capital.shape

(26, 9)

In [31]:
ventura_tircp_operating = pd.read_excel(f"{GCS_PATH}ventura_fund_request.xlsx", 
                                      sheet_name = "Project Breakdown",
                                      skiprows = 51,
                                      header = 0,
                                      skipfooter = 1
                                     )
ventura_tircp_operating.shape

(16, 9)

In [32]:
ventura_merge = ventura_tircp_capital.merge(
    ventura_tircp_operating,
    how= "outer",
    on = ["Implementing Agenc-y/-ies","Project Category",	"Project"],
    suffixes = ["_capital", "_operating"]
).drop(columns=[
    "Year Requested",
    "Unnamed: 8_capital",
    "Unnamed: 8_operating",
    "Project Category"
])
ventura_merge

Unnamed: 0,Implementing Agenc-y/-ies,Project,FY23-24_capital,FY24-25_capital,FY25-26_capital,FY26-27_capital,Fund Source,FY23-24_operating,FY24-25_operating,FY25-26_operating,FY26-27_operating
0,Gold Coast Transit District,Energy Storage - Hydrogen Station,,1124000.0,33543.0,,,,,,
1,Gold Coast Transit District,Energy Storage - Hydrogen Station,,,468504.0,,,,,,
2,Simi Valley,EV Charging,,651953.0,,,,,,,
3,County of Ventura,Bus Stop Improvements,,,1000.0,,,,,,
4,Thousand Oaks,EV Charging,,1000000.0,,,,,,,
5,Thousand Oaks,EV Support,,500000.0,,,,,,,
6,Valley Express,Bus Stop Improvements,,,50000.0,,,,,,
7,Ojai,Facility Upgrades & EV Charging,,15000.0,,,,,,,
8,Ojai,Facility Upgrades & EV Charging,,200000.0,,,,,,,
9,Ventura County Transporation Commission,New Fare System,,4000000.0,,,,,,,


In [33]:
ventura_merge["rtpa"] = "VCTC"
ventura_merge["Fund Source"] = "TIRCP"

ventura_col_dict = {
    "Implementing Agenc-y/-ies":"implementing agenc-y/-ies",
    "Project":"project",
    "Fund Source":"fund source",
    "FY23-24_capital":"capital_FY23-24",
    "FY24-25_capital":"capital_FY24-25",
    "FY25-26_capital":"capital_FY25-26",
    "FY26-27_capital":"capital_FY26-27",
    "FY23-24_operating":"operating_FY23-24",
    "FY24-25_operating":"operating_FY24-25",
    "FY25-26_operating":"operating_FY25-26",
    "FY26-27_operating":"operating_FY26-27",
}

col_order =[
    "rtpa",
    "implementing agenc-y/-ies",
    "project",
    "fund source",
    "capital_FY23-24",	
    "capital_FY24-25",	
    "capital_FY25-26",	
    "capital_FY26-27",	
    "operating_FY23-24",	
    "operating_FY24-25",
    "operating_FY25-26",
    "operating_FY26-27"
]

ventura_merge.rename(columns= ventura_col_dict, inplace = True)

ventura_merge = ventura_merge[col_order]

display(
    ventura_merge.shape,
    list(ventura_merge.columns)
)

(42, 12)

['rtpa',
 'implementing agenc-y/-ies',
 'project',
 'fund source',
 'capital_FY23-24',
 'capital_FY24-25',
 'capital_FY25-26',
 'capital_FY26-27',
 'operating_FY23-24',
 'operating_FY24-25',
 'operating_FY25-26',
 'operating_FY26-27']

In [34]:
ventura_zetcp_capital = pd.read_excel(f"{GCS_PATH}ventura_fund_request.xlsx", 
                                      sheet_name = "Project Breakdown",
                                      skiprows = 32,
                                      header = 0,
                                      skipfooter = 21
                                     ).drop(columns =["Unnamed: 7", "Unnamed: 8"])
fund_dict = {
    "GGRF Y1":"ZETCP (GGRF)",
    "GGRF Y2":"ZETCP (GGRF)",
    "GGRF Y3":"ZETCP (GGRF)",
    "GGRF Y4":"ZETCP (GGRF)",
    "PTA": "ZETCP (PTA)",
}

ven_col= {
    'Implementing Agenc-y/-ies':"implementing agenc-y/-ies",
    'Project':"project",
    'Fund Source':"fund source",
    'FY23-24':"capital_FY23-24", 
    'FY24-25':"capital_FY24-25",
    'FY25-26':"capital_FY25-26",
    'FY26-27':"capital_FY26-27",
}

ventura_zetcp_capital["Fund Source"] = ventura_zetcp_capital["Fund Source"].replace(fund_dict)

ventura_zetcp_capital["rtpa"] = "VCTC"

ventura_zetcp_capital.rename(columns= ven_col, inplace = True)

ventura_zetcp_capital = ventura_zetcp_capital[col_order[0:8]]

display(
    ventura_zetcp_capital.shape,
    list(ventura_zetcp_capital.columns),
    ventura_zetcp_capital
)

(15, 8)

['rtpa',
 'implementing agenc-y/-ies',
 'project',
 'fund source',
 'capital_FY23-24',
 'capital_FY24-25',
 'capital_FY25-26',
 'capital_FY26-27']

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27
0,VCTC,Simi Valley,EV Charging,ZETCP (PTA),,2348047.0,,
1,VCTC,Gold Coast Transit District,Hydrogen Station - Energy Storage,ZETCP (GGRF),,666953.0,,
2,VCTC,Gold Coast Transit District,Relief vehicles,ZETCP (GGRF),,51998.0,,
3,VCTC,Moorpark,EV Charging,ZETCP (GGRF),,200000.0,,
4,VCTC,Thousand Oaks,DAR vehicle replacement,ZETCP (GGRF),,1800000.0,,
5,VCTC,Gold Coast Transit District,Relief vehicles,ZETCP (GGRF),,42463.0,,
6,VCTC,Camarillo,EV Charging,ZETCP (GGRF),,,500000.0,
7,VCTC,Thousand Oaks,DAR vehicle replacement,ZETCP (GGRF),,,1800000.0,
8,VCTC,VCTC Intercity - VCTC,EV Charging,ZETCP (GGRF),,,500000.0,
9,VCTC,VCTC Intercity - VCTC,EV Charging,ZETCP (GGRF),,,,1342463.0


In [35]:
# last merge 
# CLEANING COMPLETE
ventura_big_merge = ventura_merge.merge(
    ventura_zetcp_capital,
    how = "outer",
    on = ["implementing agenc-y/-ies",
          "project", 
          "fund source",
          "capital_FY23-24",
          "capital_FY24-25", 
          "capital_FY25-26", 
          "capital_FY26-27",
         "rtpa"],
    suffixes =("_zetcp_cap","_merged")
)


In [36]:
#should see 57 rows total
display(
    ventura_big_merge.shape,
    ventura_big_merge.sort_values(by = "implementing agenc-y/-ies")
)

(57, 12)

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
48,VCTC,Camarillo,EV Charging,ZETCP (GGRF),,,500000.0,,,,,
16,VCTC,Camarillo,Replacement buses,TIRCP,,,,1234074.0,,,,
52,VCTC,Camarillo,EV Charging,ZETCP (GGRF),,,,1500000.0,,,,
15,VCTC,Camarillo,Replacement buses,TIRCP,,227000.0,273000.0,,,,,
55,VCTC,Camarillo,Replacement Vehicles,ZETCP (GGRF),,,,441926.0,,,,
29,VCTC,County of Ventura,Operations Support,TIRCP,,,,,,31350.0,,
3,VCTC,County of Ventura,Bus Stop Improvements,TIRCP,,,1000.0,,,,,
27,VCTC,Gold Coast Transit District,Operations Support,TIRCP,,,,,,,,9520904.0
26,VCTC,Gold Coast Transit District,Operations Support,TIRCP,,,,,,4871741.0,7790198.0,
22,VCTC,Gold Coast Transit District,Replacement buses,TIRCP,,,,6920000.0,,,,


## Kern

In [37]:
def get_kern_data(head_count: int, foot_count: int, agency: str, proj_title: str):
    
    kern_cols = {
    'Unnamed: 0': "fund source",
     'Unnamed: 1':"capital_FY23-24",
     'Unnamed: 2':"operating_FY23-24",
     'Unnamed: 3':"capital_FY24-25",
     'Unnamed: 4':"operating_FY24-25",
    'Unnamed: 5':"capital_FY25-26",	
    'Unnamed: 6':"operating_FY25-26",
    'Unnamed: 7':"capital_FY26-27",	
    'Unnamed: 8':"operating_FY26-27"
}
    df = pd.read_excel(f"{GCS_PATH}kern_fund_request.xlsx",
                      header = head_count, 
                      skipfooter = foot_count
                     ).rename(columns=kern_cols).assign(implementing_agency= agency,
                                                       rtpa = "Kern COG",
                                                       project = proj_title)
    return df

In [38]:
arvin_2 = get_kern_data(3, 43, "arvin_2","City of Arvin New Transit ZEV Bus Procurement and Expansion of Operation, Bus Services, and Facilities Improvements")
arvin_2

Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Arvin,,,,,,,,,arvin_2,Kern COG,City of Arvin New Transit ZEV Bus Procurement ...
1,TIRCP,1108563.0,0.0,1118595.0,0.0,0.0,0.0,0.0,0.0,arvin_2,Kern COG,City of Arvin New Transit ZEV Bus Procurement ...
2,ZETCP,120575.0,0.0,68316.0,0.0,68316.0,0.0,68316.0,0.0,arvin_2,Kern COG,City of Arvin New Transit ZEV Bus Procurement ...


In [39]:
kern = pd.read_excel(f"{GCS_PATH}kern_fund_request.xlsx")

# this is so bad, may need to do this in excel to make it work. 
# the agency name and fund soruce are on the same column

kern_cols = {
    'Unnamed: 0': "fund source",
     'Unnamed: 1':"capital_FY23-24",
     'Unnamed: 2':"operating_FY23-24",
     'Unnamed: 3':"capital_FY24-25",
     'Unnamed: 4':"operating_FY24-25",
    'Unnamed: 5':"capital_FY25-26",	
    'Unnamed: 6':"operating_FY25-26",
    'Unnamed: 7':"capital_FY26-27",	
    'Unnamed: 8':"operating_FY26-27"
}

arvin = get_kern_data(3, 43, "arvin", "Purcahse and install EV mirco-grid Purcahse and install EV mirco-grid")

california_city = get_kern_data(7, 39, "california city", "Purchase and construct transit building to house EV vans and solar charging stations")

delano = get_kern_data(11, 35, "delano", "Construct transit Facility")

get = get_kern_data(15, 31, "get", "Golden Empire Transit Free or near free transit fares Back up hydrogent fuel plant")

kern_transit = get_kern_data(19, 27, "kern transit", "Transition to Zero-Emission Vehicles and supporting infrastructure")

mcfarland = get_kern_data(23, 23, "mcfarland", "Design and construct a transit station providing a transit office , waitning area, restrooms and EV charging stations")

ridgecrest = get_kern_data(27, 19, "ridgecrest", "Replacement of Cutaway Buses with Electric Vans and Construction of Bus Stop at North Norma Street and West Felspar Avenue")

shafter = get_kern_data(31, 15, "shafter", "bus storage, new transit vehicles, free ridership fare program")

taft =get_kern_data(35, 11, "taft", "convert the City’s gasoline powered fleet of on-demand transit vehicles to plug-in electric vans compatible with the solar-powered charging infrastructure being completed now.")

tehahapi = get_kern_data(39, 9, "tehahapi", "Improvements to Downtown Transit Center and Installation of EV Charging Infrastructure")

wasco = get_kern_data(43, 3, "wasco", "New Transit Operating and Maintenance Facility")


display(
    wasco,
    tehahapi,
    taft,
    shafter,
    ridgecrest,
    mcfarland,
    kern_transit,
    get,
    arvin,
    california_city,
    delano,
    
)

Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Wasco,,,,,,,,,wasco,Kern COG,New Transit Operating and Maintenance Facility
1,TIRCP,1350328.0,0.0,1362548.0,0.0,0.0,0.0,0.0,0.0,wasco,Kern COG,New Transit Operating and Maintenance Facility
2,ZETCP,146871.0,0.0,83215.0,0.0,83215.0,0.0,83215.0,0.0,wasco,Kern COG,New Transit Operating and Maintenance Facility


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Tehahapi,,,,,,,,,tehahapi,Kern COG,Improvements to Downtown Transit Center and In...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Taft,,,,,,,,,taft,Kern COG,convert the City’s gasoline powered fleet of o...
1,TIRCP,359651.0,0.0,362907.0,0.0,0.0,0.0,0.0,0.0,taft,Kern COG,convert the City’s gasoline powered fleet of o...
2,ZETCP,39119.0,0.0,22164.0,0.0,22164.0,0.0,22164.0,0.0,taft,Kern COG,convert the City’s gasoline powered fleet of o...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Shafter,,,,,,,,,shafter,Kern COG,"bus storage, new transit vehicles, free riders..."
1,TIRCP,1029704.0,0.0,1503605.0,0.0,0.0,0.0,0.0,0.0,shafter,Kern COG,"bus storage, new transit vehicles, free riders..."
2,ZETCP,111998.0,0.0,0.0,63456.0,0.0,63456.0,63456.0,0.0,shafter,Kern COG,"bus storage, new transit vehicles, free riders..."


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Ridgecrest,,,,,,,,,ridgecrest,Kern COG,Replacement of Cutaway Buses with Electric Van...
1,TIRCP,1490120.0,0.0,1503605.0,0.0,0.0,0.0,0.0,0.0,ridgecrest,Kern COG,Replacement of Cutaway Buses with Electric Van...
2,ZETCP,162076.0,0.0,91830.0,0.0,91830.0,0.0,91830.0,0.0,ridgecrest,Kern COG,Replacement of Cutaway Buses with Electric Van...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,McFarland,,,,,,,,,mcfarland,Kern COG,Design and construct a transit station providi...
1,TIRCP,707216.0,0.0,713606.0,0.0,0.0,0.0,0.0,0.0,mcfarland,Kern COG,Design and construct a transit station providi...
2,ZETCP,76922.0,0.0,43583.0,0.0,43583.0,0.0,43583.0,0.0,mcfarland,Kern COG,Design and construct a transit station providi...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Kern Transit,,,,,,,,,kern transit,Kern COG,Transition to Zero-Emission Vehicles and suppo...
1,TIRCP,8210741.0,0.0,8285045.0,0.0,0.0,0.0,0.0,0.0,kern transit,Kern COG,Transition to Zero-Emission Vehicles and suppo...
2,ZETCP,903971.0,0.0,505993.0,0.0,505993.0,0.0,505993.0,0.0,kern transit,Kern COG,Transition to Zero-Emission Vehicles and suppo...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,GET,,,,,,,,,get,Kern COG,Golden Empire Transit Free or near free transi...
1,TIRCP,0.0,0.0,0.0,27834889.0,0.0,28086784.0,0.0,0.0,get,Kern COG,Golden Empire Transit Free or near free transi...
2,ZETCP,0.0,0.0,3027524.0,0.0,1715347.0,0.0,1715347.0,0.0,get,Kern COG,Golden Empire Transit Free or near free transi...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Arvin,,,,,,,,,arvin,Kern COG,Purcahse and install EV mirco-grid Purcahse an...
1,TIRCP,1108563.0,0.0,1118595.0,0.0,0.0,0.0,0.0,0.0,arvin,Kern COG,Purcahse and install EV mirco-grid Purcahse an...
2,ZETCP,120575.0,0.0,68316.0,0.0,68316.0,0.0,68316.0,0.0,arvin,Kern COG,Purcahse and install EV mirco-grid Purcahse an...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,California City,,,,,,,,,california city,Kern COG,Purchase and construct transit building to hou...
1,TIRCP,711044.0,0.0,717478.0,0.0,0.0,0.0,0.0,0.0,california city,Kern COG,Purchase and construct transit building to hou...
2,ZETCP,77338.0,0.0,43819.0,0.0,43819.0,0.0,43819.0,0.0,california city,Kern COG,Purchase and construct transit building to hou...


Unnamed: 0,fund source,capital_FY23-24,operating_FY23-24,capital_FY24-25,operating_FY24-25,capital_FY25-26,operating_FY25-26,capital_FY26-27,operating_FY26-27,implementing_agency,rtpa,project
0,Delano,,,,,,,,,delano,Kern COG,Construct transit Facility
1,TIRCP,2571742.0,0.0,2595015.0,0.0,0.0,0.0,0.0,0.0,delano,Kern COG,Construct transit Facility
2,ZETCP,279721.0,0.0,158486.0,0.0,158486.0,0.0,158486.0,0.0,delano,Kern COG,Construct transit Facility


In [40]:
# CLEANING COMPLETE

kern_concat = pd.concat([
    wasco,
    tehahapi,
    taft,
    shafter,
    ridgecrest,
    mcfarland,
    kern_transit,
    get,
    arvin,
    california_city,
    delano,
],ignore_index=True)

kern_clean = kern_concat[kern_concat["fund source"].isin(["TIRCP","ZETCP"])].rename(columns ={"implementing_agency":"implementing agenc-y/-ies"})

kern_clean = kern_clean[col_order]

kern_clean

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
1,Kern COG,wasco,New Transit Operating and Maintenance Facility,TIRCP,1350328.0,1362548.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Kern COG,wasco,New Transit Operating and Maintenance Facility,ZETCP,146871.0,83215.0,83215.0,83215.0,0.0,0.0,0.0,0.0
5,Kern COG,taft,convert the City’s gasoline powered fleet of o...,TIRCP,359651.0,362907.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Kern COG,taft,convert the City’s gasoline powered fleet of o...,ZETCP,39119.0,22164.0,22164.0,22164.0,0.0,0.0,0.0,0.0
8,Kern COG,shafter,"bus storage, new transit vehicles, free riders...",TIRCP,1029704.0,1503605.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Kern COG,shafter,"bus storage, new transit vehicles, free riders...",ZETCP,111998.0,0.0,0.0,63456.0,0.0,63456.0,63456.0,0.0
11,Kern COG,ridgecrest,Replacement of Cutaway Buses with Electric Van...,TIRCP,1490120.0,1503605.0,0.0,0.0,0.0,0.0,0.0,0.0
12,Kern COG,ridgecrest,Replacement of Cutaway Buses with Electric Van...,ZETCP,162076.0,91830.0,91830.0,91830.0,0.0,0.0,0.0,0.0
14,Kern COG,mcfarland,Design and construct a transit station providi...,TIRCP,707216.0,713606.0,0.0,0.0,0.0,0.0,0.0,0.0
15,Kern COG,mcfarland,Design and construct a transit station providi...,ZETCP,76922.0,43583.0,43583.0,43583.0,0.0,0.0,0.0,0.0


# Test of concat all the dictionary dataframe vales

In [41]:
fund_request_df = cleaned_fund_request.values()

type(fund_request_df)

dict_values

In [42]:
all_fund_requests = pd.concat(cleaned_fund_request.values(), ignore_index=True)

In [43]:
all_fund_requests = pd.concat([all_fund_requests,
                              santa_cruz_cleaned,
                              orange_cleaned,
                              butte_cleaned,
                              lassen_cleaned,
                              ventura_big_merge,
                              kern_clean
                             ], ignore_index=True)

In [44]:
all_fund_requests["fund source"] = all_fund_requests["fund source"].astype(str)

In [45]:
display(
    all_fund_requests.shape,
    type(all_fund_requests),
    all_fund_requests.dtypes,
    
)

(287, 12)

pandas.core.frame.DataFrame

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital_FY23-24              object
capital_FY24-25              object
capital_FY25-26              object
capital_FY26-27              object
operating_FY23-24            object
operating_FY24-25            object
operating_FY25-26            object
operating_FY26-27            object
dtype: object

# Save Concat DF Parquet to GCS

In [47]:
# SAVING TO GCS!
all_fund_requests.to_parquet(f"{GCS_PATH}all_fund_requests_concat.parquet")

# TEST of Melting the dataframe

In [48]:
id_vars= [
    'rtpa',
 'implementing agenc-y/-ies',
 'project',
 'fund source',
]
val_vars = [
    'capital_FY23-24',
 'capital_FY24-25',
 'capital_FY25-26',
 'capital_FY26-27',
 'operating_FY23-24',
 'operating_FY24-25',
 'operating_FY25-26',
 'operating_FY26-27'
]

melt = all_fund_requests.melt(
    id_vars = id_vars,
    value_vars = val_vars,
    var_name = "capital/operation fy",
    value_name = "fund amount",
    ignore_index = True)

In [49]:
display(
    type(melt),
    melt.shape,
    melt.dtypes,
    melt.head()
)

pandas.core.frame.DataFrame

(2296, 6)

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital/operation fy         object
fund amount                  object
dtype: object

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount
0,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,360641.0
1,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,
2,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (GGRF),capital_FY23-24,3616.684
3,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (PTA),capital_FY23-24,3123.316
4,Amador County Transportation Commission,Amador Transit,,TIRCP,capital_FY23-24,100000.0


In [50]:
# splitting the cap/operations columns

melt[["project type", "fiscal year"]] = melt["capital/operation fy"].str.split('_FY', expand = True)

In [51]:
# check for NaNs
melt["fund amount"].isna().sum()

1634

In [52]:
melt["fund amount"] = melt["fund amount"].fillna(0).astype("int64")
melt["fund source"] = melt["fund source"].astype("str")

In [53]:
# ensure no more NaNs
melt["fund amount"].isna().sum()

0

In [54]:
display(
    melt.columns,
    melt.sample(3)
)

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital/operation fy', 'fund amount', 'project type', 'fiscal year'],
      dtype='object')

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount,project type,fiscal year
1193,Madera County Transportation Commission,Madera County,Madera County Transit Fleet Electrification,TIRCP,operating_FY23-24,0,operating,23-24
1022,SLOCOG,SLOCOG,SLOCOG Admin.,TIRCP,capital_FY26-27,0,capital,26-27
726,SLOCOG,San Luis Obispo RTA,RTA-11,ZETCP (PTA),capital_FY25-26,0,capital,25-26


In [55]:
# checking fund amounts for any non-int values 
melt["fund amount"].unique()

array([   360641,         0,      3616,      3123,    100000,   2576611,
          238532,    500000,   1175501,     69395,     80357,   9085857,
          242312,    521458,   6849293,    329561,    295818,   1133408,
          101796, 498650905, 119494973,   3706510,     87574,    198860,
           84159,   8254231,    108201,     42539,     49259,   4848229,
          123518,    271142,    110636,    104561,    696393,    722403,
          836515,   1000000,   2080000,   6400000,    200000,    370000,
         1486685,    297576,    133646,  10000000,    400000,     92109,
          501942,   1019544,   3354086,   2510740,    614200,    250000,
          100520,     61421,     53042,  63382700,   4003053,   7956643,
         2868594,  60000000,   2328990,    348002,  10126000,   5434000,
         1705263,  26000000,    943316,   2050000,      9408,      8124,
          124000,    280000,    395000,   1778000,   1400000,   4000000,
          387000,    375000,    600000,    422000, 

In [56]:
display(
    melt.dtypes,
    melt["fund source"].value_counts()
)

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital/operation fy         object
fund amount                   int64
project type                 object
fiscal year                  object
dtype: object

TIRCP           1352
ZETCP (GGRF)     464
ZETCP (PTA)      304
ZETCP            120
CMAQ              16
Measure V          8
`5339              8
SGR                8
Farebox            8
`5307              8
Name: fund source, dtype: int64

# Save Melt DF Parquet to GCS

In [57]:
melt.to_parquet(f"{GCS_PATH}all_fund_requests_melt.parquet")

# Draft Aggregations

In [None]:
def make_bar(data, x_axis, y_axis):
    chart = alt.Chart(data).mark_bar().encode(
        x = x_axis,
        y = y_axis,
    )
    return chart

In [None]:
by_type = melt.groupby(["project type"]).agg({
    "fund amount": "sum",
}).reset_index()

display(by_type)
make_bar(by_type, y_axis = "project type", x_axis = "fund amount")

In [None]:
by_year = melt.groupby(["fiscal year","project type"]).agg({
    "fund amount": "sum",
}).reset_index()

display(by_year)
make_bar(by_year, y_axis = "fiscal year", x_axis = "fund amount",)

In [None]:
alt.Chart(by_year).mark_line(point=True).encode(
    x = "fiscal year",
    y = "fund amount",
    color = "project type"
    )

In [None]:
alt.Chart(by_year).mark_bar(point=True).encode(
    x = "fiscal year",
    y = "fund amount",
    color = "project type"
    )

In [None]:
by_source = melt.groupby(["fund source"]).agg({
    "fund amount": "sum",
        "rtpa": "nunique"
}).reset_index()

make_bar(by_source, y_axis = "fund source", x_axis = "fund amount")

In [None]:
by_rtpa = melt.groupby(["rtpa"]).agg({
    "fund amount": "sum",
    "project": "nunique"
}).reset_index()

make_bar(by_rtpa, "rtpa", "fund amount")