# SB125 Fund Split Analysis

## Question:
- How did RTAs split SB125 funds between operations and capital?

## Methodology:
- upload all avilable `SB125 fund request template` files to gcs
- examine all files for consistencies:
    - come with cleaning plan for inconsistent examples (files withot capital/operating columns)
- concat all rows across all files


## Notes:
- some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an quivilent file their allocation package

In [1]:
import pandas as pd
import os
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

In [2]:
file_list = [
    "sierra_fund_request.xlsx",
    "slocog_fund_request.xlsx",
    "tehema_fund_request.xlsx",
    "tuolumne_fund_request.xlsx",
    "ventura_fund_request.xlsx",
    "alpine_fund_request.xlsx",
    "amador_fund_request.xlsx",
    "butte_fund_request.xlsx",
    "calaveras_fund_request.xlsx",
    "del_norte_fund_request.xlsx",
    "el_dorado_fund_request.xlsx",
    "humboldt_fund_request.xlsx",
    "kern_fund_request.xlsx",
    "kings_fund_request.xlsx",
    "la_metro_fund_request.xlsx",
    "lake_fund_request.xlsx",
    "lassen_fund_request.xlsx",
    "madera_fund_request.xlsx",
    "mariposa_fund_request.xlsx",
    "mendocino_fund_request.xlsx",
    "merced_fund_request.xlsx",
    "mtc_fund_request.xlsx",
    "nevada_fund_request.xlsx",
    "orange_fund_request.xlsx",
    "placer_fund_request.xlsx",
    "plumas_fund_request.xlsx",
    "riverside_fund_request.xlsx",
    "san_benito_fund_request.xlsx",
    "san_diego_mts_fund_request.xlsx",
    "santa_cruz_fund_request.xlsx",
    "shasta_fund_request.xlsx",
]

file_list.sort()

In [3]:
file_list

['alpine_fund_request.xlsx',
 'amador_fund_request.xlsx',
 'butte_fund_request.xlsx',
 'calaveras_fund_request.xlsx',
 'del_norte_fund_request.xlsx',
 'el_dorado_fund_request.xlsx',
 'humboldt_fund_request.xlsx',
 'kern_fund_request.xlsx',
 'kings_fund_request.xlsx',
 'la_metro_fund_request.xlsx',
 'lake_fund_request.xlsx',
 'lassen_fund_request.xlsx',
 'madera_fund_request.xlsx',
 'mariposa_fund_request.xlsx',
 'mendocino_fund_request.xlsx',
 'merced_fund_request.xlsx',
 'mtc_fund_request.xlsx',
 'nevada_fund_request.xlsx',
 'orange_fund_request.xlsx',
 'placer_fund_request.xlsx',
 'plumas_fund_request.xlsx',
 'riverside_fund_request.xlsx',
 'san_benito_fund_request.xlsx',
 'san_diego_mts_fund_request.xlsx',
 'santa_cruz_fund_request.xlsx',
 'shasta_fund_request.xlsx',
 'sierra_fund_request.xlsx',
 'slocog_fund_request.xlsx',
 'tehema_fund_request.xlsx',
 'tuolumne_fund_request.xlsx',
 'ventura_fund_request.xlsx']

In [4]:
def clean_fund_request(file:str) -> pd.DataFrame:
    """
    reads in the file from GCS, maps col_names list to df columns, drops all the blank rows.
    returns df.
    """
    col_names = [
    "rtpa",
    "implementing agenc-y/-ies",
    "project",
    "fund source",
    "capital_FY23-24",
    "capital_FY24-25",
    "capital_FY25-26",
    "capital_FY26-27",
    "operating_FY23-24",
    "operating_FY24-25",
    "operating_FY25-26",
    "operating_FY26-27",
    "total",
]
    
    df = pd.read_excel(f"{GCS_PATH}{file}", header=2, nrows=40, names=col_names).drop(columns="total")
    row_drop = df["rtpa"].isin(["Grand Total", "RTPA"])
    df = df.drop(df[row_drop].index)
    df = df.dropna(how= "all")
    df[["rtpa", "implementing agenc-y/-ies","project"]] = df[["rtpa", "implementing agenc-y/-ies","project"]].ffill()
    
    return df


In [5]:
alpine = clean_fund_request("alpine_fund_request.xlsx")

In [6]:
len(alpine.columns)

12

In [7]:
amador = clean_fund_request("amador_fund_request.xlsx")

In [8]:
display(alpine.dtypes, 
        amador.dtypes,
)

rtpa                          object
implementing agenc-y/-ies     object
project                       object
fund source                   object
capital_FY23-24              float64
capital_FY24-25              float64
capital_FY25-26              float64
capital_FY26-27              float64
operating_FY23-24            float64
operating_FY24-25            float64
operating_FY25-26            float64
operating_FY26-27            float64
dtype: object

rtpa                          object
implementing agenc-y/-ies     object
project                      float64
fund source                   object
capital_FY23-24              float64
capital_FY24-25              float64
capital_FY25-26              float64
capital_FY26-27              float64
operating_FY23-24            float64
operating_FY24-25            float64
operating_FY25-26            float64
operating_FY26-27            float64
dtype: object

first iteration of fund_request_checker func.
>def read_in(file:str) -> pd.DataFrame:
>    df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>    df = df.dropna(how= "all")
>   
>    if len(df.columns) == 13:
>        print(f"{file} can use clean_fund_request func.")
>    else: 
>        print(f"needs manual check, {file}")
       

old iteration
>def fund_request_checker(file_list:list) -> pd.DataFrame:
>
>    for file in file_list:
>    
>        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>        df = df.dropna(how= "all")
>    
>        if len(df.columns) == 13:
>            print(f"{file} can use clean_fund_request func.")
>        else: 
>            print(f"needs manual check, {file}")
       

In [9]:
#fund_request_checker(file_list)

old iteration
>def fund_request_checker_v2(file_list:list):
>    gtg_files = []
>    manual_review = []
>    for file in file_list:
>    
>        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
>        df = df.dropna(how= "all")
>    
>        if len(df.columns) == 13:
>            gtg_files.append(f"{file}")
>        else: 
>            manual_review.append(f"{file}")
>    return display(
>        "good to go files",
>        list(gtg_files), 
>        "needs manual check", 
>        list(manual_review)
>    )

In [10]:
#fund_request_checker_v2(file_list)

In [11]:
def fund_request_checker_v3(file_list:list) -> tuple:
    """takes in list of fund_request excel file name. reads in each file, checks if DF has 13 columns.
    if yes, appends do good-to-go list. else, appends to needs-manual-review.
    output is a tuple of the 2 list.
    assign 2 variables to use this func.
    """
    gtg_files = []
    manual_review = []
    for file in file_list:
    
        df = pd.read_excel(f"{GCS_PATH}{file}", nrows=40)
        df = df.dropna(how= "all")
    
        if len(df.columns) == 13:
            gtg_files.append(f"{file}")
        else: 
            manual_review.append(f"{file}")
    return gtg_files, manual_review

In [12]:
good_list, review_list = fund_request_checker_v3(file_list)

In [13]:
display(
    len(good_list),
    len(review_list)
)

24

7

In [14]:
def cleaner_loop(gtg_list:list) -> dict: 
    """
    takes in good-to-go list from fund_request_checker.
    applies the clean_fund_request function to each item on the list, then appends to dictionary.
    key is name of the file, value is the cleaned dataframe.
    output is dictionary. 
    """
    cleaned_df ={}

    for name in gtg_list:
        cleaned_df[name] = clean_fund_request(name)
    return cleaned_df

In [15]:
cleaned_fund_request = cleaner_loop(good_list)

In [16]:
display(
    type(cleaned_fund_request),
    len(cleaned_fund_request),
    list(cleaned_fund_request.keys()),
)

dict

24

['alpine_fund_request.xlsx',
 'amador_fund_request.xlsx',
 'calaveras_fund_request.xlsx',
 'del_norte_fund_request.xlsx',
 'el_dorado_fund_request.xlsx',
 'humboldt_fund_request.xlsx',
 'kings_fund_request.xlsx',
 'la_metro_fund_request.xlsx',
 'lake_fund_request.xlsx',
 'madera_fund_request.xlsx',
 'mariposa_fund_request.xlsx',
 'mendocino_fund_request.xlsx',
 'merced_fund_request.xlsx',
 'nevada_fund_request.xlsx',
 'placer_fund_request.xlsx',
 'plumas_fund_request.xlsx',
 'riverside_fund_request.xlsx',
 'san_benito_fund_request.xlsx',
 'san_diego_mts_fund_request.xlsx',
 'shasta_fund_request.xlsx',
 'sierra_fund_request.xlsx',
 'slocog_fund_request.xlsx',
 'tehema_fund_request.xlsx',
 'tuolumne_fund_request.xlsx']

In [17]:
# view all the good-to-go df

from IPython.display import display

# .items() creates tuples of each element in the dict. key:value maps to key:df

for key, df in cleaned_fund_request.items():
    print(f"DataFrame: {key}")
    #display(df.head(3))


DataFrame: alpine_fund_request.xlsx
DataFrame: amador_fund_request.xlsx
DataFrame: calaveras_fund_request.xlsx
DataFrame: del_norte_fund_request.xlsx
DataFrame: el_dorado_fund_request.xlsx
DataFrame: humboldt_fund_request.xlsx
DataFrame: kings_fund_request.xlsx
DataFrame: la_metro_fund_request.xlsx
DataFrame: lake_fund_request.xlsx
DataFrame: madera_fund_request.xlsx
DataFrame: mariposa_fund_request.xlsx
DataFrame: mendocino_fund_request.xlsx
DataFrame: merced_fund_request.xlsx
DataFrame: nevada_fund_request.xlsx
DataFrame: placer_fund_request.xlsx
DataFrame: plumas_fund_request.xlsx
DataFrame: riverside_fund_request.xlsx
DataFrame: san_benito_fund_request.xlsx
DataFrame: san_diego_mts_fund_request.xlsx
DataFrame: shasta_fund_request.xlsx
DataFrame: sierra_fund_request.xlsx
DataFrame: slocog_fund_request.xlsx
DataFrame: tehema_fund_request.xlsx
DataFrame: tuolumne_fund_request.xlsx


# Cleaning individual DFs

In [18]:
cleaned_fund_request["amador_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
2,Amador County Transportation Commission,Amador County Transportation Commission,,,,,,,,,,
3,Amador County Transportation Commission,Amador Transit,,TIRCP,100000.0,75000.0,,,,,,


In [19]:
cleaned_fund_request["merced_fund_request.xlsx"] = cleaned_fund_request["merced_fund_request.xlsx"].drop([1,2,34,36,37])

In [20]:
cleaned_fund_request["merced_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
4,Merced County Association of Governments (MCAG),MCAG,Long Term Planning/Administrative,TIRCP,104561.0,104562.0,,,,,,
6,Merced County Association of Governments (MCAG),Transit Joint Powers Authority Board for Merce...,Electric Bus Charging Infrastructure,TIRCP,696393.0,1070173.0,0.0,0.0,,,,
7,Merced County Association of Governments (MCAG),Transit Joint Powers Authority Board for Merce...,Electric Bus Charging Infrastructure,ZETCP (PTA),722403.0646,,0.0,0.0,,,,
8,Merced County Association of Governments (MCAG),Transit Joint Powers Authority Board for Merce...,Electric Bus Charging Infrastructure,ZETCP (GGRF),836515.9354,874515.0,0.0,0.0,,,,
9,Merced County Association of Governments (MCAG),Transit Joint Powers Authority Board for Merce...,Electric Bus Charging Infrastructure,CMAQ,,800000.0,0.0,0.0,,,,
10,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,TIRCP,1000000.0,3530970.0,0.0,0.0,,,,
11,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,ZETCP (GGRF),0.0,0.0,874515.0,874515.0,,,,
12,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,Measure V,500000.0,0.0,0.0,0.0,,,,
13,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,5339,,600000.0,0.0,0.0,,,,
14,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,SGR,500000.0,120000.0,0.0,0.0,,,,


In [21]:
cleaned_fund_request["san_benito_fund_request.xlsx"] = cleaned_fund_request["san_benito_fund_request.xlsx"].drop([6,9])

In [22]:
cleaned_fund_request["san_benito_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
1,San Benito Council of Governments,San Benito Council of Governments,Transit Modernization and Next Gen Infrastruct...,TIRCP,2328990.0,2337345.0,,,,,,
2,San Benito Council of Governments,San Benito Council of Governments,Transit Modernization and Next Gen Infrastruct...,ZETCP,348002.0,195221.0,195221.0,195221.0,,,,
3,San Benito Council of Governments,San Benito Council of Governments,Reinstate Transit Operations,TIRCP,,,,,1000000.0,1000000.0,,
4,San Benito Council of Governments,San Benito Council of Governments,Pilot Program- Express Bus,TIRCP,,,,,239281.0,239281.0,,
5,San Benito Council of Governments,San Benito Council of Governments,Administration,TIRCP,,,,,40801.0,40801.0,,


In [23]:
row_drops = [
    1,
    2,
    3,
    4,
    9,
    10,
    11,
]
cleaned_fund_request["san_diego_mts_fund_request.xlsx"] = cleaned_fund_request["san_diego_mts_fund_request.xlsx"].drop(row_drops)

In [24]:
cleaned_fund_request["san_diego_mts_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
12,San Diego Metropolitan Transit System,MTS,Electrification of the Imperial Avenue Division,ZETCP,10126000.0,,,,,,,
13,San Diego Metropolitan Transit System,MTS,Electrification of the Kearny Mesa Division,ZETCP,5434000.0,,,,,,,
14,San Diego Metropolitan Transit System,MTS,Electrification of the East County Division,ZETCP,1705263.0,9685392.0,4842696.0,,,,,
15,San Diego Metropolitan Transit System,MTS,"Electrification of the South Bay Division, Pha...",ZETCP,0.0,,4842696.0,9685392.0,,,,
16,San Diego Metropolitan Transit System,MTS,Orange Line Improvement Project,TIRCP,26000000.0,,,,,,,
17,San Diego Metropolitan Transit System,MTS,Security Enhancements,TIRCP,,,,,5000000.0,3500000.0,3500000.0,3500000.0
18,San Diego Metropolitan Transit System,MTS,Trolley Service Enhancements,TIRCP,,,,,8700000.0,11000000.0,11000000.0,11000000.0
19,San Diego Metropolitan Transit System,MTS,Bus Service Enhancements,TIRCP,,,,,6000000.0,12000000.0,12000000.0,12000000.0
20,San Diego Metropolitan Transit System,MTS,Iris Rapid Operations,TIRCP,,,,,7000000.0,4000000.0,4000000.0,4000000.0
21,San Diego Metropolitan Transit System,MTS,"Otay Mesa (CBX, etc.) Service Improvements",TIRCP,,,,,500000.0,1500000.0,1500000.0,1500000.0


In [25]:
cleaned_fund_request["sierra_fund_request.xlsx"] = cleaned_fund_request["sierra_fund_request.xlsx"].drop(list(range(24,32)))

In [26]:
cleaned_fund_request["sierra_fund_request.xlsx"]


Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
0,Sierra County Transportation Commission,Sierra County Transportation Commission,Bus Procurement,ZETCP (GGRF),9408.2078,9836.0,9836.0,9836.0,,,,
1,Sierra County Transportation Commission,Sierra County Transportation Commission,Bus Procurement,ZETCP (PTA),8124.7922,,,,,,,
2,Sierra County Transportation Commission,Sierra County Transportation Commission,Bus Procurement,TIRCP,,260684.242384,,,,,,
3,Sierra County Transportation Commission,Sierra County Transportation Commission,Reduced Fares,TIRCP,,,,,63017.079713,,,
4,Sierra County Transportation Commission,Sierra County Transportation Commission,Operations,TIRCP,,,,,375165.920287,202910.757616,,
5,Sierra County Transportation Commission,Sierra County Transportation Commission,Administration,TIRCP,,,,,25000.0,,,


In [27]:
cleaned_fund_request["nevada_fund_request.xlsx"] = cleaned_fund_request["nevada_fund_request.xlsx"].drop([8,9])

In [28]:
cleaned_fund_request["nevada_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
0,NCTC,Town of Truckee,Keep Truckee Moving: Expansion and Electrifica...,TIRCP,370000.0,,,,,,,
1,NCTC,Nevada County,Zero Emission Bus Transition & Microgrid EV Re...,TIRCP,1486685.0,,,,,,,
2,NCTC,Nevada County,Zero Emission Bus Transition & Microgrid EV Re...,ZETCP (GGRF),297576.0,311093.0,,,,,,
3,NCTC,Nevada County,Zero Emission Bus Transition & Microgrid EV Re...,ZETCP (PTA),133646.0,,,,,,,
4,NCTC,NCTC,SB 125 Program Administration,ZETCP (PTA),,,,,123336.0,,,


In [29]:
cleaned_fund_request["plumas_fund_request.xlsx"]= cleaned_fund_request["plumas_fund_request.xlsx"].drop(list(range(10,14)))

In [30]:
cleaned_fund_request["plumas_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
1,Plumas County Transportation Commission,Plumas County Transportation Commission,Arlington Park and Ride,TIRCP,614200.0,,,,,,,
2,Plumas County Transportation Commission,Plumas County Transportation Commission,Bus Shelters,TIRCP,250000.0,,,,,,,
3,Plumas County Transportation Commission,Plumas County Transportation Commission,Bus Matching Funds,TIRCP,100520.0,,,,,,,
4,Plumas County Transportation Commission,Plumas County Transportation Commission,Operating Expenses and Free Fares,TIRCP,,,,,292543.0,1259680.0,,
5,Plumas County Transportation Commission,Plumas County Transportation Commission,Battery Electric Buses and Charging Infrastruc...,ZETCP (GGRF),61421.0,64211.0,64211.0,64211.0,,,,
6,Plumas County Transportation Commission,Plumas County Transportation Commission,Battery Electric Buses and Charging Infrastruc...,ZETCP (PTA),53042.0,,,,,,,


In [31]:
cleaned_fund_request["humboldt_fund_request.xlsx"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
0,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,TIRCP,,,,,772834.0,1910431.32,2971955.64,7415199
1,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,ZETCP (PTA),,,,,342326.0,0,0,0
2,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,ZETCP (GGRF),,,,,391693.0,432746.99,432746.99,432747
3,Humboldt County Association of Governments,Arcata and Mad River Transit System,Operations,TIRCP,,,,,204188.0,413152,291131,-
4,Humboldt County Association of Governments,Arcata and Mad River Transit System,Operations,ZETCP (GGRF),,,,,51028.06,30084.02,30084.02,30084.02
5,Humboldt County Association of Governments,Yurok Tribal Transportation System,Operations,TIRCP,,,,,150000.0,150000,-,-
6,Humboldt County Association of Governments,HCAOG,RTPA Administration,ZETCP (PTA),,,,,40000.0,-,-,-


# Test of concat all the dictionary dataframe vales

In [32]:
all_fund_request = pd.concat(cleaned_fund_request.values(), ignore_index=True)

In [33]:
display(
    all_fund_request.shape,
    type(all_fund_request),
    all_fund_request.columns
)

(173, 12)

pandas.core.frame.DataFrame

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital_FY23-24', 'capital_FY24-25', 'capital_FY25-26',
       'capital_FY26-27', 'operating_FY23-24', 'operating_FY24-25',
       'operating_FY25-26', 'operating_FY26-27'],
      dtype='object')

In [34]:
all_fund_request["rtpa"].value_counts()

SLOCOG                                                      27
Merced County Association of Governments (MCAG)             22
Kings County Association of Governments                     12
San Diego Metropolitan Transit System                       12
Placer County Transportation Planning Agency (PCTPA)        11
Madera County Transportation Commission                     10
Humboldt County Association of Governments                   7
Sierra County Transportation Commission                      6
Shasta Regional Transportation Agency                        6
Plumas County Transportation Commission                      6
Tehama County Transportation Commission                      5
San Benito Council of Governments                            5
RCTC                                                         5
NCTC                                                         5
Alpine County Transportation Commission                      4
Mendocino Council of Governments                       

In [35]:
all_fund_request[all_fund_request["rtpa"] == "pta"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27


In [36]:
all_fund_request[all_fund_request["implementing agenc-y/-ies"] == "NCTC"]

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital_FY23-24,capital_FY24-25,capital_FY25-26,capital_FY26-27,operating_FY23-24,operating_FY24-25,operating_FY25-26,operating_FY26-27
85,NCTC,NCTC,SB 125 Program Administration,ZETCP (PTA),,,,,123336.0,,,


In [37]:
# sum all the "capital columns"
#all_fund_request[["capital_FY23-24":"capital_FY26-27"]].sum()

In [38]:
#sum all the "operations columns"

In [39]:
all_fund_request.groupby("rtpa").agg({
    "capital_FY23-24": "sum",
    "operating_FY23-24": "sum"
})

Unnamed: 0_level_0,capital_FY23-24,operating_FY23-24
rtpa,Unnamed: 1_level_1,Unnamed: 2_level_1
Alpine County Transportation Commission,367381.0,0.0
Amador County Transportation Commission,100000.0,0.0
Calaveras County Council of Goverments,2815143.0,0.0
DNLTC,1825253.0,0.0
El Dorado County Transportation Commission,9849627.0,208011.0
Humboldt County Association of Governments,0.0,1952069.06
Kings County Association of Governments,8709876.0,100000.0
Lake County/City Council of Governments,4077103.0,0.0
Los Angeles County Metropolitan Transportation Authority,618145878.0,0.0
Madera County Transportation Commission,8254231.0,188112.0


In [40]:
all_fund_request.pivot_table(
    values = ["capital_FY23-24", "operating_FY23-24"],
    #columns= ["capital_FY23-24", "operating_FY23-24"],
    index = "rtpa",
    aggfunc = "sum",
    margins = True,
    margins_name = "Grand Total"
)

Unnamed: 0_level_0,capital_FY23-24,operating_FY23-24
rtpa,Unnamed: 1_level_1,Unnamed: 2_level_1
Alpine County Transportation Commission,367381.0,0.0
Amador County Transportation Commission,100000.0,0.0
Calaveras County Council of Goverments,2815143.0,0.0
DNLTC,1825253.0,0.0
El Dorado County Transportation Commission,9849627.0,208011.0
Humboldt County Association of Governments,0.0,1952069.06
Kings County Association of Governments,8709876.0,100000.0
Lake County/City Council of Governments,4077103.0,0.0
Los Angeles County Metropolitan Transportation Authority,618145878.0,0.0
Madera County Transportation Commission,8254231.0,188112.0


# TEST of Melting the dataframe

In [41]:
id_vars= [
    'rtpa',
 'implementing agenc-y/-ies',
 'project',
 'fund source',
]
val_vars = [
    'capital_FY23-24',
 'capital_FY24-25',
 'capital_FY25-26',
 'capital_FY26-27',
 'operating_FY23-24',
 'operating_FY24-25',
 'operating_FY25-26',
 'operating_FY26-27'
]

melt = all_fund_request.melt(
    id_vars = id_vars,
    value_vars = val_vars,
    var_name = "capital/operation fy",
    value_name = "fund amount",
    ignore_index = True)

In [42]:
display(
    type(melt),
    melt.shape,
    melt.dtypes,
    melt.head()
)

pandas.core.frame.DataFrame

(1384, 6)

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital/operation fy         object
fund amount                  object
dtype: object

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount
0,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,360641.0
1,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,
2,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (GGRF),capital_FY23-24,3616.684
3,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (PTA),capital_FY23-24,3123.316
4,Amador County Transportation Commission,Amador County Transportation Commission,,,capital_FY23-24,


In [43]:
# splitting the cap/operations columns

melt[["project type", "fiscal year"]] = melt["capital/operation fy"].str.split('_FY', expand = True)

In [44]:
display(
    melt.columns,
    melt.head()
)

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital/operation fy', 'fund amount', 'project type', 'fiscal year'],
      dtype='object')

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount,project type,fiscal year
0,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,360641.0,capital,23-24
1,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,,capital,23-24
2,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (GGRF),capital_FY23-24,3616.684,capital,23-24
3,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (PTA),capital_FY23-24,3123.316,capital,23-24
4,Amador County Transportation Commission,Amador County Transportation Commission,,,capital_FY23-24,,capital,23-24


In [45]:
# checking fund amounts for any non-int values 
melt["fund amount"].unique()

array([360641.0, nan, 3616.6839999999997, 3123.316, 100000.0, 2576611.0,
       238532.0, 500000.0, 1175501.0, 69395.0, 80357.0, 9085857.0,
       242312.0, 521458.0, 6849293.0, 329561.0, 295818.0, 1133408.0,
       101796.0, 498650905.0, 119494973.0, 3706510.0, 87574.0, 198860.0,
       84159.0, 8254231.0, 108201.0, 42539.66, 49259.34, 4848229.0,
       123518.0, 271142.0, 110636.0, 104561, 696393, 722403.0645999999,
       836515.9354, 1000000, 0, 2080000, 6400000, 200000, 370000.0,
       1486685.0, 297576.0, 133646.0, 10000000.0, 400000.0, 92109.0,
       501942.0, 1019544.0, 3354086.0, 2510740.0, 614200, 250000, 100520,
       61421, 53042, 63382700, 4003053, 7956643, 2868594, 60000000,
       2328990.0, 348002.0, 10126000, 5434000, 1705263, 26000000,
       943316.0, 2050000.0, 9408.2078, 8124.7922, 124000.0, 280000.0,
       395000.0, 1778000.0, 1400000.0, 4000000.0, 387000.0, 375000.0,
       600000.0, 422000.0, 1553000.0, 1787000.0, 2000000.0, 166100.0,
       81151.0, 300000.

In [46]:
# updates the `-` string to zero.
melt.loc[(melt["rtpa"] == "Humboldt County Association of Governments") & (melt["fund amount"] == "-"),"fund amount"] = 0

In [47]:
melt[melt["rtpa"] == "Humboldt County Association of Governments"].sample(5)

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount,project type,fiscal year
537,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,ZETCP (GGRF),capital_FY26-27,,capital,26-27
1054,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,TIRCP,operating_FY25-26,2971955.64,operating,25-26
885,Humboldt County Association of Governments,Arcata and Mad River Transit System,Operations,ZETCP (GGRF),operating_FY24-25,30084.02,operating,24-25
535,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,TIRCP,capital_FY26-27,,capital,26-27
539,Humboldt County Association of Governments,Arcata and Mad River Transit System,Operations,ZETCP (GGRF),capital_FY26-27,,capital,26-27


In [48]:
# check for NaNs again
melt["fund amount"].isna().sum()

1029

In [49]:
melt["fund amount"] = melt["fund amount"].fillna(0).astype("int64")

In [50]:
melt["fund amount"].isna().sum()

0

In [56]:
melt["fund source"].value_counts()

TIRCP           784
ZETCP (GGRF)    248
ZETCP (PTA)     248
ZETCP            40
CMAQ             16
Measure V         8
5339              8
SGR               8
Farebox           8
5307              8
Name: fund source, dtype: int64

In [52]:
melt.dtypes

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital/operation fy         object
fund amount                   int64
project type                 object
fiscal year                  object
dtype: object

# Draft Aggregations

In [53]:
melt.groupby(["project type"]).agg({
    "fund amount": "sum",
}).reset_index()


Unnamed: 0,project type,fund amount
0,capital,1581375207
1,operating,254127295


In [60]:
melt.groupby(["fiscal year","project type"]).agg({
    "fund amount": "sum",
}).reset_index()


Unnamed: 0,fiscal year,project type,fund amount
0,23-24,capital,902467777
1,23-24,operating,38154526
2,24-25,capital,640693770
3,24-25,operating,47629226
4,25-26,capital,19826278
5,25-26,operating,53440635
6,26-27,capital,18387382
7,26-27,operating,114902908


In [61]:
melt.groupby(["fund source"]).agg({
    "fund amount": "sum",
        "rtpa": "nunique"
})

Unnamed: 0_level_0,fund amount,rtpa
fund source,Unnamed: 1_level_1,Unnamed: 2_level_1
5307,400000,1
5339,600000,1
CMAQ,2300000,1
Farebox,400000,1
Measure V,500000,1
SGR,620000,1
TIRCP,1564557904,24
ZETCP,47255104,2
ZETCP (GGRF),205870265,21
ZETCP (PTA),12999229,18


In [57]:
melt.groupby(["rtpa"]).agg({
    "fund amount": "sum",
    "project": "nunique"
})

Unnamed: 0_level_0,fund amount,project
rtpa,Unnamed: 1_level_1,Unnamed: 2_level_1
Alpine County Transportation Commission,739517,1
Amador County Transportation Commission,175000,0
Calaveras County Council of Goverments,5798936,1
DNLTC,3756248,2
El Dorado County Transportation Commission,10057638,2
Humboldt County Association of Governments,16492428,3
Kings County Association of Governments,18204049,9
Lake County/City Council of Governments,4077103,2
Los Angeles County Metropolitan Transportation Authority,1173924082,3
Madera County Transportation Commission,18811247,5


In [66]:
melt[melt["rtpa"].str.contains("SLOCOG")]["project"].nunique()

24