In [1]:
import altair as alt
import pandas as pd

# Remove pandas' column and row display limits so frames render in full.
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

# Base GCS location for this analysis; file names are appended to it.
GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

# SB125 Fund Split Analysis

## Question:
- How did RTPAs split SB125 funds between operations and capital?

## Methodology:
- upload all available `SB125 fund request template` files to GCS
- examine all files for consistency:
    - come up with a cleaning plan for inconsistent examples (files without capital/operating columns)
- concat all rows across all files


## Notes:
- some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an equivalent file in their allocation package

# Read in Melt DF parquet from GCS

In [2]:
# Load the fund-request parquet stored under GCS_PATH.
all_melt = pd.read_parquet(GCS_PATH + "all_fund_requests_melt.parquet")

In [3]:
# Quick sanity check: dimensions, column names, and a few example rows.
# A fixed random_state keeps the sampled rows identical across re-runs,
# so the preview is reproducible under Restart & Run All.
display(
    all_melt.shape,
    all_melt.columns,
    all_melt.sample(3, random_state=0),
)

(2296, 8)

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital/operation fy', 'fund amount', 'project type', 'fiscal year'],
      dtype='object')

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount,project type,fiscal year
589,Humboldt County Association of Governments,Humboldt Transit Authority,Maintain Existing Service,TIRCP,capital_FY25-26,0,capital,25-26
72,Merced County Association of Governments (MCAG),"The Bus, TJPA",Microtransit Services Expansion (Vehicles),TIRCP,capital_FY23-24,2080000,capital,23-24
351,Merced County Association of Governments (MCAG),"The Bus, TJPA",Westside O&M Facility,ZETCP (GGRF),capital_FY24-25,0,capital,24-25


# Draft Aggregations

In [4]:
def make_bar(data, x_axis, y_axis):
    """Build a basic Altair bar chart.

    Args:
        data: Chart data handed to ``alt.Chart``.
        x_axis: Encoding passed to the ``x`` channel.
        y_axis: Encoding passed to the ``y`` channel.

    Returns:
        The bar chart with the x and y encodings applied.
    """
    bars = alt.Chart(data).mark_bar()
    return bars.encode(x=x_axis, y=y_axis)

In [5]:
# Sum of "fund amount" for each project type.
by_type = all_melt.groupby("project type", as_index=False)["fund amount"].sum()

display(by_type)
make_bar(by_type, x_axis="fund amount", y_axis="project type")

Unnamed: 0,project type,fund amount
0,capital,1855348721
1,operating,625654847


In [6]:
# Sum of "fund amount" for every fiscal year / project type pair.
year_type_keys = ["fiscal year", "project type"]
by_year = all_melt.groupby(year_type_keys, as_index=False)["fund amount"].sum()

display(by_year)
make_bar(by_year, x_axis="fund amount", y_axis="fiscal year")

Unnamed: 0,fiscal year,project type,fund amount
0,23-24,capital,945242943
1,23-24,operating,230807266
2,24-25,capital,812480147
3,24-25,operating,169494114
4,25-26,capital,49092309
5,25-26,operating,96177774
6,26-27,capital,48533322
7,26-27,operating,129175693


In [7]:
# Fund amount across fiscal years, one line (with point markers) per project type.
alt.Chart(by_year).mark_line(point=True).encode(
    x=alt.X("fiscal year"),
    y=alt.Y("fund amount"),
    color=alt.Color("project type"),
)

In [8]:
# Bars of fund amount per fiscal year, colored by project type.
# `point` is a line/area mark property, so it is not passed to mark_bar.
alt.Chart(by_year).mark_bar().encode(
    x="fiscal year", y="fund amount", color="project type"
)

In [9]:
# Per fund source: summed fund amount and the number of distinct RTPAs.
source_aggs = {"fund amount": "sum", "rtpa": "nunique"}
by_source = all_melt.groupby("fund source", as_index=False).agg(source_aggs)

make_bar(by_source, x_axis="fund amount", y_axis="fund source")

In [10]:
# Per RTPA: summed fund amount and the number of distinct projects.
rtpa_aggs = {"fund amount": "sum", "project": "nunique"}
by_rtpa = all_melt.groupby("rtpa", as_index=False).agg(rtpa_aggs)

make_bar(by_rtpa, x_axis="rtpa", y_axis="fund amount")