In [1]:
import altair as alt
import pandas as pd
from IPython.display import Markdown, display, HTML

# Render every column and every row when a DataFrame is displayed (None = no limit).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# GCS folder this notebook reads its input parquet from.
GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

# SB125 Fund Split Analysis

## Question:
- How did RTPAs split SB125 funds between operations and capital?

## Methodology:
- upload all available `SB125 fund request template` files to GCS
- examine all files for consistency:
    - come up with a cleaning plan for inconsistent examples (files without capital/operating columns)
- concat all rows across all files


## Notes:
- Some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an equivalent file in their allocation package
- There are ~30 files to analyze, most of which need to be uniquely cleaned. All cleaning steps are done in the `fund_split.py` script and saved to GCS

# Read in Melt DF parquet from GCS

In [2]:
# Load the melted fund-request table from the GCS folder.
all_melt = pd.read_parquet(GCS_PATH + "all_fund_requests_melt.parquet")

In [24]:
# Quick structural check: (rows, columns) first, then the column labels.
display(all_melt.shape)
display(all_melt.columns)

(2296, 8)

Index(['rtpa', 'implementing agenc-y/-ies', 'project', 'fund source',
       'capital/operation fy', 'fund amount', 'project type', 'fiscal year'],
      dtype='object')

In [4]:
# Total fund amount per fund source, plus the number of distinct RTPAs per source.
by_source = (
    all_melt.groupby("fund source")
    .agg({"fund amount": "sum", "rtpa": "nunique"})
    .reset_index()
)

# Total fund amount per project type.
by_type = (
    all_melt.groupby("project type")
    .agg({"fund amount": "sum"})
    .reset_index()
)

# Total fund amount per (fiscal year, project type) pair.
by_year = (
    all_melt.groupby(["fiscal year", "project type"])
    .agg({"fund amount": "sum"})
    .reset_index()
)

# Total fund amount per RTPA, plus the number of distinct projects per RTPA.
by_rtpa = (
    all_melt.groupby("rtpa")
    .agg({"fund amount": "sum", "project": "nunique"})
    .reset_index()
)


In [29]:
# Inspect the column labels of the per-fund-source aggregation.
by_source.columns

Index(['fund source', 'fund amount', 'rtpa'], dtype='object')

# Charts

In [32]:
# Fund amount per fiscal year, with bars coloured (and stacked) by project type.
# `point=True` was dropped from every mark_bar call: it is a line/area mark
# option and has no effect on bars.
stack_chart = alt.Chart(by_year).mark_bar().encode(
    x="fiscal year",
    y="fund amount",
    color="project type",
    tooltip=["project type", "fund amount"],
).properties(width=600, height=400)

# Overall fund amount per project type.
overall_chart = alt.Chart(by_type).mark_bar().encode(
    x="project type",
    y="fund amount",
    color="project type",
    tooltip=["project type", "fund amount"],
).properties(width=600, height=400)

# Fund amount per fund source. This was previously a verbatim copy of
# `overall_chart` (built from `by_type` / "project type"); it now plots the
# per-source aggregation its name refers to.
# NOTE(review): this chart is defined but not included in the display call below.
source_chart = alt.Chart(by_source).mark_bar().encode(
    x="fund source",
    y="fund amount",
    color="fund source",
    tooltip=["fund source", "fund amount"],
).properties(width=600, height=400)


display(
    Markdown("## Overall split of SB125 allocations funds by capital/operating expenses"),
    overall_chart,
    by_type,
    Markdown("## SB125 allocations funds by capital/operating expenses, by fiscal year"),
    stack_chart,
)

## Overall split of SB125 allocations funds by capital/operating expenses

Unnamed: 0,project type,fund amount
0,capital,1855348721
1,operating,625654847


## SB125 allocations funds by capital/operating expenses, by fiscal year

# Draft Aggregations

In [28]:
def make_bar(data, x_axis, y_axis):
    """Build a bar chart with each bar's value drawn as a text label.

    Args:
        data: Data handed to ``alt.Chart``.
        x_axis: Encoding shorthand (e.g. a column name) for the x channel.
        y_axis: Encoding shorthand (e.g. a column name) for the y channel.
            The same field is used for the text labels.

    Returns:
        A layered Altair chart: the bars plus their text labels.
    """
    bars = (
        alt.Chart(data)
        .mark_bar()
        .encode(x=x_axis, y=y_axis)
        .properties(width="container", height=400)
    )
    # Label with the plotted y field instead of a hard-coded "fund amount"
    # column, so the helper also works for data that lacks that column.
    labels = bars.mark_text(align="left", baseline="middle", dx=3).encode(
        text=y_axis
    )
    return bars + labels

In [45]:
# Bar chart of total fund amount, limited to the TIRCP / ZETCP fund sources.
make_bar(
    by_source.loc[
        by_source["fund source"].isin(
            ["TIRCP", "ZETCP", "ZETCP (GGRF)", "ZETCP (PTA)"]
        )
    ],
    x_axis="fund source",
    y_axis="fund amount",
)