In [1]:
import altair as alt
import pandas as pd
from IPython.display import HTML, Markdown, display

# Never truncate DataFrame output: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Folder in GCS holding the cleaned SB125 fund-split data (note the trailing slash).
GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

# SB125 Fund Split Analysis

## Question:
- How did RTPAs split SB125 funds between operations and capital?

## Methodology:
- upload all available `SB125 fund request template` files to GCS
- examine all files for consistency:
    - come up with a cleaning plan for inconsistent examples (files without capital/operating columns)
- concat all rows across all files


## Notes:
- Some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an equivalent file in their allocation package
- There are ~30 files to analyze, most of which need to be uniquely cleaned. All cleaning steps are done in the `fund_split.py` script and saved to GCS

# Read in Melt DF parquet from GCS

In [2]:
# Load the long-format ("melted") fund request table from the GCS folder.
all_melt = pd.read_parquet(GCS_PATH + "all_fund_requests_melt.parquet")

In [3]:
# Quick sanity check: (rows, columns) followed by the column names.
display(all_melt.shape, all_melt.columns.tolist())

(2296, 8)

['rtpa',
 'implementing agenc-y/-ies',
 'project',
 'fund source',
 'capital/operation fy',
 'fund amount',
 'project type',
 'fiscal year']

In [4]:
# Total dollars and number of distinct RTPAs for each fund source.
by_source = all_melt.groupby("fund source").agg(
    {"fund amount": "sum", "rtpa": "nunique"}
)
by_source = by_source.reset_index()

# Total dollars for each project type.
by_type = all_melt.groupby("project type")[["fund amount"]].sum().reset_index()

# Total dollars for each fiscal year / project type pair.
by_year = (
    all_melt.groupby(["fiscal year", "project type"])[["fund amount"]]
    .sum()
    .reset_index()
)

# Total dollars for each RTPA / project type / fiscal year combination.
by_rtpa = (
    all_melt.groupby(["rtpa", "project type", "fiscal year"])[["fund amount"]]
    .sum()
    .reset_index()
)

# Charts

In [5]:
# Dollars per fiscal year, with bars coloured by project type.
stack_chart = alt.Chart(by_year).mark_bar(point=True)
stack_chart = stack_chart.encode(
    x=alt.X("fiscal year"),
    y=alt.Y("fund amount"),
    color=alt.Color("project type"),
    tooltip=["project type", "fund amount"],
)
stack_chart = stack_chart.properties(
    title="SB125 allocation funds by capital/operating expenses, by fiscal year",
    width=600,
    height=400,
)

# Dollars per project type across all fiscal years.
overall_chart = alt.Chart(by_type).mark_bar(point=True)
overall_chart = overall_chart.encode(
    x=alt.X("project type"),
    y=alt.Y("fund amount"),
    color=alt.Color("project type"),
    tooltip=["project type", "fund amount"],
)
overall_chart = overall_chart.properties(
    title="Overall split of SB125 allocation funds by capital/operating expenses",
    width=600,
    height=400,
)

# Dollars per fund source (no colour encoding on this one).
source_chart = alt.Chart(by_source).mark_bar(point=True)
source_chart = source_chart.encode(
    x=alt.X("fund source"),
    y=alt.Y("fund amount"),
    tooltip=["fund source", "fund amount"],
)
source_chart = source_chart.properties(
    title="Allocation funds by funding source",
    width=600,
    height=400,
)

# Show each chart next to the table it was built from.
display(
    overall_chart,
    by_type,
    stack_chart,
    by_year.sort_values("project type"),
    source_chart,
)

Unnamed: 0,project type,fund amount
0,capital,1855348721
1,operating,625654847


Unnamed: 0,fiscal year,project type,fund amount
0,23-24,capital,945242943
2,24-25,capital,812480147
4,25-26,capital,49092309
6,26-27,capital,48533322
1,23-24,operating,230807266
3,24-25,operating,169494114
5,25-26,operating,96177774
7,26-27,operating,129175693


# Draft Aggregations

## simple bar chart function with altair

In [6]:
def make_bar(data, x_axis, y_axis, label_field=None):
    """Build a labelled Altair bar chart.

    Parameters
    ----------
    data
        Data source handed straight to ``alt.Chart``.
    x_axis
        Encoding for the x channel (column name / Altair shorthand).
    y_axis
        Encoding for the y channel (column name / Altair shorthand).
    label_field : optional
        Encoding used for the text label drawn on each bar. When omitted the
        labels follow ``y_axis`` so they always match the bar heights; if
        ``y_axis`` is not a plain string, the previous hard-coded
        ``"fund amount"`` field is used as a fallback.

    Returns
    -------
    A layered chart: the bars with the text labels drawn on top.
    """
    if label_field is None:
        # Previously the label was always "fund amount", which mislabelled
        # bars whenever a different column was plotted on the y axis.
        label_field = y_axis if isinstance(y_axis, str) else "fund amount"

    bars = (
        alt.Chart(data)
        .mark_bar()
        .encode(x=x_axis, y=y_axis)
        .properties(width="container", height=400)
    )
    # The text layer reuses the bar encodings so each label sits on its bar.
    labels = bars.mark_text(align="center", baseline="middle", dx=3).encode(
        text=label_field
    )
    return bars + labels

In [7]:
# Bar chart limited to the TIRCP / ZETCP fund sources listed below.
make_bar(
    data=by_source.loc[
        by_source["fund source"].isin(
            ["TIRCP", "ZETCP", "ZETCP (GGRF)", "ZETCP (PTA)"]
        )
    ],
    x_axis="fund source",
    y_axis="fund amount",
)

## Split of capx/ops by RTPA

In [8]:
# Every RTPA except LA Metro, one bar group per RTPA offset by fiscal year.
rtpa_split = alt.Chart(
    by_rtpa.loc[
        by_rtpa["rtpa"].ne("Los Angeles County Metropolitan Transportation Authority")
    ]
).mark_bar(point=True)
rtpa_split = rtpa_split.encode(
    x=alt.X("rtpa", sort="-y"),
    xOffset="fiscal year",
    y="fund amount",
    color="project type",
    tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
)
rtpa_split = rtpa_split.properties(
    title="CapX/Ops funding split by RTPA by Fiscal Year",
    width="container",
    height=400,
)

In [9]:
# Display the non-Metro RTPA chart as this cell's output.
rtpa_split

In [10]:
# LA Metro only, bars offset by fiscal year and coloured by project type.
metro_split = alt.Chart(
    by_rtpa.loc[
        by_rtpa["rtpa"].eq("Los Angeles County Metropolitan Transportation Authority")
    ]
).mark_bar(point=True)
metro_split = metro_split.encode(
    x="rtpa",
    xOffset="fiscal year",
    y="fund amount",
    color="project type",
    detail="fiscal year",
    tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
)
metro_split = metro_split.properties(
    title="CapX/Ops funding split for METRO by Fiscal Year",
    width="container",
    height=400,
)

In [11]:
# Display the LA Metro chart as this cell's output.
metro_split

## Which RTPAs request the most/least funds?

In [12]:
# Rank RTPAs by total requested dollars (largest first), skip the single
# largest requester and keep the next six (positions 1..6 of the ranking).
# NOTE(review): confirm that six is the intended number of RTPAs.
top_list = (
    by_rtpa.groupby("rtpa")[["fund amount"]]
    .sum()
    .sort_values("fund amount", ascending=False)
    .reset_index()
    .iloc[1:7]
)

In [13]:
# Rank RTPAs by total requested dollars (largest first) and keep the last 23
# rows, i.e. the 23 smallest requesters.
# NOTE(review): confirm that 23 is the intended number of RTPAs.
bottom_list = (
    by_rtpa.groupby("rtpa")[["fund amount"]]
    .sum()
    .sort_values("fund amount", ascending=False)
    .reset_index()
    .iloc[-23:]
)

In [14]:
# Plain Python lists of RTPA names, used to filter `by_rtpa` for the charts.
top_rtpa = top_list["rtpa"].tolist()
bottom_rtpa = bottom_list["rtpa"].tolist()

In [15]:
# Text summary followed by the plain-text rendering of `top_list`.
print(
    "\n".join(
        [
            "",
            "The RTPA with the most requested funds is LA METRO.",
            "Afterwards, these RPTAs requested the most funds.",
            f"{top_list}",
            "",
        ]
    )
)


The RTPA with the most requested funds is LA METRO.
Afterwards, these RPTAs requested the most funds.
                                              rtpa  fund amount
1                                             OCTA    380916077
2            San Diego Metropolitan Transit System    283651605
3                                             RCTC    138210990
4                                         Kern COG    103242581
5                                             VCTC     98560329
6  Merced County Association of Governments (MCAG)     38377592



In [16]:
# Capital/operating split for the RTPAs named in `top_rtpa`.
# The title used to say "top 15" regardless of how many RTPAs were selected;
# it now reports the actual count so it cannot drift from the data.
top_15 = (
    alt.Chart(by_rtpa[by_rtpa["rtpa"].isin(top_rtpa)])
    .mark_bar(point=True)
    .encode(
        x=alt.X("rtpa:N", sort="-y"),
        xOffset="fiscal year:N",
        y="fund amount:Q",
        color="project type",
        detail="fiscal year",
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title=(
            f"CapX/Ops funding split for the {len(top_rtpa)} highest-requesting "
            "RTPAs (excluding the #1 requester), by Fiscal Year"
        ),
        width="container",
        height=600,
    )
    .resolve_scale(x="shared", y="independent")
)

In [17]:
# Capital/operating split for the RTPAs named in `bottom_rtpa`.
# The title used to say "bottom 15 most funds requested", which was both
# self-contradictory and out of sync with the number of RTPAs selected; it now
# reports the actual count.
bottom_15 = (
    alt.Chart(by_rtpa[by_rtpa["rtpa"].isin(bottom_rtpa)])
    .mark_bar(point=True)
    .encode(
        x=alt.X("rtpa:N", sort="-y"),
        xOffset="fiscal year:N",
        y="fund amount:Q",
        color="project type",
        detail="fiscal year",
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title=(
            f"CapX/Ops funding split for the {len(bottom_rtpa)} lowest-requesting "
            "RTPAs, by Fiscal Year"
        ),
        width="container",
        height=600,
    )
    .resolve_scale(x="shared", y="independent")
)

In [18]:
# Display the chart built from `by_rtpa` filtered to the `top_rtpa` names.
top_15

In [19]:
# Display the chart built from `by_rtpa` filtered to the `bottom_rtpa` names.
bottom_15

## Chart of each RTPA and their agencies, showing split of funds by OpX/CapX by FY

In [20]:
# Total dollars for each RTPA / implementing agency / project type / fiscal year.
rtpa_group = (
    all_melt.groupby(
        ["rtpa", "implementing agenc-y/-ies", "project type", "fiscal year"]
    )[["fund amount"]]
    .sum()
    .reset_index()
)

In [21]:
# Long-format frame for Altair: the "fund amount" values land in a "dollars" column.
rtpa_melt = pd.melt(
    rtpa_group,
    id_vars=["rtpa", "implementing agenc-y/-ies", "fiscal year", "project type"],
    value_vars=["fund amount"],
    value_name="dollars",
)

In [28]:
# Faceted bar chart: one panel per RTPA, bars coloured by implementing agency.

# Base bars: project type on x (offset by fiscal year), dollars on y.
rtpa_bar_chart = (
    alt.Chart(rtpa_melt)
    .mark_bar()
    .encode(
        x=alt.X("project type"),
        xOffset="fiscal year",
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=[
            "rtpa",
            "implementing agenc-y/-ies",
            "project type",
            "dollars",
            "fiscal year",
        ],
    )
)

# Black dollar labels nudged 10px above each bar.
rtpa_chart_text = rtpa_bar_chart.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

# Layer the labels over the bars.
rtpa_chart_combo = alt.layer(rtpa_bar_chart, rtpa_chart_text).properties(
    title="Implementing agency funding split",
    width=400,
)

# Two-column grid of panels, one per RTPA, each with its own y scale.
rtpa_facet = rtpa_chart_combo.facet(
    facet=alt.Facet("rtpa", title="Implementing agency funding split"),
    columns=2,
).resolve_scale(x="shared", y="independent")

# Display the faceted chart as this cell's output.
rtpa_facet

In [29]:
# Faceted line chart for capital funds only: one panel per RTPA.

# One line per implementing agency across fiscal years.
rtpa_line = (
    alt.Chart(rtpa_melt[rtpa_melt["project type"] == "capital"])
    .mark_line()
    .encode(
        x=alt.X("fiscal year"),
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=[
            "rtpa",
            "implementing agenc-y/-ies",
            "project type",
            "dollars",
            "fiscal year",
        ],
    )
)

# Black dollar labels nudged 10px above each point on the line.
cap_line_text = rtpa_line.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

# Layer the labels over the lines.
rtpa_capital = alt.layer(rtpa_line, cap_line_text).properties(
    title="Implementing agency funding split",
    width=400,
)

# Two-column grid of panels, one per RTPA, with independent x and y scales.
rtpa_capital_facet = rtpa_capital.facet(
    facet=alt.Facet("rtpa", title="Capital funds by agencies in RTPAs, by fiscal year"),
    columns=2,
).resolve_scale(x="independent", y="independent")

# Display the faceted chart as this cell's output.
rtpa_capital_facet

In [30]:
# Faceted line chart for operating funds only: one panel per RTPA.

# One line per implementing agency across fiscal years.
rtpa_op_line = (
    alt.Chart(rtpa_melt[rtpa_melt["project type"] == "operating"])
    .mark_line()
    .encode(
        x=alt.X("fiscal year"),
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=[
            "rtpa",
            "implementing agenc-y/-ies",
            "project type",
            "dollars",
            "fiscal year",
        ],
    )
    .properties(title="Implementing agency funding split", width=400)
)

# Black dollar labels nudged 10px above each point on the line.
op_line_text = rtpa_op_line.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

# Layer the labels over the lines.
rtpa_operating = (rtpa_op_line + op_line_text).properties(
    title="Implementing agency funding split",
    width=400,
)

# BUG FIX: this facet was previously built from `rtpa_capital`, so the chart
# titled "Operating funds ..." actually displayed capital data. It must facet
# the operating layered chart defined just above.
rtpa_operating_facet = rtpa_operating.facet(
    facet=alt.Facet(
        "rtpa", title="Operating funds by agencies in RTPAs, by fiscal year"
    ),
    columns=2,
).resolve_scale(x="independent", y="independent")

# Display the faceted chart as this cell's output.
rtpa_operating_facet