In [1]:
import altair as alt
import pandas as pd
from IPython.display import HTML, Image, Markdown, display, display_html

# Passing None removes the display limit, so DataFrames are shown with every
# column and every row (no truncation).
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# GCS folder prefix used when reading this notebook's input files.
GCS_PATH = "gs://calitp-analytics-data/data-analyses/sb125/fund_split/"

In [39]:
# Re-position the Vega parameter-binding form (the chart filter widgets):
# absolutely positioned, pinned to the top and right edges.
_vega_bindings_css = """<style>form.vega-bindings {  position: absolute;  right: 0px;  top: 0px;}</style>"""

display(HTML(_vega_bindings_css))

# SB125 Fund Split Analysis

## Question:
- How did RTAs split SB125 funds between operations and capital?

## Methodology:
- upload all available `SB125 fund request template` files to GCS
- examine all files for consistencies:
    - come up with a cleaning plan for inconsistent examples (files without capital/operating columns)
- concat all rows across all files


## Notes:
- Some RTPAs did not submit a `SB125 fund request template.xlsx` file, but instead included an equivalent file in their allocation package
- There are ~30 files to analyze, most of which need to be uniquely cleaned. All cleaning steps are done in the `fund_split.py` script and saved to GCS

# Read in Melt DF parquet from GCS

In [2]:
# Load the melted fund-request table (`all_fund_requests_melt.parquet`) from
# the GCS folder configured in GCS_PATH.
all_melt = pd.read_parquet(f"{GCS_PATH}all_fund_requests_melt.parquet")

In [3]:
display(all_melt.shape, all_melt.dtypes)

(2296, 8)

rtpa                         object
implementing agenc-y/-ies    object
project                      object
fund source                  object
capital/operation fy         object
fund amount                   int64
project type                 object
fiscal year                  object
dtype: object

**Altair Data Types**

"*:Q" - Quantitative (quantity, numerical numbers)

"*:T" - Temporal (dates and times)

"*:O" - Ordinal (ranked order, e.g. small-med-large)

"*:N" - Nominal (categorical)

In [4]:
def _total_funds(group_cols):
    """Return `all_melt` summed on "fund amount" for each combination of `group_cols`."""
    return all_melt.groupby(group_cols).agg({"fund amount": "sum"}).reset_index()


# Per fund source: total dollars plus the number of distinct RTPAs per source.
by_source = (
    all_melt.groupby(["fund source"])
    .agg({"fund amount": "sum", "rtpa": "nunique"})
    .reset_index()
)

# The remaining summaries only differ in their grouping columns.
by_type = _total_funds(["project type"])
by_year = _total_funds(["fiscal year", "project type"])
by_rtpa = _total_funds(["rtpa", "project type", "fiscal year"])
by_agency = _total_funds(["implementing agenc-y/-ies", "project type", "fiscal year"])

In [5]:
# Total fund amount for every (RTPA, implementing agency, project type,
# fiscal year) combination, returned as a flat table.
_rtpa_group_keys = ["rtpa", "implementing agenc-y/-ies", "project type", "fiscal year"]

rtpa_group = (
    all_melt.groupby(_rtpa_group_keys).agg({"fund amount": "sum"}).reset_index()
)

# Main Charts

## overall summary charts

In [6]:
# Overall summary charts
# Three horizontal bar charts, each layered with text labels that show the
# fund amount behind every bar.

# Funds per fiscal year. The colour channel splits every bar into stacked
# capital/operating segments; the explicit `order` fixes the stacking sequence
# so that the label layer derived from this chart stacks in the same sequence.
stack_chart = (
    alt.Chart(by_year)
    .mark_bar(point=True)
    .encode(
        y="fiscal year",
        x="fund amount",
        color="project type",
        order=alt.Order("project type"),
        tooltip=["project type", "fund amount"],
    )
    .properties(
        title="SB125 allocation funds by capital/operating expenses, by fiscal year",
        width=600,
        height=300,
    )
)

# Bar marks are stacked by default, text marks are not. Without an explicit
# stack on x every label is drawn at its own raw value (measured from zero)
# instead of at the end of the bar segment it describes.
stack_text = stack_chart.mark_text(
    align="left",
    dx=-10,
    dy=10
).encode(
    x=alt.X("fund amount", stack="zero"),
    text="fund amount",
    color=alt.value("black"),
)

# Total funds per project type (one un-stacked bar per type).
overall_chart = (
    alt.Chart(by_type)
    .mark_bar(point=True)
    .encode(
        y="project type",
        x="fund amount",
        color="project type",
        tooltip=["project type", "fund amount"],
    )
    .properties(
        title="Overall split of SB125 allocation funds by capital/operating expenses",
        width=600,
        height=300,
    )
)

overall_text = overall_chart.mark_text(
    align="left",
    dx=0,
    dy=10
).encode(
    text="fund amount", color=alt.value("black")
)

# Total funds per fund source (one un-stacked bar per source, single colour).
source_chart = (
    alt.Chart(by_source)
    .mark_bar(point=True)
    .encode(
        y="fund source",
        x="fund amount",
        tooltip=["fund source", "fund amount"],
    )
    .properties(title="Allocation funds by funding source", width=600, height=300)
)

source_text = source_chart.mark_text(
    align="left",
    dx=0,
    dy=10
).encode(
    text="fund amount", color=alt.value("black")
)


# Render each bar chart with its label layer on top.
display(
    overall_chart + overall_text,
    stack_chart + stack_text,
    source_chart + source_text,
)

## mini charts of agencies, with drop down menu for RTPA.
- per katrina
- in each mini chart
    - stacked bar chart of capX/opX funds, for each FY

In [40]:
# base chart: despite the variable name this is a LINE chart (mark_line with
# point markers) of fund amount by fiscal year, one line per project type
rtpa_bar_chart_2 = alt.Chart(rtpa_group).mark_line(point=True).encode(
    alt.Y("fund amount"),
    alt.X("fiscal year"),
    color="project type",
    #tooltip=["rtpa","implementing agenc-y/-ies","project type", "fund amount", "fiscal year"]
)

# create drop down

## list of rtpas (unique values of the "rtpa" column) offered as dropdown options
rtpa_list = list(rtpa_group["rtpa"].unique())

## actual drop down mechanism: a select widget labelled "Select RTPA"
rtpa_dropdown = alt.binding_select(
    options = rtpa_list,
    name= "Select RTPA"
)

# point selection on the "rtpa" field, driven by the dropdown, with "VCTC" as
# the initial value
rtpa_selector = alt.selection_point(fields=['rtpa'], value="VCTC", bind=rtpa_dropdown)

# data label text: same encodings as the line chart, drawn as black text
# 10px above each point
rtpa_chart_text_2 = rtpa_bar_chart_2.mark_text(
    align="center",
    dx=0,
    dy=-10,
    
    ).encode(text="fund amount:N", color=alt.value("black"))

# combine line and text charts into one layered chart
rtpa_chart_combo_2 = (rtpa_bar_chart_2 + rtpa_chart_text_2).properties(
    title="Implementing agency funding split",
    width=300,
    height=150
)

# add facets by agency: one mini chart per implementing agency, 3 per row,
# each with its own x and y scale; the selector is registered on the layered
# chart and then used to filter the data to the chosen RTPA
rtpa_facet_2 = rtpa_chart_combo_2.add_params(
    rtpa_selector
).facet(
    facet=alt.Facet("implementing agenc-y/-ies", title="SB 125 Funding by capital/operating cost by fiscal year, by agencies in RTPAs"), columns=3
).resolve_scale(
    x="independent", y="independent"
).transform_filter(
    rtpa_selector
)

# render chart
rtpa_facet_2

## side-by-side charts comparing operating and capital expenses for agencies in an RTPA
- with drop down for selectable RTPA

In [77]:
# list of RTPAs offered in the dropdown
rtpa_list = list(rtpa_group["rtpa"].unique())

# selectors: a dropdown bound to a point selection on the "rtpa" field,
# starting on "VCTC"
rtpa_dropdown = alt.binding_select(
    options=rtpa_list,
    name="Select RTPA"
)

rtpa_selector = alt.selection_point(
    fields=['rtpa'], value="VCTC", bind=rtpa_dropdown
)

# opx chart: operating rows only, one bar per agency and fiscal year, filtered
# to the RTPA chosen in the dropdown.
# No `.resolve_scale()` on this single-view chart: scale resolution only means
# something for compound charts, and a unit chart that carries it appears to be
# rejected as a layer member (see the SchemaValidationError recorded further
# down in this notebook). Scale sharing is configured on the hconcat below.
opx_chart = alt.Chart(
    rtpa_group[rtpa_group['project type'] == "operating"], title="Operating Funds"
).mark_bar().encode(
    alt.Y("implementing agenc-y/-ies").title("Agency"),
    alt.X("fund amount"),
    alt.YOffset("fiscal year"),
    alt.Color("implementing agenc-y/-ies"),
    tooltip=["fund amount", "fiscal year"]
).add_params(
    rtpa_selector
).transform_filter(
    rtpa_selector
).properties(
    width=400,
    height=400,
)

# black value labels starting at the end of each bar
opx_text = opx_chart.mark_text(
    align="left",
    dx=0,
    dy=0
).encode(
    text="fund amount", color=alt.value("black")
)

opx_chart_text = (opx_chart + opx_text)


# capx chart: the same specification pointed at the capital rows. The selector
# and its filter are inherited from opx_chart, so they are not applied again.
capx_chart = opx_chart.properties(
    data=rtpa_group[rtpa_group['project type'] == "capital"], title="Capital Funds"
)

capx_text = capx_chart.mark_text(
    align="left",
    dx=0,
    dy=0
).encode(
    text="fund amount", color=alt.value("black")
)

capx_chart_text = (capx_chart + capx_text)

# Place the two labelled panels side by side (previously the un-labelled bar
# charts were concatenated and the label layers were never displayed).
opx_capx_chart = alt.hconcat(
    opx_chart_text, capx_chart_text
).resolve_scale(
    x="shared", y="independent"
).properties(
    title=alt.TitleParams(
        text="RTPA split of Operating and Capital Funds Requested by Agencies",
        subtitle="Separated by Fiscal Year"
    )
)


opx_capx_chart


In [54]:
# NOTE(review): leftover scratch cell. Its recorded output (below) is a
# SchemaValidationError, so the notebook cannot complete "Restart & Run All"
# while this cell is present. Presumably `capx_chart` as defined could not be
# used as a layer member -- confirm, then fix or delete this cell.
capx_chart + capx_chart.mark_text()

SchemaValidationError: Multiple errors were found.

Error 1: `LayerChart` has no parameter named 'mark'

    Existing parameter names are:
    layer        data          height    projection   usermeta   
    autosize     datasets      name      resolve      view       
    background   description   padding   title        width      
    config       encoding      params    transform               

    See the help for `LayerChart` to read the full description of these parameters

Error 2: `LayerChart` has no parameter named 'mark'

    Existing parameter names are:
    layer        data          height    projection   usermeta   
    autosize     datasets      name      resolve      view       
    background   description   padding   title        width      
    config       encoding      params    transform               

    See the help for `LayerChart` to read the full description of these parameters

alt.LayerChart(...)

# Draft Aggregations

## simple bar chart function with altair

In [9]:
def make_bar(data, x_axis, y_axis, text_field="fund amount"):
    """Build a bar chart with a text label layered on top of every bar.

    Parameters
    ----------
    data
        Data handed to ``alt.Chart`` (DataFrames in this notebook).
    x_axis, y_axis
        Field names (or Altair shorthand strings) for the x and y encodings.
    text_field
        Field whose value is written on each bar. Defaults to
        ``"fund amount"``, the field that used to be hard-coded, so existing
        calls behave exactly as before.

    Returns
    -------
    The layered chart (bars + text labels), 400px high and as wide as its
    container.
    """
    bars = (
        alt.Chart(data)
        .mark_bar()
        .encode(
            x=x_axis,
            y=y_axis,
        )
        .properties(width="container", height=400)
    )
    # The labels reuse the bar encodings, so each label sits at its bar's position.
    labels = bars.mark_text(align="center", baseline="middle", dx=3).encode(
        text=text_field
    )
    return bars + labels

In [10]:
# Bar chart of the total fund amount for the four listed fund sources only.
_fund_sources = ["TIRCP", "ZETCP", "ZETCP (GGRF)", "ZETCP (PTA)"]
_is_listed_source = by_source["fund source"].isin(_fund_sources)

make_bar(by_source[_is_listed_source], x_axis="fund source", y_axis="fund amount")

## Split of capx/ops by RTPA

In [11]:
# Bars per RTPA, offset by fiscal year and coloured by project type; RTPAs are
# sorted by descending y value.
rtpa_split = (
    alt.Chart(by_rtpa)
    .mark_bar(point=True)
    .encode(
        alt.X("rtpa:N").sort("-y"),
        alt.XOffset("fiscal year:O"),
        alt.Y("fund amount:Q"),
        alt.Color("project type:N"),
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title="CapX/Ops funding split by RTPA by Fiscal Year",
        width="container",
        height=400,
    )
)

# Same chart, restricted to the RTPA picked with the dropdown selector.
rtpa_split_2 = (
    rtpa_split
    .add_params(rtpa_selector)
    .transform_filter(rtpa_selector)
)

In [12]:
rtpa_split_2

In [13]:
# Only the rows of the Los Angeles County Metropolitan Transportation Authority.
_metro_rows = by_rtpa.loc[
    by_rtpa["rtpa"] == "Los Angeles County Metropolitan Transportation Authority"
]

# Bars offset by fiscal year and coloured by project type for that one RTPA.
metro_split = (
    alt.Chart(_metro_rows)
    .mark_bar(point=True)
    .encode(
        alt.X("rtpa"),
        alt.XOffset("fiscal year"),
        alt.Y("fund amount"),
        alt.Color("project type"),
        alt.Detail("fiscal year"),
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title="CapX/Ops funding split for METRO by Fiscal Year",
        width="container",
        height=400,
    )
)

In [14]:
metro_split

## Which RTPAs request the most/least funds?

In [15]:
# Rank RTPAs by total fund amount, largest first. The `[1:7]` slice keeps the
# rows at positions 1-6 of that ranking: the single largest requester (row 0)
# is left out and the next six RTPAs remain.
top_list = (
    by_rtpa.groupby(["rtpa"])
    .agg({"fund amount": "sum"})
    .sort_values(by="fund amount", ascending=False)
    .reset_index()[1:7]
)

In [16]:
# Rank RTPAs by total fund amount, largest first, and keep the last 23 rows of
# that ranking, i.e. the 23 RTPAs with the smallest totals.

bottom_list = (
    by_rtpa.groupby(["rtpa"])
    .agg({"fund amount": "sum"})
    .sort_values(by="fund amount", ascending=False)
    .reset_index()
    .tail(23)
)

In [17]:
# RTPA names of each ranking table as plain Python lists
top_rtpa = top_list["rtpa"].tolist()
bottom_rtpa = bottom_list["rtpa"].tolist()

In [18]:
# Narrative first, then the table itself through rich display rather than a
# plain-text DataFrame repr embedded in the printed string.
print(
    "The RTPA with the most requested funds is LA METRO.\n"
    "Afterwards, these RTPAs requested the most funds."
)
display(top_list)


The RTPA with the most requested funds is LA METRO.
Afterwards, these RPTAs requested the most funds.
                                              rtpa  fund amount
1                                             OCTA    380916077
2            San Diego Metropolitan Transit System    283651605
3                                             RCTC    138210990
4                                         Kern COG    103242581
5                                             VCTC     98560329
6  Merced County Association of Governments (MCAG)     38377592



In [19]:
# Bars (offset by fiscal year, coloured by project type) for the RTPAs listed
# in `top_rtpa`. The title reports how many RTPAs are actually plotted instead
# of a fixed "top 15", which did not match the length of `top_rtpa`.
top_15 = (
    alt.Chart(by_rtpa[by_rtpa["rtpa"].isin(top_rtpa)])
    .mark_bar(point=True)
    .encode(
        x=alt.X("rtpa:N", sort="-y"),
        xOffset="fiscal year:N",
        y="fund amount:Q",
        color="project type",
        detail="fiscal year",
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title=(
            f"CapX/Ops funding split for {len(top_rtpa)} of the RTPAs "
            "requesting the most funds, by Fiscal Year"
        ),
        width="container",
        height=600,
    )
    .resolve_scale(x="shared", y="independent")
)

In [20]:
# Bars (offset by fiscal year, coloured by project type) for the RTPAs listed
# in `bottom_rtpa`. The title reports how many RTPAs are actually plotted
# instead of a fixed "bottom 15", which did not match the length of
# `bottom_rtpa`.
bottom_15 = (
    alt.Chart(by_rtpa[by_rtpa["rtpa"].isin(bottom_rtpa)])
    .mark_bar(point=True)
    .encode(
        x=alt.X("rtpa:N", sort="-y"),
        xOffset="fiscal year:N",
        y="fund amount:Q",
        color="project type",
        detail="fiscal year",
        tooltip=["rtpa", "project type", "fund amount", "fiscal year"],
    )
    .properties(
        title=(
            f"CapX/Ops funding split for the {len(bottom_rtpa)} RTPAs "
            "requesting the least funds, by Fiscal Year"
        ),
        width="container",
        height=600,
    )
    .resolve_scale(x="shared", y="independent")
)

In [21]:
# render the chart built from `by_rtpa` rows whose RTPA is in `top_rtpa`
top_15

In [22]:
bottom_15

## Chart of each RTPA and their agencies, showing split of funds by OpX/CapX by FY

In [23]:
# Long-format copy of `rtpa_group` for Altair: the identifier columns are kept
# and the "fund amount" values move into a "dollars" column.
_melt_id_cols = ["rtpa", "implementing agenc-y/-ies", "fiscal year", "project type"]

rtpa_melt = pd.melt(
    rtpa_group,
    id_vars=_melt_id_cols,
    value_vars="fund amount",
    value_name="dollars",
)

In [24]:
# Shape and first rows of each table, in pipeline order:
# raw melt -> grouped -> re-melted for Altair.
for _frame in (all_melt, rtpa_group, rtpa_melt):
    display(_frame.shape, _frame.head())

(2296, 8)

Unnamed: 0,rtpa,implementing agenc-y/-ies,project,fund source,capital/operation fy,fund amount,project type,fiscal year
0,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,360641,capital,23-24
1,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,TIRCP,capital_FY23-24,0,capital,23-24
2,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (GGRF),capital_FY23-24,3616,capital,23-24
3,Alpine County Transportation Commission,Alpine County Transportation Commission,Transit Facility Conversion Project,ZETCP (PTA),capital_FY23-24,3123,capital,23-24
4,Amador County Transportation Commission,Amador Transit,,TIRCP,capital_FY23-24,100000,capital,23-24


(704, 5)

Unnamed: 0,rtpa,implementing agenc-y/-ies,project type,fiscal year,fund amount
0,Alpine County Transportation Commission,Alpine County Transportation Commission,capital,23-24,367380
1,Alpine County Transportation Commission,Alpine County Transportation Commission,capital,24-25,364575
2,Alpine County Transportation Commission,Alpine County Transportation Commission,capital,25-26,3781
3,Alpine County Transportation Commission,Alpine County Transportation Commission,capital,26-27,3781
4,Alpine County Transportation Commission,Alpine County Transportation Commission,operating,23-24,0


(704, 6)

Unnamed: 0,rtpa,implementing agenc-y/-ies,fiscal year,project type,variable,dollars
0,Alpine County Transportation Commission,Alpine County Transportation Commission,23-24,capital,fund amount,367380
1,Alpine County Transportation Commission,Alpine County Transportation Commission,24-25,capital,fund amount,364575
2,Alpine County Transportation Commission,Alpine County Transportation Commission,25-26,capital,fund amount,3781
3,Alpine County Transportation Commission,Alpine County Transportation Commission,26-27,capital,fund amount,3781
4,Alpine County Transportation Commission,Alpine County Transportation Commission,23-24,operating,fund amount,0


In [25]:
# Chart it!

# bars: dollars per project type, offset by fiscal year, coloured by agency
rtpa_bar_chart = (
    alt.Chart(rtpa_melt)
    .mark_bar()
    .encode(
        x=alt.X("project type"),
        xOffset="fiscal year",
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=["rtpa","implementing agenc-y/-ies","project type", "dollars", "fiscal year"],
    )
)

# black dollar labels, 10px above each bar position
rtpa_chart_text = rtpa_bar_chart.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

# bars and labels as one layered chart
rtpa_chart_combo = alt.layer(rtpa_bar_chart, rtpa_chart_text).properties(
    title="Implementing agency funding split",
    width=400,
)

# one panel per RTPA, two panels per row; x scale shared, y scale per panel
rtpa_facet = rtpa_chart_combo.facet(
    facet=alt.Facet(
        "rtpa",
        title="SB 125 Funding by capital/operating cost by fiscal year, by agencies in RTPAs",
    ),
    columns=2,
).resolve_scale(x="shared", y="independent")

# render chart
rtpa_facet

### line chart of just capital funds, mini charts of each RTPA

In [26]:
# line chart, just capital

# capital rows only
_capital_rows = rtpa_melt[rtpa_melt["project type"] == "capital"]

# dollars per fiscal year, one line (with point markers) per implementing agency
rtpa_line = (
    alt.Chart(_capital_rows)
    .mark_line(point=True)
    .encode(
        x=alt.X("fiscal year"),
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=["rtpa","implementing agenc-y/-ies","project type", "dollars", "fiscal year"],
    )
)

# black dollar labels, 10px above each point
cap_line_text = rtpa_line.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

rtpa_capital = alt.layer(rtpa_line, cap_line_text).properties(width=400)

# one panel per RTPA in a single column, each with its own x and y scale
rtpa_capital_facet = rtpa_capital.facet(
    facet=alt.Facet("rtpa", title="Capital funds by agencies in RTPAs, by fiscal year"),
    columns=1,
).resolve_scale(x="independent", y="independent")

# rendering is currently disabled
#rtpa_capital_facet

### line chart of just operating funds, mini charts of each RTPA

In [27]:
# line chart, just operating

# operating rows only
_operating_rows = rtpa_melt[rtpa_melt["project type"] == "operating"]

# dollars per fiscal year, one line (with point markers) per implementing agency
rtpa_op_line = (
    alt.Chart(_operating_rows)
    .mark_line(point=True)
    .encode(
        x=alt.X("fiscal year"),
        y=alt.Y("dollars"),
        color="implementing agenc-y/-ies",
        tooltip=["rtpa","implementing agenc-y/-ies","project type", "dollars", "fiscal year"],
    )
)

# black dollar labels, 10px above each point
op_line_text = rtpa_op_line.mark_text(align="center", dx=0, dy=-10).encode(
    text="dollars:N",
    color=alt.value("black"),
)

rtpa_operating = alt.layer(rtpa_op_line, op_line_text).properties(width=400)

# one panel per RTPA in a single column, each with its own x and y scale
rtpa_operating_facet = rtpa_operating.facet(
    facet=alt.Facet("rtpa", title="Operating funds by agencies in RTPAs, by fiscal year"),
    columns=1,
).resolve_scale(x="independent", y="independent")

# rendering is currently disabled
#rtpa_operating_facet

## concat of just capital and just operating 

In [28]:
#rtpa_capital_facet | rtpa_operating_facet

## big single chart of all agencies fund amount, same color, fading from most to least

In [59]:
# Horizontal bars from `by_agency`: fiscal year on y, one offset bar slot per
# implementing agency, bar length = summed fund amount.
# NOTE(review): x aggregates with sum() while the colour channel uses the raw
# "fund amount" field, so colour presumably reflects individual rows rather
# than the summed bar length -- confirm this is the intended shading.
alt.Chart(by_agency).mark_bar(size=20).encode(
    alt.X("sum(fund amount):Q"),
    alt.YOffset("implementing agenc-y/-ies:N"),
    alt.Y("fiscal year:N"),
    #alt.Size("sum(fund amount):Q"),
    #alt.OpacityValue(0.4),
    alt.Color("fund amount", 
              legend=alt.Legend(orient='bottom', columns=8),
              #scale=alt.Scale(scheme='blues')
             ),
    # the tooltip only shows the agency name, not the amount
    tooltip=["implementing agenc-y/-ies:N"]
    #alt.Stroke("sum(fund amount):Q", ),
).properties(
    title="Total funding amount by Transit Agencies for each Fiscal Year",
    width="container",
    height=600
)


## RTPA to district?

In [30]:
# Read the RTPA / county / district crosswalk workbook. It lives in the same
# SB125 fund-split folder as the other inputs, so the path is built from
# GCS_PATH instead of repeating the bucket prefix.
crosswalk = pd.read_excel(
    f"{GCS_PATH}rtpa_to_county_to_district_crosswalk.xlsx",
    sheet_name="rtpa_county_district_crosswalk",
)

In [31]:
display(crosswalk.shape, crosswalk.columns)

(58, 3)

Index(['COUNTY', 'RTPA', 'DISTRICT'], dtype='object')

In [32]:
crosswalk

Unnamed: 0,COUNTY,RTPA,DISTRICT
0,Alameda,Metropolitan Transportation Commission,4
1,Alpine,Alpine County Local Transportation Commission,10
2,Amador,Amador County Transportation Commission,10
3,Butte,Butte County Association of Governments,3
4,Calaveras,Calaveras Council of Governments,10
5,Colusa,Colusa County Transportation Commission,3
6,Contra Costa,Metropolitan Transportation Commission,4
7,Del Norte,Del Norte Local Transportation Commission,1
8,El Dorado,El Dorado County Transportation Commission,3
9,Fresno,Fresno Council of Governments,6


## RTPA by size?
- tie in NTD ID to the agencies in the RTPAs to get VOMS, UPT?
- or by # of agencies in each RTPA?

In [33]:
# Read the NTD ID <-> RTPA crosswalk CSV from the `ntd` folder of the
# analytics bucket (a different folder from GCS_PATH).
ntd_id_rtpa = pd.read_csv("gs://calitp-analytics-data/data-analyses/ntd/ntd_id_rtpa_crosswalk.csv")

In [34]:
display(
    ntd_id_rtpa.shape,
    ntd_id_rtpa.columns
)

(117, 6)

Index(['NTD ID', 'Legacy NTD ID', 'Agency', 'UZA Name', 'RTPA_open_data',
       'RTPA'],
      dtype='object')

In [35]:
# Rows whose agency name contains "Los". `na=False` treats a missing agency
# name as "no match"; without it a single NaN makes the mask non-boolean and
# the row selection raises.
ntd_id_rtpa[ntd_id_rtpa["Agency"].str.contains("Los", na=False)]

Unnamed: 0,NTD ID,Legacy NTD ID,Agency,UZA Name,RTPA_open_data,RTPA
26,90147,9147,City of Los Angeles,"Los Angeles--Long Beach--Anaheim, CA",Southern California Association of Governments,Los Angeles County Metropolitan Transportation...
36,90154,9154,Los Angeles County Metropolitan Transportation...,"Los Angeles--Long Beach--Anaheim, CA",Southern California Association of Governments,Los Angeles County Metropolitan Transportation...


In [36]:
ntd_id_rtpa["RTPA_open_data"].value_counts()

Southern California Association of Governments          40
Metropolitan Transportation Commission                  27
Sacramento Area Council of Governments                  12
San Diego Association of Governments                     8
Tulare County Association of Governments                 4
Stanislaus Council of Governments                        4
San Joaquin Council of Governments                       4
San Luis Obispo Council of Governments                   3
Kern Council of Governments                              2
Santa Barbara County Association of Governments          2
Kings County Association of Governments                  2
Butte County Association of Governments                  2
Merced County Association of Governments                 1
Tahoe Regional Planning Agency                           1
Alpine County Local Transportation Commission            1
Shasta Regional Transportation Agency                    1
Fresno Council of Governments                           

In [37]:
ntd_id_rtpa["RTPA"].value_counts()

Metropolitan Transportation Commission                      27
Los Angeles County Metropolitan Transportation Authority    23
Sacramento Area Council of Governments                      12
San Diego Association of Governments                         8
Riverside County Transportation Commission                   5
Tulare County Association of Governments                     4
Ventura County Transportation Commission                     4
San Joaquin Council of Governments                           4
Stanislaus Council of Governments                            4
San Luis Obispo Council of Governments                       3
San Bernardino County Transportation Authority               3
Orange County Transportation Authority                       3
Kern Council of Governments                                  2
Santa Barbara County Association of Governments              2
Kings County Association of Governments                      2
Imperial County Transportation Commission              