## Visuals

In [16]:
import altair as alt

import numpy as np
import pandas as pd
from shared_utils import calitp_color_palette
from shared_utils import styleguide
from siuba import *

In [17]:
# Grants table (BlackCat_TIRCP_DLA.parquet) read from the calitp-analytics-data bucket.
GRANTS_PARQUET_URI = (
    "gs://calitp-analytics-data/data-analyses/grants/BlackCat_TIRCP_DLA.parquet"
)
parquet = pd.read_parquet(GRANTS_PARQUET_URI, engine="auto")

In [18]:
# Preview the first rows to check which columns were loaded.
parquet.head()

Unnamed: 0,locode,name,caltrans_district,BC_TIRCP_DLA_merge
0,6302.0,Humboldt Bay Harbor Recreation & Conservation ...,District 1: Eureka,DLA Only
1,6330.0,Willow Creek Community Services District,District 1: Eureka,DLA Only
2,5036.0,Trinidad,District 1: Eureka,DLA Only
3,5049.0,Ukiah,District 1: Eureka,DLA Only
4,5082.0,Willits,District 1: Eureka,DLA Only


In [19]:
# Aggregating: collapse the table to one entry per
# (grant-overlap category, organization name) pair.
# The "max" of locode is only there to materialise the groups; the column is
# dropped straight away, so `agg` keeps the two keys as its index and has no
# data columns.
group_keys = ["BC_TIRCP_DLA_merge", "name"]
deduped_rows = parquet.drop_duplicates()
agg = (
    deduped_rows.groupby(group_keys)
    .agg({"locode": "max"})
    .drop(columns=["locode"])
)

In [20]:
# Display the (category, organization name) pairs held in the index of `agg`.
agg

BC_TIRCP_DLA_merge,name
BlackCat Only,Alegria Community Living
BlackCat Only,Alpine County Community Development
BlackCat Only,Amador Transit
BlackCat Only,Angel View
BlackCat Only,Arc Bakersfield
...,...
"TIRCP, BlackCat and DLA",San Joaquin Regional Transit District
"TIRCP, BlackCat and DLA",San Mateo County Transit District
"TIRCP, BlackCat and DLA",Santa Clara Valley Transportation Authority
"TIRCP, BlackCat and DLA",Solano Transportation Authority


In [21]:
# Export the organization list to a single-sheet workbook. Handing to_excel a
# path lets pandas open and save the file itself. index=True is required
# because the category and name values live in the index of `agg`.
agg.to_excel(
    "./Master_Agreement_Overlaps.xlsx",
    sheet_name="Organizations",
    index=True,
)

In [22]:
# Number of distinct values in the `name` column.
parquet.name.nunique()

1168

In [23]:
# Count rows per grant-overlap category.
# The two output columns are named explicitly via rename_axis / reset_index(name=...)
# instead of renaming the defaults produced by value_counts().reset_index():
# those defaults are ("index", <series name>) on pandas 1.x but
# (<series name>, "count") on pandas 2.x, where the old rename would have
# mislabelled the category column as the count and left no "Grants" column.
# NOTE(review): value_counts tallies rows, so an organization name that appears
# on several rows is counted more than once -- confirm this is intended.
summary = (
    parquet.BC_TIRCP_DLA_merge.value_counts()
    .rename_axis("Grants")
    .reset_index(name="Count_of_Organizations")
)

In [24]:
# Show the per-category counts that are passed to the bar chart.
summary

Unnamed: 0,Grants,Count_of_Organizations
0,DLA Only,935
1,BlackCat Only,123
2,BlackCat and DLA,74
3,TIRCP and DLA,24
4,"TIRCP, BlackCat and DLA",13
5,TIRCP Only,11


In [25]:
# Order of the categories in the chart legend / colour scale.
# Every entry must match a value of BC_TIRCP_DLA_merge exactly: a label that is
# not in the scale domain is not assigned one of the palette colours.
# The previous list had 'TIRCP, BlackCat, and DLA' (extra comma), lacked
# 'TIRCP and DLA', and listed 'TIRCP and BlackCat', which does not occur in the data.
# NOTE(review): kept at six entries on the assumption that the palette supplies
# six colours -- confirm against calitp_color_palette.
legend = [
    "DLA Only",
    "BlackCat Only",
    "BlackCat and DLA",
    "TIRCP Only",
    "TIRCP, BlackCat and DLA",
    "TIRCP and DLA",
]

In [26]:
# Advanced bar chart: label the bars and reorder the legend.
def labeling(word):
    """Turn a column name into a human-readable label for titles and axes.

    Rules, applied in order:
      * "mpo" and "rtpa" are returned upper-cased.
      * Names with an entry in the special-case table return that entry.
      * Anything else has "unique_" expanded to "Number of Unique ",
        underscores turned into spaces, and is title-cased.

    Args:
        word: Column name to convert.

    Returns:
        The display label as a string.
    """
    # Names whose label is not simply the title-cased column name.
    special_labels = {
        "prepared_y": "Year",
        "dist": "District",
        "nunique": "Number of Unique",
        "project_no": "Project Number",
    }

    if word in ("mpo", "rtpa"):
        return word.upper()

    if word in special_labels:
        return special_labels[word]

    expanded = word.replace("unique_", "Number of Unique ").title()
    return expanded.replace("_", " ").title()

def base_bar(df):
    """Return an Altair chart over ``df`` that uses bar marks and has no encodings yet."""
    return alt.Chart(df).mark_bar()


# Function
def make_bar(df, y_col, x_col, label_col, chart_title="", legend_order=None):
    """Build and display a horizontal bar chart with a text label on each bar.

    Args:
        df: Data passed to ``alt.Chart``.
        y_col: Column on the y axis; also drives the bar colour and legend.
        x_col: Column on the x axis (bar length).
        label_col: Column whose values are drawn as text on the bars.
        chart_title: Chart title. When left as "", a title of the form
            "<x label> by <y label>" is generated with ``labeling``.
        legend_order: Values of ``y_col`` in the order they should appear in
            the legend; used as the colour-scale domain. Defaults to the
            module-level ``legend`` list, which is what this function always
            used before the parameter existed.

    Returns:
        None. The chart is shown with ``display`` (available in IPython /
        Jupyter kernels).
    """
    if chart_title == "":
        # Derive a title from the column names when the caller gives none.
        chart_title = f"{labeling(x_col)} by {labeling(y_col)}"

    if legend_order is None:
        # Fall back to the notebook-level list so existing calls are unchanged.
        legend_order = legend

    bar = base_bar(df).encode(
        x=alt.X(x_col, title=labeling(x_col)),
        # sort="-x" orders the y categories by the x value, largest first.
        y=alt.Y(y_col, title=labeling(y_col), sort="-x"),
        color=alt.Color(
            y_col,
            scale=alt.Scale(
                # Fixes both the legend order and the value -> colour pairing.
                domain=legend_order,
                range=calitp_color_palette.CALITP_CATEGORY_BRIGHT_COLORS,
            ),
        ),
    )

    # https://stackoverflow.com/questions/54015250/altair-setting-constant-label-color-for-bar-chart
    text = bar.mark_text(align="left", baseline="middle", color="black", dy=3).encode(
        text=label_col,
        # The text layer inherits the colour encoding from `bar`; override it
        # with a constant so the labels stay black.
        color=alt.value("black"),
    )

    chart = bar + text

    # preset_chart_config is a shared_utils helper; presumably it applies the
    # team style guide -- confirm in shared_utils.styleguide.
    chart = (
        styleguide.preset_chart_config(chart)
        .properties(title=chart_title)
        .configure_axis(grid=False)
    )

    display(chart)

In [27]:
# Bar chart of the number of organizations in each grant-overlap category.
make_bar(
    summary,
    y_col="Grants",
    x_col="Count_of_Organizations",
    label_col="Count_of_Organizations",
    chart_title="# of Organizations by Grant",
)