In [None]:
import warnings
# Silence every warning for the rest of the notebook run.
# NOTE(review): this also hides deprecation warnings from the libraries below — confirm that is intended.
warnings.filterwarnings("ignore")

import altair as alt
import altair_saver
import pandas as pd

# Star import; presumably the source of the `_` placeholder and the piped verbs
# (`filter`, `group_by`, `count`, `arrange`) used with `>>` in later cells.
# NOTE(review): a star-imported `filter` shadows the Python builtin of the same name.
from siuba import *
from IPython.display import Markdown

import clean_data
import _dla_utils
from shared_utils import styleguide
from shared_utils import calitp_color_palette as cp

# Select the 'altair_saver' renderer and request PNG output for charts.
alt.renderers.enable('altair_saver', fmts=['png'])

In [None]:
# Notebook parameter: the district number used below to filter the data and label the report.
# TODO: move this into a dedicated parameters cell so the value can be supplied per run.
district = 7

In [None]:
# Report title: a top-level Markdown heading naming the selected district.
display(Markdown(f"# District {district} Analysis"))

In [None]:
# Disabled alternative: build the frame from the cleaning module instead of the parquet file.
# df = _clean_data.make_clean_data()

# Read the parquet extract and keep only the rows for the selected district.
df = pd.read_parquet("dla_df.parquet") >> filter(_.dist == district)

# Summary tables derived from the district subset (helpers live in _dla_utils).
df_years = _dla_utils.count_all_years(df)
df_top = _dla_utils.find_top(df)

In [None]:
def filter_relabel(df, variable):
    """Display the rows of a long-format frame that belong to one variable.

    Keeps the rows whose ``variable`` column equals ``variable``, selects the
    ``value`` and ``count`` columns, and renames ``value`` to the title-cased
    variable name (underscores turned into spaces).

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with ``variable``, ``value`` and ``count`` columns.
    variable : str
        Name of the variable to keep, e.g. ``"primary_agency_name"``.

    Returns
    -------
    None
        The relabelled table is shown with ``display``; nothing is returned.
    """
    pretty_name = variable.replace('_', ' ').title()
    is_match = df.variable == variable
    relabelled = df.loc[is_match, ["value", "count"]].rename(
        columns={"value": pretty_name}
    )

    display(relabelled)

In [None]:
# Chart helpers: for now, charts are displayed directly instead of being saved to a file.
def labeling(word):
    """Turn a column or variable name into a human-readable chart label.

    Resolution order:

    1. ``"mpo"`` and ``"rtpa"`` are acronyms and are upper-cased.
    2. Names listed in ``LABEL_DICT`` use their hand-written label.
    3. Anything else is title-cased with underscores replaced by spaces; a
       leading ``n_`` expands to "Number Of " and a leading ``unique_`` to
       "Number Of Unique ".

    The prefixes are expanded only at the start of the name. Replacing them
    anywhere in the string mangled names such as ``implementation_agency``
    (the ``n_`` in the middle matched), and the ``unique_`` expansion never
    fired because it ran after the text had already been title-cased.

    Parameters
    ----------
    word : str
        Column or variable name, e.g. ``"type_of_work"``.

    Returns
    -------
    str
        The display label.
    """
    # Labels that are not simply the title-cased column name.
    LABEL_DICT = {
        "prepared_y": "Year",
        "dist": "District",
        "total_requested": "Total Requested",
        "fed_requested": "Fed Requested",
        "ac_requested": "Advance Construction Requested",
        "nunique": "Number of Unique",
        "project_no": "Project Number",
    }

    if word in ("mpo", "rtpa"):
        return word.upper()
    if word in LABEL_DICT:
        return LABEL_DICT[word]

    # Expand counting prefixes before title-casing, and only as true prefixes.
    if word.startswith("n_"):
        word = "Number of " + word[len("n_"):]
    elif word.startswith("unique_"):
        word = "Number of Unique " + word[len("unique_"):]

    return word.replace('_', ' ').title()

def basic_bar_chart(df, x_col, y_col):
    """Draw and display a bar chart of ``y_col`` against ``x_col``.

    Bars are sorted by descending y value and coloured by ``y_col`` using the
    Cal-ITP sequential palette. Axis, legend and chart titles are produced by
    ``labeling``. The chart is passed through
    ``styleguide.preset_chart_config`` and shown with ``display``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    x_col : str
        Column encoded on the x axis.
    y_col : str
        Column encoded on the y axis and as the bar colour.

    Returns
    -------
    None
        The chart is displayed, not returned.
    """
    x_label = labeling(x_col)
    y_label = labeling(y_col)

    x_axis = alt.X(x_col, title=x_label, sort=('-y'))
    y_axis = alt.Y(y_col, title=y_label)
    bar_color = alt.Color(
        y_col,
        scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
        legend=alt.Legend(title=(y_label)),
    )

    bars = alt.Chart(df).mark_bar().encode(x=x_axis, y=y_axis, color=bar_color)
    bars = bars.properties(title=f"Highest {x_label} by {y_label}")

    # Apply the shared style guide before rendering.
    display(styleguide.preset_chart_config(bars))

In [None]:
# Aggregate primary_agency_name with "nunique" and take the first row's value
# as the number of distinct agencies in the district.
unique_agencies = _dla_utils.calculate_data_all(
    df, 'primary_agency_name', aggfunc="nunique"
).primary_agency_name.iloc[0]

# Section heading followed by a one-sentence summary.
display(Markdown(f"## Obligations"))
display(Markdown(f"There are {unique_agencies} unique agencies in District {district}."))

In [None]:
# Sub-section heading, then the table of top agencies from the long-format summary.
display(Markdown("### Agencies with the Most Obligations"))
filter_relabel(df_top, "primary_agency_name")

In [None]:
# Columns that each get their own report section: a bar chart of the top
# values, the matching table, and a per-agency breakdown.
display_cols = ["prefix",
                "type_of_work", "project_location"]
for c in display_cols:
    header = c.replace('_', ' ').title()
    display(Markdown(f"### {header}"))

    # Rows of the top-values summary that belong to this column.
    subset = (df_top >> filter(_.variable==c))

    # Counts of this column's values per primary agency, ordered by -n
    # (largest count first), with the agency column relabelled for display.
    # NOTE(review): confirm how siuba's count/arrange treat the grouping here.
    by_primary_agency = (df >> group_by(_.primary_agency_name)
                         >> count(_[c])
                         >> arrange(-_.n)
                        ).rename(
        columns = {"primary_agency_name": "Primary Agency Name"}
    ).reset_index(drop=True)

    basic_bar_chart(subset, 'value', 'count')

    # Render sub-headings as Markdown, like every other heading in the
    # notebook; print() would show the literal '####' characters.
    # TODO: replace this placeholder heading text.
    display(Markdown("#### Some other header for subset df"))
    filter_relabel(subset, c)

    display(Markdown(f"#### Top 10 {header} by Primary Agency"))
    display(by_primary_agency.head(10))