# Script work for Generating Charting Outputs

In [111]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import clean_data
import _dla_utils

In [None]:
pd.set_option('display.max_columns', None)

In [29]:
df= pd.read_parquet("dla_df.parquet")

In [30]:
subset= 7

In [31]:
df = df>>filter(_.dist==subset)

In [32]:
def labeling(word):
    # Add specific use cases where it's not just first letter capitalized
    LABEL_DICT = { "prepared_y": "Year",
              "dist": "District",
              "total_requested": "Total Requested",
              "fed_requested":"Fed Requested",
              "ac_requested": "Advance Construction Requested",
              "nunique":"Number of Unique",
              "project_no": "Project Number"}
    
    if (word == "mpo") or (word == "rtpa"):
        word = word.upper()
    elif word in LABEL_DICT.keys():
        word = LABEL_DICT[word]
    else:
        word = word.replace('n_', 'Number of ').title()
        word = word.replace('unique_', "Number of Unique ").title()
        word = word.replace('_', ' ').title()
    
    return word


In [112]:
def basic_bar_chart_test(df, x_col, y_col, color_col, chart_title=''):

    if chart_title == "":
        chart_title = (f"{labeling(x_col)} by {labeling(y_col)}")

    
    chart = (alt.Chart(df)
             .mark_bar()
             .encode(
                 x=alt.X(x_col, title=labeling(x_col), sort=('-y')),
                 y=alt.Y(y_col, title=labeling(y_col)),
                 color = alt.Color(color_col,
                                  scale=alt.Scale(
                                      range=altair_utils.CALITP_SEQUENTIAL_COLORS),
                                      legend=alt.Legend(title=(labeling(color_col)))
                                  ))
             .properties( 
                          title=chart_title)
    )

    chart=styleguide.preset_chart_config(chart)
   # chart.save(f"./chart_outputs/d{subset}_outputs/bar_{x_col}_by_{y_col}.png")
    
    return chart

In [113]:
df_years= _dla_utils.count_all_years(df)

In [114]:
basic_bar_chart_test(df_years, 'prepared_y', 'total_requested_sum', 'prepared_y')

In [110]:
#df = (df>>filter(_.dist==subset))
df_top = _dla_utils.find_top(df)

basic_bar_chart_test((df_top>>filter(_.variable=='primary_agency_name')), 'value', 'count', 'count', chart_title='Primary Agency name')

In [92]:
df_top = _dla_utils.find_top(df)

In [93]:
df_top>>filter(_.variable=='prefix')


Unnamed: 0,value,count,variable
0,HSIPL,705,prefix
1,STPL,441,prefix
2,CML,275,prefix
3,ER,175,prefix
4,HPLUL,143,prefix
5,ATPL,135,prefix
6,BHLS,101,prefix
7,FTACML,94,prefix
8,SRTSL,72,prefix
9,RPSTPL,63,prefix


In [103]:
def make_charts(df):     
    df= pd.read_parquet("dla_df.parquet")
    
    df = (df>>filter(_.dist==subset))
    
    #subsetting the data
    df_years = _dla_utils.count_all_years(df)
    df_top = _dla_utils.find_top(df)
    
    ##print statements
    
    display(Markdown(f'There are {(df.primary_agency_name.nunique())} Unique Agencies'))
    
    
    
    
    ##tables
    
    
    ##charting df_years
    
    column_names = list(df_years)

    for column in column_names:
    
        display(Markdown(f"**{labeling(column)} Over Prepared Year**"))
        display(basic_bar_chart_test(df_years, "prepared_y", column, "prepared_y"))
    
    
    ##charting df_tops 
    
    values = sorted(df_top['variable'].unique())

    for value in values:
        display(Markdown(f"**Top Values in {labeling(value)}**"))
        display(basic_bar_chart_test((df_top>>filter(_.variable==value)), 'value', 'count', 'count',
                                     chart_title=f'Top 20 {labeling(value)}'))

In [104]:
#parameters cell
#subset = 11

In [105]:
make_charts(df)

There are 108 Unique Agencies

**Year Over Prepared Year**

**District Over Prepared Year**

**Ac Requested Sum Over Prepared Year**

**Fed Requested Sum Over Prepared Year**

**Total Requested Sum Over Prepared Year**

**Ac Requested Mean Over Prepared Year**

**Fed Requested Mean Over Prepared Year**

**Total Requested Mean Over Prepared Year**

**Unique Mpo Over Prepared Year**

**Unique Prefix Over Prepared Year**

**Unique Primary Agency Name Over Prepared Year**

**Unique Project Location Over Prepared Year**

**Unique Project No Over Prepared Year**

**Unique Type Of Work Over Prepared Year**

**Top Values in District**

**Top Values in MPO**

**Top Values in Prefix**

**Top Values in Year**

**Top Values in Primary Agency Name**

**Top Values in Project Location**

**Top Values in Seq**

**Top Values in Status Comment**

**Top Values in Type Of Work**

In [93]:
unqiue_agencies = (df.primary_agency_name.nunique())

In [94]:
unqiue_agencies

31