# Script for Generating Chart Outputs

In [3]:
#! pip install cpi

In [11]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown, HTML

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import _clean_data
import _dla_utils

In [4]:
# Notebook-wide display settings: show every column, and render floats with
# thousands separators and two decimal places.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:,.2f}".format)

In [5]:
# Load the dataset that is passed to make_charts() further down.
df = pd.read_parquet(path="dla_df.parquet")

In [6]:
# Number of rows in the loaded frame.
df.shape[0]

21127

## Charts

In [52]:
def _show_unique_count_by_year(district_df, column, y_title, chart_title):
    """Display a bar chart of the number of distinct `column` values per prepared year.

    Parameters
    ----------
    district_df : pandas.DataFrame
        Rows to summarise; must contain `prepared_y`, `dist` and `column`.
    column : str
        Column whose distinct values are counted within each year/district group.
    y_title : str
        Title for the y axis.
    chart_title : str
        Title for the whole chart.
    """
    yearly_counts = (
        (
            district_df
            >> group_by(_.prepared_y, _.dist)
            >> summarize(n=_[column].nunique())
            >> arrange(-_.prepared_y)
        )
        .rename(columns={"dist": "District", "n": "Count"})
    )

    chart = (
        alt.Chart(yearly_counts)
        .mark_bar()
        .encode(
            column="District:N",
            x=alt.X("prepared_y:O", title="Prepared Year"),
            y=alt.Y("Count:Q", title=y_title),
            color=alt.Color(
                "District:N",
                scale=alt.Scale(range=altair_utils.CALITP_SEQUENTIAL_COLORS),
                # The colour channel encodes District, so the legend is titled
                # accordingly.
                legend=alt.Legend(title="District"),
            ),
        )
        .properties(title=chart_title)
    )
    chart = styleguide.preset_chart_config(chart)
    chart = _dla_utils.add_tooltip(chart, "prepared_y", "Count")
    display(chart)


def _show_work_category(district_df, category):
    """Display the top-agency table and funding chart for one work category.

    Parameters
    ----------
    district_df : pandas.DataFrame
        Rows for the district being reported.
    category : str
        Name of a flag column; rows where it equals 1 belong to the category.
    """
    label = _dla_utils.labeling(category)
    category_df = district_df >> filter(_[category] == 1)

    # Top five agencies by number of obligations in this category, with each
    # agency's share of all obligations in the category.
    top_agencies = (
        _dla_utils.find_top(category_df)
        >> filter(_.variable == "primary_agency_name")
        >> select(_.value, _.count)
    ).head(5)
    # .assign() builds a new frame instead of writing into a .head() slice.
    top_agencies = top_agencies.assign(
        **{"Percent of Category": top_agencies["count"] / len(category_df) * 100}
    ).rename(columns={"value": "Agency", "count": f"{label} Obligations"})

    # Mean requested funding for the five agencies with the most category rows.
    # The summed flag column is only used to rank the agencies.
    funding = (
        category_df.groupby(["primary_agency_name"])
        .agg(
            {
                category: "sum",
                "adjusted_total_requested": "mean",
                "adjusted_fed_requested": "mean",
                "adjusted_ac_requested": "mean",
            }
        )
        .reset_index()
        >> arrange(-_[category])
    ).head(5)
    funding = funding.rename(
        columns={
            "primary_agency_name": "Agency",
            "adjusted_total_requested": "Total Requested",
            "adjusted_fed_requested": "Fed Requested",
            "adjusted_ac_requested": "AC Requested",
        }
    )
    funding_long = pd.melt(
        funding,
        id_vars=["Agency"],
        value_vars=["Total Requested", "Fed Requested", "AC Requested"],
        var_name="Categories",
        value_name="Funding Amount",
    )

    chart = (
        alt.Chart(funding_long)
        .mark_bar()
        .encode(
            x=alt.X(
                "Funding Amount",
                axis=alt.Axis(format="$.2s", title="Obligated Funding ($2021)"),
            ),
            y=alt.Y("Agency"),
            color=alt.Color(
                "Categories:N",
                scale=alt.Scale(range=altair_utils.CALITP_CATEGORY_BRIGHT_COLORS),
            ),
            row="Categories:N",
        )
    )
    chart = _dla_utils.add_tooltip(chart, "Agency", "Funding Amount")

    display(HTML(f"<strong>Top Agencies using {label} Projects</strong>"))
    display(top_agencies.style.format(formatter={"Percent of Category": "{:.2f}%"}))
    display(chart)


def make_charts(df, subset):
    """Render the full district report (headline stats, tables and charts).

    Everything is emitted through `display`; the function returns None.

    Parameters
    ----------
    df : pandas.DataFrame
        Obligations data. The code reads the columns `dist`,
        `primary_agency_name`, `transit`, `prepared_y`, `prefix`,
        `type_of_work`, `adjusted_total_requested`, `adjusted_fed_requested`,
        `adjusted_ac_requested`, plus one flag column per work category listed
        in `work_categories` below.
    subset : scalar
        District value; only rows whose `dist` equals it are reported.

    Notes
    -----
    The passed-in `df` is used as given. Earlier versions re-read
    "dla_df.parquet" here, which silently discarded the caller's frame.
    """
    district_df = df >> filter(_.dist == subset)

    # NOTE(review): the result of count_all_years was never used; the call is
    # kept only in case the helper has side effects - confirm and remove.
    _dla_utils.count_all_years(district_df)
    top_values = _dla_utils.find_top(district_df)

    ## headline statistics
    display(HTML(f"<h1><strong>District {subset} Analysis</strong></h1>"))
    display(HTML("<h2>Quick Stats</h2>"))
    display(HTML(
        f"There are <strong>{district_df.primary_agency_name.nunique()} Unique Agencies</strong>"
    ))

    transit = district_df >> filter(_.transit == 1)
    display(HTML(
        f"Out of <strong>{len(district_df)}</strong> obligations, "
        f"<strong>{len(transit)} are transit-related</strong>."
    ))
    # A district without transit rows has no "top" agency; .iloc[0, 0] on the
    # empty count table would raise IndexError, so the line is skipped.
    if not transit.empty:
        top_transit_agency = (
            transit >> count(_.primary_agency_name) >> arrange(-_.n)
        ).iloc[0, 0]
        display(HTML(
            f"<strong>{top_transit_agency}</strong> has the "
            f"<strong>highest transit</strong> obligations"
        ))

    agency_counts = district_df >> count(_.primary_agency_name) >> arrange(_.n)
    first_year = district_df.prepared_y.min()

    high_cutoff = agency_counts.n.quantile(0.95)
    n_high = len(agency_counts >> filter(_.n > high_cutoff))
    display(HTML(
        f"<strong>{n_high} agencies have over {high_cutoff}</strong> "
        f"obligations (95th percentile) since {first_year}"
    ))

    # The label says "5th percentile", so the cutoff is the 0.05 quantile
    # (it was previously 0.1, contradicting the text).
    low_cutoff = agency_counts.n.quantile(0.05)
    n_low = len(agency_counts >> filter(_.n < low_cutoff))
    display(HTML(
        f"<strong>{n_low} agencies have less than {low_cutoff}</strong> "
        f"obligations (5th percentile) since {first_year}"
    ))

    ## tables
    prefixes_per_agency = _dla_utils.get_nunique(
        district_df, "prefix", "primary_agency_name"
    )

    display(HTML("<strong>Number of Unique Prefix Codes by Agency</strong>"))
    display(
        prefixes_per_agency
        .rename(columns={"primary_agency_name": "Agency",
                         "n": "Number of Unique Prefix Codes"})
        .head(5)
    )

    display(HTML("<strong>Number of Unique Agencies by Prefix Codes</strong>"))
    display(
        _dla_utils.get_nunique(district_df, "primary_agency_name", "prefix")
        .rename(columns={"prefix": "Prefix",
                         "n": "Number of Unique Agencies"})
        .head(5)
    )

    # NOTE(review): this table is built from the transit-only rows although the
    # heading is generic - confirm whether all district rows were intended.
    display(HTML("<strong>Top 5 Types of Work</strong>"))
    display(
        (transit >> count(_.type_of_work) >> arrange(-_.n) >> select(_.type_of_work))
        .rename(columns={"type_of_work": "Type of Work"})
        .head(5)
    )

    ## charts: number of obligations
    display(HTML("<h2>Number of Obligations</h2>"))

    # Line chart: obligations by year
    by_year = (top_values >> filter(_.variable == "prepared_y")).rename(
        columns={"value": "Year"}
    )
    display(_dla_utils.basic_line_chart_test_no_save(
        by_year, "Year:O", "count", subset, "Obligations by Year"
    ))

    # Bar chart: unique agencies per year
    _show_unique_count_by_year(
        district_df,
        "primary_agency_name",
        "Number of Unique Agencies",
        "Number of Unique Agencies by District",
    )

    # Bar chart: agencies with the most obligations
    by_agency = (top_values >> filter(_.variable == "primary_agency_name")).rename(
        columns={"value": "Agency", "count": "Number of Obligations"}
    )
    display(_dla_utils.basic_bar_chart_no_save(
        by_agency, "Agency", "Number of Obligations", "Agency", subset,
        "Agencies With The Most Obligations",
    ))

    ## charts: prefix codes
    display(HTML("<h2>Prefix Codes</h2>"))

    # Bar chart: unique prefix codes per year
    _show_unique_count_by_year(
        district_df,
        "prefix",
        "Number of Unique Prefix Codes",
        "Number of Unique Prefix Codes by District",
    )

    # Bar chart: most used prefix codes
    by_prefix = (top_values >> filter(_.variable == "prefix")).rename(
        columns={"value": "Prefix", "count": "Number of Obligations"}
    )
    display(_dla_utils.basic_bar_chart_no_save(
        by_prefix, "Prefix", "Number of Obligations", "Prefix", subset,
        "Most Used Prefix Codes",
    ))

    # Bar chart: agencies with the most unique prefix codes
    display(_dla_utils.basic_bar_chart_no_save(
        prefixes_per_agency.head(30),
        "primary_agency_name", "n", "primary_agency_name", subset,
        "Agencies With The Most Unique Prefix Codes",
    ))

    ## charts: funding distribution
    display(HTML("<h2>Funding Distribution</h2>"))

    # Bar chart: highest average total requested funds by agency
    avg_by_agency_top = (
        _dla_utils.calculate_data_all(
            district_df, "adjusted_total_requested", "primary_agency_name",
            aggfunc="mean",
        )
        >> arrange(-_.adjusted_total_requested)
    ).head(30)
    display(_dla_utils.basic_bar_chart_no_save(
        avg_by_agency_top,
        "primary_agency_name", "adjusted_total_requested", "primary_agency_name",
        subset, "Average Total Requested Funds by Agency ($2021)",
    ))

    # Bar chart: the 40 agencies with the lowest average total requested funds.
    # Sorted descending, so the lowest averages are the tail of the frame.
    avg_by_agency_bottom = (
        district_df
        >> group_by(_.primary_agency_name)
        >> summarize(avg_funds=_.adjusted_total_requested.mean())
        >> arrange(-_.avg_funds)
    ).tail(40)
    display(_dla_utils.basic_bar_chart_no_save(
        avg_by_agency_bottom,
        "primary_agency_name", "avg_funds", "primary_agency_name", subset,
        "Lowest Average Total Funds by Agency ($2021)",
    ))

    # Bar chart: average total requested funds by prefix
    avg_by_prefix = (
        _dla_utils.calculate_data_all(
            district_df, "adjusted_total_requested", "prefix", aggfunc="mean"
        )
        >> arrange(-_.adjusted_total_requested)
    ).head(30)
    display(_dla_utils.basic_bar_chart_no_save(
        avg_by_prefix,
        "prefix", "adjusted_total_requested", "prefix", subset,
        "Average Total Requested Funds by Prefix ($2021)",
    ))

    ## work categories: one table + chart per flag column
    display(HTML("<h2>Work Categories</h2>"))

    work_categories = [
        "active_transp", "transit", "bridge", "street", "freeway",
        "infra_resiliency_er", "congestion_relief",
    ]
    for category in work_categories:
        _show_work_category(district_df, category)


In [53]:
# Parameters cell: `subset` is the district value that make_charts() matches
# against the `dist` column.
# NOTE(review): presumably overridden per district by a notebook
# parameterization tool - confirm.
subset = 6

In [54]:
# Render the report for the district chosen in the parameters cell.
make_charts(df=df, subset=subset)

Unnamed: 0,Agency,Number of Unqiue Prefix Codes
19,Fresno County,16
26,Kern County,16
51,Tulare County,16
2,Bakersfield,15
18,Fresno,15


Unnamed: 0,Prefix,Number of Unqiue Agencies
20,CML,45
67,STPL,29
46,HSIPL,26
23,CMLNI,23
54,RPSTPL,16


Unnamed: 0,Type of Work
4,Purchase (1) One Diesel School Bus
3,Purchase & Install 10 Bus Shelters
7,Purchase One Propane Powered School Bus
9,Railroad Depot Restoration (tc)
1,Highway Railroad Grade Crossing Hazards Elimin...


Unnamed: 0,Agency,Active Transportation Obligations,Percent of Category
0,Fresno,94,13.02%
1,Kern County,54,7.48%
2,Visalia,50,6.93%
3,Clovis,38,5.26%
4,Sanger,33,4.57%


Unnamed: 0,Agency,Transit Obligations,Percent of Category
0,Kingsburg,4,15.38%
1,Taft,3,11.54%
2,Visalia,3,11.54%
3,Chowchilla,2,7.69%
4,Clovis Unified School District,2,7.69%


Unnamed: 0,Agency,Bridge Obligations,Percent of Category
0,Tulare County,110,31.07%
1,Fresno County,108,30.51%
2,Madera County,43,12.15%
3,Fresno,18,5.08%
4,Kings County,17,4.80%


Unnamed: 0,Agency,Street Obligations,Percent of Category
0,Fresno,194,12.90%
1,Bakersfield,177,11.77%
2,Kern County,176,11.70%
3,Clovis,114,7.58%
4,Visalia,75,4.99%


Unnamed: 0,Agency,Freeway Obligations,Percent of Category
0,Bakersfield,13,44.83%
1,Kern County,10,34.48%
2,Fowler,2,6.90%
3,Fresno,2,6.90%
4,Mendota,2,6.90%


Unnamed: 0,Agency,Infrastructure & Emergency Relief Obligations,Percent of Category
0,Fresno,211,11.66%
1,Fresno County,206,11.39%
2,Kern County,197,10.89%
3,Bakersfield,179,9.89%
4,Tulare County,149,8.24%


Unnamed: 0,Agency,Congestion Relief Obligations,Percent of Category
0,Kern County Council of Governments,20,100.00%
