# Script work for Generating Charting Outputs

In [1]:
import pandas as pd
from siuba import *

import altair as alt

from IPython.display import HTML

from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

import _dla_utils

E0407 19:10:10.337130279    1069 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E0407 19:10:10.841870300    1069 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


In [2]:
import ipywidgets as widgets


In [3]:
# Notebook-wide pandas display settings: never truncate columns, and render
# floats with thousands separators and two decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:,.2f}".format)

In [4]:
# Load the DLA table from the parquet file that sits next to this notebook.
df = pd.read_parquet(path="dla_df.parquet")

In [5]:
# Number of rows loaded.
df.shape[0]

21144

## Charts

In [15]:
def _district_year_unique_chart(df, unique_expr, y_title, chart_title):
    """Build a bar chart of a per-year distinct count, faceted by district.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least `prepared_y` and `dist` columns.
    unique_expr : siuba expression
        Expression evaluated for each (`prepared_y`, `dist`) group, e.g.
        `_.prefix.nunique()`.
    y_title : str
        Title of the y axis.
    chart_title : str
        Title of the chart.

    Returns
    -------
    The Altair chart after `styleguide.preset_chart_config` and
    `_dla_utils.add_tooltip` have been applied.
    """
    counts = (
        (
            df
            >> group_by(_.prepared_y, _.dist)
            >> summarize(n=unique_expr)
            >> arrange(-_.prepared_y)
        )
        .rename(columns={'dist': 'District', 'n': 'Count'})
    )

    chart = (
        alt.Chart(counts)
        .mark_bar()
        .encode(
            column='District:N',
            x=alt.X('prepared_y:O', title='Prepared Year'),
            y=alt.Y('Count:Q', title=y_title),
            # The colour channel encodes District, so the legend is titled to match.
            color=alt.Color(
                "District:N",
                scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
                legend=alt.Legend(title="District"),
            ),
        )
        .properties(title=chart_title)
    )
    chart = styleguide.preset_chart_config(chart)
    return _dla_utils.add_tooltip(chart, 'prepared_y', 'Count')


def make_charts(df, subset):
    """Display the full set of headline stats, tables and charts for one district.

    Parameters
    ----------
    df : pandas.DataFrame
        Obligations table. The columns read directly here are `dist`,
        `primary_agency_name`, `transit`, `prepared_y`, `prefix`,
        `type_of_work` and `adjusted_total_requested`; the `_dla_utils`
        helpers may need more.
    subset
        District value; only rows where `dist == subset` are reported on.

    Returns
    -------
    None. Everything is rendered through `display`.

    Notes
    -----
    The frame passed in is the one that is used; it is not re-read from disk.
    """
    df = df >> filter(_.dist == subset)

    # Nothing to report on: say so instead of failing further down.
    if df.empty:
        display(HTML(f"<strong>No obligations found for District {subset}.</strong>"))
        return

    # subsetting the data
    df_years = _dla_utils.count_all_years(df)
    df_top = _dla_utils.find_top(df)

    ## print statements
    display(HTML(f"<h1><strong>District {subset} Analysis</strong></h1>"))
    display(HTML("<h2>Quick Stats</h2>"))
    display(HTML(f"There are <strong>{(df.primary_agency_name.nunique())} Unique Agencies</strong>"))

    transit = df >> filter(_.transit == 1)
    display(HTML(f"Out of <strong>{len(df)}</strong> obligations, <strong>{len(transit)} are transit-related</strong>."))

    # Only name a top transit agency when the district has transit rows;
    # otherwise `.iloc[0, 0]` would raise IndexError on the empty frame.
    transit_by_agency = transit >> count(_.primary_agency_name) >> arrange(-_.n)
    if len(transit_by_agency) > 0:
        display(HTML(f"<strong>{transit_by_agency.iloc[0, 0]}</strong> has the <strong>highest transit</strong> obligations"))

    agency_counts = df >> count(_.primary_agency_name) >> arrange(_.n)
    first_year = df.prepared_y.min()

    high_cutoff = agency_counts.n.quantile(.95)
    # formatting decimals from: https://stackoverflow.com/questions/2389846/python-decimals-format
    display(HTML(f"There are <strong>{len(agency_counts >> filter(_.n > high_cutoff))} agencies have over {('%.2f' % high_cutoff)}</strong> obligations (95th percentile) since {first_year}"))

    # The cutoff is the 0.10 quantile, so the label says 10th percentile.
    # NOTE(review): if the 5th percentile was intended, change the quantile to .05 and the label together.
    low_cutoff = agency_counts.n.quantile(.1)
    display(HTML(f"There are <strong>{len(agency_counts >> filter(_.n < low_cutoff))} agencies have less than {('%.2f' % low_cutoff)}</strong> obligations (10th percentile) since {first_year}"))

    ## tables
    display(HTML("<strong>Number of Unique Prefix Codes by Agency</strong>"))
    display(
        _dla_utils.get_nunique(df, 'prefix', 'primary_agency_name')
        .rename(columns={'primary_agency_name': 'Agency',
                         'n': 'Number of Unique Prefix Codes'})
        .head(5)
    )

    display(HTML("<strong>Number of Unique Agencies by Prefix Codes</strong>"))
    display(
        _dla_utils.get_nunique(df, 'primary_agency_name', 'prefix')
        .rename(columns={'prefix': 'Prefix',
                         'n': 'Number of Unique Agencies'})
        .head(5)
    )

    display(HTML("<strong>Top 5 Types of Work</strong>"))
    # NOTE(review): this table is built from the transit-only rows, not the whole
    # district - confirm that is what the heading is meant to describe.
    display(
        (transit >> count(_.type_of_work) >> arrange(-_.n) >> select(_.type_of_work))
        .rename(columns={'type_of_work': 'Type of Work'})
        .head(5)
    )

    # Visual Charts
    display(HTML("<h2>Number of Obligations</h2>"))

    # Line chart for Obligations by Year
    chart_df = (df_top >> filter(_.variable == 'prepared_y')).rename(columns={"value": "Year"})
    chart1 = (
        _dla_utils.basic_line_chart_test_no_save(chart_df, 'Year', 'count', subset, 'Obligations by Year')
    ).encode(x=alt.X('Year:O', title='Prepared Year'))
    display(chart1)

    # Unique Agencies by Dist
    chart10 = _district_year_unique_chart(
        df,
        _.primary_agency_name.nunique(),
        'Number of Unique Agencies',
        "Number of Unique Agencies by District",
    )
    display(chart10)

    # Bar chart Agencies With The Most Obligations
    chart_df = (df_top >> filter(_.variable == 'primary_agency_name')).rename(
        columns={"value": "Agency", "count": "Number of Obligations"}
    )
    chart2 = _dla_utils.basic_bar_chart_no_save(
        chart_df, 'Agency', 'Number of Obligations', 'Agency', subset,
        'Agencies With The Most Obligations',
    )
    display(chart2)

    display(HTML("<h2>Prefix Codes</h2>"))

    # Unique Prefixes by Dist
    chart11 = _district_year_unique_chart(
        df,
        _.prefix.nunique(),
        'Number of Unique Prefix Codes',
        "Number of Unique Prefix Codes by District",
    )
    display(chart11)

    # Bar chart with the Most Used Prefix Counts
    chart_df = (df_top >> filter(_.variable == 'prefix')).rename(
        columns={"value": "Prefix", "count": "Number of Obligations"}
    )
    chart9 = _dla_utils.basic_bar_chart_no_save(
        chart_df, 'Prefix', 'Number of Obligations', 'Prefix', subset,
        'Most Used Prefix Codes',
    )
    display(chart9)

    # Bar chart Agencies With The Most Unique Prefix Codes
    chart3 = _dla_utils.basic_bar_chart_no_save(
        _dla_utils.get_nunique(df, 'prefix', 'primary_agency_name').head(30),
        'primary_agency_name', 'n', 'primary_agency_name', subset,
        'Agencies With The Most Unique Prefix Codes',
    )
    display(chart3)

    display(HTML("<h2>Funding Distribution</h2>"))

    # Bar chart Average Total Requested Funds by Agency
    avg_by_agency = (
        _dla_utils.calculate_data_all(df, 'adjusted_total_requested', 'primary_agency_name', aggfunc="mean")
        >> arrange(-_.adjusted_total_requested)
    ).head(30)
    chart4 = _dla_utils.basic_bar_chart_no_save(
        avg_by_agency, 'primary_agency_name', 'adjusted_total_requested', 'primary_agency_name', subset,
        'Average Total Requested Funds by Agency ($2021)',
    )
    display(chart4)

    # Bar chart Bottom Average Total Requested Funds by Agency:
    # the 40 agencies with the lowest mean, taken from the end of the descending sort.
    avg_funds_bottom = (
        df
        >> group_by(_.primary_agency_name)
        >> summarize(avg_funds=_.adjusted_total_requested.mean())
        >> arrange(-_.avg_funds)
    ).tail(40)
    chart5 = _dla_utils.basic_bar_chart_no_save(
        avg_funds_bottom, 'primary_agency_name', 'avg_funds', 'primary_agency_name', subset,
        'Lowest Average Total Funds by Agency ($2021)',
    )
    display(chart5)

    # Bar chart Average Total Requested Funds by Prefix
    avg_by_prefix = (
        _dla_utils.calculate_data_all(df, 'adjusted_total_requested', 'prefix', aggfunc="mean")
        >> arrange(-_.adjusted_total_requested)
    ).head(30)
    chart8 = _dla_utils.basic_bar_chart_no_save(
        avg_by_prefix, 'prefix', 'adjusted_total_requested', 'prefix', subset,
        'Average Total Requested Funds by Prefix ($2021)',
    )
    display(chart8)

    # work categories info and charts
    display(HTML("<h2>Work Categories</h2>"))

    # code help: https://stackoverflow.com/questions/50842160/how-to-display-matplotlib-plots-in-a-jupyter-tab-widget
    work_cat = ['active_transp', 'transit', 'bridge', 'street', 'freeway', 'infra_resiliency_er', 'congestion_relief']

    # One independent Output pane per category; each becomes one tab page.
    panes = [widgets.Output() for _unused in work_cat]
    tab = widgets.Tab(children=panes)

    # Title and content come from the same list entry, so they cannot drift apart.
    for position, (category, pane) in enumerate(zip(work_cat, panes)):
        tab.set_title(position, f"{_dla_utils.labeling(category)}")
        with pane:
            _dla_utils.project_cat(df, category)

    display(tab)

In [16]:
# Parameters cell: `subset` is the district value handed to make_charts,
# which keeps only the rows whose `dist` equals it.
subset = 4

In [17]:
# Render the report for the district chosen in the parameters cell.
make_charts(df=df, subset=subset)

Unnamed: 0,Agency,Number of Unqiue Prefix Codes
28,Contra Costa County,18
107,San Francisco County,17
110,San Jose,17
119,Santa Clara County,17
2,Alameda County,16


Unnamed: 0,Prefix,Number of Unqiue Agencies
91,STPL,112
19,CML,90
62,HSIPL,52
16,BRLS,32
75,RPSTPL,30


Unnamed: 0,Type of Work
6,Construct Pedestrian Safety And Transit Access
14,"Implement Bike, Pedestrian, And Transit Access..."
28,"Ped. Crossing, Bike Racks, Bus Shelter"
4,Clipper Fare Collection System Phase 3 (tc)
7,Construction Of Transit Center Depot Building


Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output()), _titles={'0': 'Active Tra…

## Tabs Work

In [18]:
# Work-category keys, in the order the tabs are laid out.
work_cat = [
    'active_transp',
    'transit',
    'bridge',
    'street',
    'freeway',
    'infra_resiliency_er',
    'congestion_relief',
]

In [19]:
# First experiment: one Text widget per category as the tab pages.
children = [widgets.Text(description=name) for name in work_cat]
tab = widgets.Tab()
tab.children = children

# Title each page with the label for its category.
for position, category in enumerate(work_cat):
    tab.set_title(position, f"{_dla_utils.labeling(category)}")
tab

Tab(children=(Text(value='', description='active_transp'), Text(value='', description='transit'), Text(value='…

In [20]:
# Seven separate Output widgets, created in order and bound to output1..output7.
output1, output2, output3, output4, output5, output6, output7 = (
    widgets.Output() for _unused in range(7)
)

In [21]:
# Show the first Output widget on its own; no cell above has written into it.
output1

Output()

In [22]:
# Second experiment: the Output widgets are the tab pages (left empty here).
children = [output1, output2, output3, output4, output5, output6, output7]
tab = widgets.Tab(children=children)

# Title each page with the label for its category.
for position, category in enumerate(work_cat):
    tab.set_title(position, f"{_dla_utils.labeling(category)}")

display(tab)

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output()), _titles={'0': 'Active Tra…

In [23]:
# code help: https://stackoverflow.com/questions/50842160/how-to-display-matplotlib-plots-in-a-jupyter-tab-widget

# Seven independent Output panes; the out1..out7 names are kept for any later use.
out1, out2, out3, out4, out5, out6, out7 = (
    widgets.Output() for _unused in range(7)
)
tab2_panes = [out1, out2, out3, out4, out5, out6, out7]

tab2 = widgets.Tab(children=tab2_panes)

# Pair each pane with its category once, so the tab title and the content
# rendered inside it always refer to the same work_cat entry.
for position, (category, pane) in enumerate(zip(work_cat, tab2_panes)):
    tab2.set_title(position, f"{_dla_utils.labeling(category)}")
    with pane:
        _dla_utils.project_cat(df, category)

display(tab2)


Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output()), _titles={'0': 'Active Tra…

In [24]:
# Suffix numbers 1 through 7, matching the out1..out7 names.
ts = list(range(1, 8))

In [25]:
# Echo the list to check its contents.
ts

[1, 2, 3, 4, 5, 6, 7]

In [26]:
# Print the names "out1".."out7" as plain strings, one per line.
# These are only text; they do not reference the Output widgets.
for label in map('out{}'.format, ts):
    print(label)


out1
out2
out3
out4
out5
out6
out7


In [27]:
# Build seven *distinct* Output widgets.
# A chained assignment (`op0 = op1 = ... = widgets.Output()`) evaluates the
# right-hand side once and binds every name to that single widget, so all
# seven tab pages would share one Output.
outs = [widgets.Output() for _unused in range(7)]

# Keep the individual names available alongside the list.
op0, op1, op2, op3, op4, op5, op6 = outs


In [35]:
# Work-category keys, in tab order.
work_cat = [
    'active_transp',
    'transit',
    'bridge',
    'street',
    'freeway',
    'infra_resiliency_er',
    'congestion_relief',
]

# Tab whose pages are the widgets collected in `outs`.
tab3 = widgets.Tab(children=outs)

# Title each page with the label for its category.
for position, category in enumerate(work_cat):
    tab3.set_title(position, f"{_dla_utils.labeling(category)}")

display(tab3)

Tab(children=(Output(), Output(), Output(), Output(), Output(), Output(), Output()), _titles={'0': 'Active Tra…

In [34]:
# NOTE(review): abandoned draft, entirely commented out, so nothing here runs.
# As written, the inner `for cat in work_cat` loop sits inside the per-tab loop,
# so every category would be rendered into each tab's Output.
# sub_tab=[widgets.Output() for i in (work_cat)]
# tab = widgets.Tab(sub_tab)
# for i in range (len(work_cat)):
#     tab.set_title(i,"Tab {}".format(i+1))
    
#     for cat in work_cat:
#         with sub_tab[i]:
#             project_cat(df, cat)
# display(tab)