# **District 10**

How does funding for local agencies differ from district to district? Using E-76 obligation data, we can gain insight into how agencies in District 10 use federal program funds and help identify DLA's core customers.

In [1]:
%%capture

import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown, HTML, Image
import ipywidgets as widgets
from ipywidgets import interact, interactive

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import _dla_utils_reports


In [2]:
#Parameter Cell
# Default district number used when this notebook is run on its own. The
# "# Parameters" cell that follows reassigns `district` (presumably injected
# by a parameterized run such as papermill — TODO confirm).

district = 2

In [3]:
# Parameters
# Replaces the default `district` assigned in the parameter cell; the cells
# below filter the data on this value and pass it to the chart helpers.
district = 10


In [4]:
# Read the E-76 obligations table (all districts at this point) from the
# analytics bucket; the next cell narrows it to a single district.
df = pd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet"
)


In [5]:
# Keep only the obligations that belong to the district being reported on.
df = df >> filter(_.dist == district)

# Summary frames produced by the project helpers from the district subset.
# The chart cells further down slice `df_top` on its `variable` column.
df_years = _dla_utils_reports.count_all_years(df)
df_top = _dla_utils_reports.find_top(df)
    

In [6]:
display(HTML("<h2>Quick Stats</h2>"))

# Headline figure: number of distinct agency names in the district subset.
n_agencies = df.primary_agency_name.nunique()
display(HTML(f"There are <strong>{n_agencies} Unique Agencies</strong>"))

# Transit-flagged obligations for the district. Kept at notebook scope so any
# cell that refers to `transit` still finds it.
transit = df >> filter(_.transit == 1)


def find_transit(df):
    """Display how many obligations in `df` are transit-related.

    The transit subset is derived from the `df` argument (rows where
    `transit == 1`), so both counts in the message describe the same frame.
    When at least one transit obligation exists, the agency with the most
    transit obligations is named as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Obligations with `transit` and `primary_agency_name` columns.

    Returns
    -------
    Whatever `display` returns; the summary itself is rendered as HTML.
    """
    transit_rows = df >> filter(_.transit == 1)
    summary = (
        f"Out of <strong>{len(df)}</strong> obligations, "
        f"<strong>{len(transit_rows)} are transit-related</strong>."
    )

    if len(transit_rows) > 0:
        # Agencies ranked by their number of transit obligations, largest
        # first; the first cell of the first row holds the agency name.
        ranked = transit_rows >> count(_.primary_agency_name) >> arrange(-_.n)
        summary += (
            f"<br><strong>{ranked.iloc[0, 0]}</strong> has the "
            "<strong>highest transit</strong> obligations"
        )

    return display(HTML(summary))


find_transit(df)




# Number of obligations per agency, smallest first.
q = df >> count(_.primary_agency_name) >> arrange(_.n)

# Earliest prepared year in the district subset; quoted in both messages.
first_year = df.prepared_y.min()

# Agencies whose obligation count is above the 95th-percentile cutoff.
q2 = q.n.quantile(0.95)
display(
    HTML(
        f"<strong>{len(q >> filter(_.n > q2))} agencies have over {q2:.2f}</strong> "
        f"obligations (95th percentile) since {first_year}"
    )
)

# Agencies whose obligation count is below the low cutoff. The cutoff is
# quantile(0.1), i.e. the 10th percentile, and the message is labelled to match.
# NOTE(review): if a 5th-percentile cutoff was intended, change 0.1 to 0.05
# and update the label together.
q3 = q.n.quantile(0.1)
display(
    HTML(
        f"<strong>{len(q >> filter(_.n < q3))} agencies have less than {q3:.2f}</strong> "
        f"obligations (10th percentile) since {first_year}"
    )
)

## tables
# Each table shows the first five rows of a summary frame. `get_nunique` is a
# project helper; the renames below rely on it returning the grouping column
# plus an `n` column.
display(HTML("<h4><strong>Number of Unique Prefix Codes by Agency</strong></h4>"))
display(
    _dla_utils_reports.get_nunique(df, "prefix", "primary_agency_name")
    .rename(
        columns={"primary_agency_name": "Agency", "n": "Number of Unique Prefix Codes"}
    )
    .head(5)
)

display(HTML("<h4><strong>Number of Unique Agencies by Prefix Codes</strong></h4>"))
display(
    _dla_utils_reports.get_nunique(df, "primary_agency_name", "prefix")
    .rename(columns={"prefix": "Prefix", "n": "Number of Unique Agencies"})
    .head(5)
)

display(HTML("<h4><strong>Top 5 Types of Work</strong></h4>"))
# Types of work ordered by how many obligations use them; only the label
# column is kept for display.
work_types = (
    (df >> count(_.type_of_work) >> arrange(-_.n) >> select(_.type_of_work))
    .rename(columns={"type_of_work": "Type of Work"})
    .head(5)
)
display(work_types.style)

Unnamed: 0,Agency,Number of Unqiue Prefix Codes
44,Stockton,23
29,Modesto,17
43,Stanislaus County,15
9,Calaveras County,12
25,Mariposa County,11


Unnamed: 0,Prefix,Number of Unqiue Agencies
22,CML,38
57,STPL,20
44,HSIPL,16
16,BRLS,14
13,BRLO,11


Unnamed: 0,Type of Work
59,Bridge Replacement (tc)
58,Bridge Replacement
47,Bridge Preventive Maintenance
416,Pavement Rehabilitation (tc)
184,FTA Transfer


## Number of Obligations

Each obligation is a unique entry in the E-76 dataset. By counting the obligations for each year, district, and organization, we can see the volume for each, as well as which organizations are the most and least frequent customers.

Metrics:
* Obligations by Year
* Number of Unique Agencies by District
* Agencies With The Most Obligations

### Obligations by Year

In [7]:
 #Line chart for Obligations by Year
# Rows of the summary table that describe the prepared year, with the value
# column relabelled so the chart can refer to it as "Year".
chart_df = (
    df_top >> filter(_.variable == "prepared_y")
).rename(columns={"value": "Year"})

# Build the line chart with the project helper, then override the x encoding
# so the year is treated as ordinal and the axis carries a readable title.
chart1 = _dla_utils_reports.basic_line_chart_test_no_save(
    chart_df, "Year", "count", district
).encode(x=alt.X("Year:O", title="Prepared Year"))
display(chart1)

### Number of Unique Agencies by District

In [8]:
# Unique Agencies by Dist
# Count the distinct agency names per prepared year and district, newest
# year first, and give the columns display-friendly names.
dist_years_agency = (
    df
    >> group_by(_.prepared_y, _.dist)
    >> summarize(n=_.primary_agency_name.nunique())
    >> arrange(-_.prepared_y)
).rename(columns={"dist": "District", "n": "Count"})

chart10 = (
    alt.Chart(dist_years_agency)
    .mark_bar()
    .encode(
        column="District:N",
        x=alt.X("prepared_y:O", title="Prepared Year"),
        y=alt.Y("Count:Q", title="Number of Unique Agencies"),
        # The colour channel is keyed on District, so the legend is titled
        # "District" (same as the unique-prefix chart).
        color=alt.Color(
            "District:N",
            scale=alt.Scale(range=altair_utils.CALITP_SEQUENTIAL_COLORS),
            legend=alt.Legend(title="District"),
        ),
    )
)

# Apply the shared chart styling, then the project tooltip helper.
chart10 = styleguide.preset_chart_config(chart10)
chart10 = _dla_utils_reports.add_tooltip(chart10, "prepared_y", "Count")

display(chart10)

### Agencies With The Most Obligations

In [9]:
#Bar chart Agencies With The Most Obligations
# Agency rows of the summary table, with the generic column names swapped for
# the labels the chart refers to.
chart_df = (
    df_top >> filter(_.variable == "primary_agency_name")
).rename(columns={"value": "Agency", "count": "Number of Obligations"})

chart2 = _dla_utils_reports.basic_bar_chart_no_save(
    chart_df, "Agency", "Number of Obligations", "Agency", district
)

display(chart2)


## Prefix Codes

Prefix Codes refer to the program an obligation is in. Similar to the number of obligations, calculating the unique prefix codes provides insight into how many programs DLA is involved in each year, as well as the workload at the district and organization level.

Metrics: 
* Number of Unique Prefix Codes by District
* Most Used Prefix Codes
* Agencies With The Most Unique Prefix Codes

### Number of Unique Prefix Codes by District

In [10]:
#Unique Prefixes by Dist
# Count the distinct prefix codes per prepared year and district, newest
# year first, and give the columns display-friendly names.
dist_years_prefix = (
    df
    >> group_by(_.prepared_y, _.dist)
    >> summarize(n=_.prefix.nunique())
    >> arrange(-_.prepared_y)
).rename(columns={"dist": "District", "n": "Count"})

chart11 = (
    alt.Chart(dist_years_prefix)
    .mark_bar()
    .encode(
        column="District:N",
        x=alt.X("prepared_y:O", title="Prepared Year"),
        # `Count` holds the number of distinct prefix codes, so the axis is
        # titled accordingly.
        y=alt.Y("Count:Q", title="Number of Unique Prefix Codes"),
        color=alt.Color(
            "District:N",
            scale=alt.Scale(range=altair_utils.CALITP_SEQUENTIAL_COLORS),
            legend=alt.Legend(title="District"),
        ),
    )
)

# Apply the shared chart styling, then the project tooltip helper.
chart11 = styleguide.preset_chart_config(chart11)
chart11 = _dla_utils_reports.add_tooltip(chart11, "prepared_y", "Count")

display(chart11)

### Most Used Prefix Codes

In [11]:
#Bar chart with the Most Used Prefix Counts
# Prefix rows of the summary table, with the generic column names swapped for
# the labels the chart refers to.
chart_df = (
    df_top >> filter(_.variable == "prefix")
).rename(columns={"value": "Prefix", "count": "Number of Obligations"})

chart9 = _dla_utils_reports.basic_bar_chart_no_save(
    chart_df, "Prefix", "Number of Obligations", "Prefix", district
)

display(chart9)
    

### Agencies With The Most Unique Prefix Codes

In [12]:
#Bar chart Agencies With The Most Unique Prefix Codes
# Chart the first 30 rows of the per-agency prefix counts returned by the
# project helper (row order is whatever the helper produces).
chart3 = _dla_utils_reports.basic_bar_chart_no_save(
    _dla_utils_reports.get_nunique(df, "prefix", "primary_agency_name").head(30),
    "primary_agency_name",
    "n",
    "primary_agency_name",
    district,
)

display(chart3)

## Funding Distribution

With each E-76, three types of funding amounts are included in the obligations: 
* Total Requested (`total_requested`)
* Advance Construction Requested (`ac_requested`)
* Federal Requested (`fed_requested`)

Using this information, we can determine how much an organization receives on average from these funds, and how the funds are distributed.


Metrics:
* Average Total Requested Funds by Agency
* Lowest Average Total Funds by Agency
* Average Total Requested Funds by Prefix

### Average Total Requested Funds by Agency ($2021)

In [13]:
#Bar chart Average Total Requested Funds by Agency
# The project helper aggregates `adjusted_total_requested` by agency with
# aggfunc="mean"; the result is sorted largest first and cut to 30 rows.
chart4 = _dla_utils_reports.basic_bar_chart_no_save(
    (
        _dla_utils_reports.calculate_data_all(
            df, "adjusted_total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.adjusted_total_requested)
    ).head(30),
    "primary_agency_name",
    "adjusted_total_requested",
    "primary_agency_name",
    district,
)

display(chart4)

### Lowest Average Total Funds by Agency ($2021)

In [14]:
#Bar chart Bottom Average Total Requested Funds by Agency
# Mean adjusted total requested per agency, sorted high to low; the last 50
# rows (the low end of that ordering) are kept in `avg_funds_bottom`.
avg_funds_bottom = (
    df
    >> group_by(_.primary_agency_name)
    >> summarize(avg_funds=_.adjusted_total_requested.mean())
    >> arrange(-_.avg_funds)
).tail(50)

# Only the final 40 of those rows are passed to the chart.
chart5 = _dla_utils_reports.basic_bar_chart_no_save(
    avg_funds_bottom.tail(40),
    "primary_agency_name",
    "avg_funds",
    "primary_agency_name",
    district,
)

display(chart5)

### Average Total Requested Funds by Prefix ($2021)

In [15]:
# Bar chart Average Total Requested Funds by Prefix
# The project helper aggregates `adjusted_total_requested` by prefix with
# aggfunc="mean"; the result is sorted largest first and cut to 30 rows.
chart8 = _dla_utils_reports.basic_bar_chart_no_save(
    (
        _dla_utils_reports.calculate_data_all(
            df, "adjusted_total_requested", "prefix", aggfunc="mean"
        )
        >> arrange(-_.adjusted_total_requested)
    ).head(30),
    "prefix",
    "adjusted_total_requested",
    "prefix",
    district,
)

display(chart8)
    


## Work Categories

While the data includes a description column, organizations have the option to manually input the descriptions. Using the organizations' descriptions of the obligation type, we can categorize the obligations in terms of types of work. We used the following type of work categories:
* Active Transportation
* Transit
* Bridge
* Street
* Freeway
* Infrastructure/Resiliency/Emergency Relief 
* Congestion Relief

With these categories, we can determine which organizations have the most obligations in that category and what percent of the category that organization accounts for. 

In [16]:
# create loop:
# Column keys for the work categories listed in the section text; each one is
# handed to the project helper, which renders that category's section.
work_cat = [
    "active_transp",
    "transit",
    "bridge",
    "street",
    "freeway",
    "infra_resiliency_er",
    "congestion_relief",
]

for cat in work_cat:
    _dla_utils_reports.project_cat(df, cat, district)

HTML(value='<h3> Top Agencies using Active Transportation Projects </h3>')

Unnamed: 0,Agency,Active Transportation Obligations,Percent of Category
0,Stockton,58,15.22%
1,Merced,46,12.07%
2,Ceres,29,7.61%
3,Merced County,25,6.56%
4,Atwater,24,6.30%


HTML(value='<h3> Top Agencies using Transit Projects </h3>')

Unnamed: 0,Agency,Transit Obligations,Percent of Category
0,Stanislaus County,18,20.93%
1,Stockton,14,16.28%
2,Yosemite Area Regional Transportation System JPA,14,16.28%
3,Transit Joint Powers Authority For Merced County,12,13.95%
4,Modesto,8,9.30%


HTML(value='<h3> Top Agencies using Bridge Projects </h3>')

Unnamed: 0,Agency,Bridge Obligations,Percent of Category
0,San Joaquin County,85,21.91%
1,Merced County,61,15.72%
2,Stanislaus County,59,15.21%
3,Calaveras County,38,9.79%
4,Tuolumne County,38,9.79%


HTML(value='<h3> Top Agencies using Street Projects </h3>')

Unnamed: 0,Agency,Street Obligations,Percent of Category
0,Stockton,182,18.06%
1,Modesto,136,13.49%
2,San Joaquin County,120,11.90%
3,Stanislaus County,84,8.33%
4,Turlock,73,7.24%


HTML(value='<h3> Top Agencies using Freeway Projects </h3>')

Unnamed: 0,Agency,Freeway Obligations,Percent of Category
0,San Joaquin County,9,60.00%
1,Modesto,3,20.00%
2,Calaveras County,1,6.67%
3,Stanislaus Council of Governments,1,6.67%
4,Stockton,1,6.67%


HTML(value='<h3> Top Agencies using Infrastructure & Emergency Relief Projects </h3>')

Unnamed: 0,Agency,Infrastructure & Emergency Relief Obligations,Percent of Category
0,San Joaquin County,178,13.09%
1,Stanislaus County,147,10.81%
2,Stockton,128,9.41%
3,Calaveras County,109,8.01%
4,Merced County,90,6.62%


HTML(value='<h3> Top Agencies using Congestion Relief Projects </h3>')

Unnamed: 0,Agency,Congestion Relief Obligations,Percent of Category
0,Stanislaus Council of Governments,18,32.14%
1,Modesto,17,30.36%
2,San Joaquin Council of Governments,8,14.29%
3,Merced County Association of Governments,7,12.50%
4,Stanislaus County,6,10.71%


In [17]:
#  # code help: https://stackoverflow.com/questions/50842160/how-to-display-matplotlib-plots-in-a-jupyter-tab-widget
    
# work_cat = ['active_transp', 'transit', 'bridge', 'street','freeway', 'infra_resiliency_er','congestion_relief']

# out1 = widgets.Output()
# out2 = widgets.Output()
# out3 = widgets.Output()
# out4 = widgets.Output()
# out5 = widgets.Output()
# out6 = widgets.Output()
# out7 = widgets.Output()

#     # children2 = [widgets.Text(description=name) for name in work_cat]
#     # tab2 = widgets.Tab(children2)
# tab2 = widgets.Tab(children = [out1, out2, out3, out4, out5, out6, out7])

# for ii in enumerate(work_cat): 
#     tab2.set_title(ii[0], f"{_dla_utils_reports.labeling(ii[1])}")

# display(tab2)
    
    
# with out1:
#      _dla_utils_reports.project_cat(df, 'active_transp', district) 

# #out1.append_display_data(_dla_utils_reports.project_cat(df, 'active_transp', district))
# #out1.append_display_data(Image(filename=(f'./chart_outputs/project_cat/bar_{district}_active_transp.png')))

# with out2:
#      _dla_utils_reports.project_cat(df, 'transit', district)
# with out3: 
#      _dla_utils_reports.project_cat(df, 'bridge', district)
# with out4:
#     _dla_utils_reports.project_cat(df, 'street', district)
# with out5:
#      _dla_utils_reports.project_cat(df, 'freeway', district)
# with out6:
#     _dla_utils_reports.project_cat(df, 'infra_resiliency_er', district)
# with out7:
#     _dla_utils_reports.project_cat(df, 'congestion_relief', district)
    

In [18]:
# # work_cat = ['active_transp', 'transit', 'bridge', 'street','freeway', 'infra_resiliency_er','congestion_relief']

# # out1 = widgets.Output()
# # out2 = widgets.Output()
# # out3 = widgets.Output()
# # out4 = widgets.Output()
# # out5 = widgets.Output()
# # out6 = widgets.Output()
# # out7 = widgets.Output()

# # tab2 = widgets.Tab(children = [out1, out2, out3, out4, out5, out6, out7])

# # for ii in enumerate(work_cat): 
# #     tab2.set_title(ii[0], f"{_dla_utils.labeling(ii[1])}")

# # display(tab2)
    
# # with out1:
# #     _dla_utils.project_cat(df, 'active_transp', district) 
# # with out2:
# #      _dla_utils.project_cat(df, 'transit', district)
# # with out3: 
# #      _dla_utils.project_cat(df, 'bridge', district)
# # with out4:
# #     _dla_utils.project_cat(df, 'street', district)
# # with out5:
# #      _dla_utils.project_cat(df, 'freeway', district)
# # with out6:
# #     _dla_utils.project_cat(df, 'infra_resiliency_er', district)
# # with out7:
# #     _dla_utils.project_cat(df, 'congestion_relief', district)
    
    
# ## https://medium.com/analytics-vidhya/how-to-use-python-to-build-an-image-display-app-in-jupyter-notebook-337c4fd3775f
# def show_im():
#     CSS = """
#     .output {
#         flex-direction: row;
#     }
#     """
#     HTML('<style>{}</style>'.format(CSS))

# def image_viewer(district, i): 
#     image_grid = Image(filename=(f'./chart_outputs/project_cat/bar_{district}_{i}.png'))
#     #image_grid_html = HTML(image_grid.to_html(escape=False ,formatters=dict(a=path_to_image_html, 
#     #b=path_to_image_html)))
#     #display(image_grid_html)
#     show_im()    
    

# tab_nest = widgets.Tab()


# #interact function in isolation

# image = (image_viewer(district, 'active_transp'))
# f1 = interactive(image);
# tab_nest.children = [widgets.VBox(children = f1.children)]
# display(tab_nest)

In [19]:
# Image(filename=('./chart_outputs/project_cat/bar_4_active_transp.png'))

In [20]:
## to markdown
#![pic](./chart_outputs/project_cat/bar_4_active_transp.png)