# Obligations in District 3

Using data from the Division of Local Assistance [Obligation list](https://dot.ca.gov/programs/local-assistance/reports/e-76-obligated)

In [1]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import clean_data
import _dla_utils



In [2]:
# Alternative data source, currently disabled: build the frame with the project's cleaning routine.
# NOTE(review): the import cell brings in `clean_data`, not `_clean_data` — confirm the module name before re-enabling.
#df = _clean_data.make_clean_data()
# Load the obligations table from a parquet file located relative to the working directory.
df= pd.read_parquet("dla_df.parquet")

In [3]:
# Parameters cell: `subset` is the value compared against the `dist` column when the data is filtered.
# NOTE(review): presumably overridden by a notebook-parameterization tool when run per district — confirm.
subset = 3

In [4]:
# Preview a single row of the loaded data (this runs before the district filter is applied).
# A fixed random_state keeps the displayed row identical across Restart & Run All;
# an unseeded sample() shows a different row on every execution.
df.sample(n=1, random_state=0)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
9790,Obligated,CML,5197(022),Sanger,2015-02-03,2015-02-19,2015-03-03,2015-03-03,2015-03-03,144500.0,...,,Annadale Ave From Academy Ave To Bennett Way,Bike Lanes And Ped Crosswalk (tc),2,NaT,NaT,CFCG,5197,2015.0,Sanger


In [5]:
# Keep only the rows whose `dist` value equals the `subset` parameter.
df = df >> filter(_.dist == subset)

In [6]:
# Build the per-year summary table with the project helper.
# NOTE(review): judging by the displayed result, this returns one row per `prepared_y` with
# sums/means of the requested amounts and unique counts of several columns — confirm in _dla_utils.
df_years = _dla_utils.count_all_years(df)

In [7]:
# Display the per-year summary table.
df_years

Unnamed: 0,prepared_y,dist,ac_requested_sum,fed_requested_sum,total_requested_sum,ac_requested_mean,fed_requested_mean,total_requested_mean,unique_mpo,unique_prefix,unique_primary_agency_name,unique_project_location,unique_project_no,unique_type_of_work
8,2021,3,103523700.0,108927700.0,237814100.0,417434.091169,439224.708387,958927.6,5.0,34.0,45.0,191.0,198.0,164.0
7,2020,3,-6858257.0,155129900.0,238918200.0,-22486.088393,508622.595902,783338.5,5.0,38.0,43.0,227.0,234.0,189.0
6,2019,3,-10332570.0,180079000.0,211567200.0,-25831.42805,450197.418575,528918.1,4.0,51.0,46.0,316.0,321.0,254.0
0,2018,3,25276740.0,135271300.0,190472800.0,73693.127668,394376.846851,555314.4,4.0,38.0,47.0,277.0,282.0,219.0
1,2017,3,48434870.0,171321900.0,201490900.0,156747.156019,554439.921909,652074.2,4.0,42.0,44.0,266.0,270.0,220.0
2,2016,3,723015.0,126715500.0,162393400.0,2362.794118,414102.874412,530697.4,4.0,44.0,47.0,257.0,269.0,218.0
3,2015,3,-3169509.0,95224790.0,114306600.0,-11652.606618,350091.136213,420244.7,4.0,36.0,46.0,223.0,232.0,184.0
4,2014,3,-8798279.0,134341400.0,182470400.0,-27580.811912,421132.762978,572007.5,6.0,34.0,43.0,254.0,264.0,200.0
5,2013,3,210000.0,6672364.0,21777250.0,17500.0,556030.3625,1814771.0,2.0,11.0,10.0,12.0,12.0,11.0


In [8]:
# Build the "top values" table with the project helper.
# NOTE(review): the displayed result is long-form (`value`, `count`, `variable`), apparently holding the
# most frequent values for several columns — confirm how many values per column in _dla_utils.
df_top = _dla_utils.find_top(df)

In [9]:
# Display the long-form table of most frequent values.
df_top

Unnamed: 0,value,count,variable
0,CML,396,prefix
1,BRLO,384,prefix
2,HSIPL,378,prefix
3,STPL,276,prefix
4,ER,158,prefix
...,...,...,...
16,Placer County Transportation Planning Agency,43,primary_agency_name
17,Folsom,41,primary_agency_name
18,Glenn County,38,primary_agency_name
19,Lincoln,37,primary_agency_name


## Which Agencies have Obligations

### How many Unique Agencies

In [10]:
# Count the distinct agency names in the filtered data.
_dla_utils.calculate_data_all(
    df,
    'primary_agency_name',
    aggfunc="nunique",
)

Unnamed: 0,dist,primary_agency_name
0,3,62


### Agencies with the most Obligations

In [11]:
# Rows of the top-values table that describe agencies.
(
    df_top
    >> filter(_.variable == 'primary_agency_name')
)

Unnamed: 0,value,count,variable
0,El Dorado County,293,primary_agency_name
1,Sacramento County,221,primary_agency_name
2,Sacramento,182,primary_agency_name
3,Placer County,173,primary_agency_name
4,Yuba County,141,primary_agency_name
5,Nevada County,139,primary_agency_name
6,Butte County,133,primary_agency_name
7,Yolo County,89,primary_agency_name
8,Elk Grove,64,primary_agency_name
9,Sierra County,61,primary_agency_name


### Agencies with the least Obligations

In [12]:
# Count rows per agency, order from most to fewest, then show the last ten rows (the fewest).
agency_obligation_counts = (
    df
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
)
agency_obligation_counts.tail(10)

Unnamed: 0,primary_agency_name,n
27,Modoc County,4
51,"U.S. Forest Service, Pacific Southwest Region",4
52,University of California - Davis,4
7,Capital Southeast Connector,3
31,Orland,3
5,California Tahoe Conservancy,2
26,Marysville,2
14,Dixon,1
28,Natomas Unified School District,1
48,Tahoe Regional Planning Agency,1


### Charting

In [14]:
# Bar chart of the agency rows from the top-values table.
top_agency_rows = df_top >> filter(_.variable == 'primary_agency_name')
_dla_utils.basic_bar_chart(
    top_agency_rows,
    'value',
    'count',
    'value',
    subset,
)

In [15]:
# Rows of the top-values table that describe prefix codes.
(
    df_top
    >> filter(_.variable == 'prefix')
)

Unnamed: 0,value,count,variable
0,CML,396,prefix
1,BRLO,384,prefix
2,HSIPL,378,prefix
3,STPL,276,prefix
4,ER,158,prefix
5,BRLS,151,prefix
6,RPSTPL,65,prefix
7,HRRRL,60,prefix
8,BPMP,50,prefix
9,ACSTP,43,prefix


In [16]:
# Row counts for every agency / prefix combination, largest first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prefix,n
71,El Dorado County,BRLO,67
141,Nevada County,BRLO,63
162,Placer County,BRLO,58
232,Sacramento,HSIPL,54
260,Sacramento County,HSIPL,53
...,...,...,...
316,Winters,HPLUL,1
319,Woodland,ATPSB1L,1
323,Woodland,RPSTPL,1
328,Yolo County,BRLOZE,1


## Number of Unique Prefix Codes

In [17]:
# Unique-count helper applied to `prefix` and `primary_agency_name`.
# NOTE(review): the displayed result has one row per agency with a count `n`, so this appears to be
# the number of distinct prefixes per agency — confirm the argument order in _dla_utils.
_dla_utils.get_nunique(df, 'prefix', 'primary_agency_name')

Unnamed: 0,primary_agency_name,n
15,El Dorado County,21
40,Sacramento,18
42,Sacramento County,18
34,Placer County,14
2,Butte County,13
...,...,...
28,Natomas Unified School District,1
31,Orland,1
48,Tahoe Regional Planning Agency,1
52,University of California - Davis,1


In [18]:
# Chart the first 30 rows of the unique-prefix-per-agency table.
prefix_variety_top30 = _dla_utils.get_nunique(df, 'prefix', 'primary_agency_name').head(30)
_dla_utils.basic_bar_chart(
    prefix_variety_top30,
    'primary_agency_name',
    'n',
    'primary_agency_name',
    subset,
)

## Most Common Types of Work by Agency

In [19]:
# Unique-count helper applied to `primary_agency_name` and `type_of_work`.
# NOTE(review): the displayed result has one row per type of work with a count `n`, so this appears to be
# the number of distinct agencies per type of work — confirm in _dla_utils.
_dla_utils.get_nunique(df, 'primary_agency_name', 'type_of_work')

Unnamed: 0,type_of_work,n
89,Bridge Replacement,17
90,Bridge Replacement (tc),15
307,FTA Transfer,10
82,Bridge Rehabilitation,6
83,Bridge Rehabilitation (tc),6
...,...,...
818,Widen Travel Lanes; Construct Drainage (tc),1
819,Widen Travel Lanes; Construct Drainage Imprvmnts,1
820,"Widen/aux. Lanes, On/off Ramp",1
821,"Widening, Trainl And Intersection Improvements",1


In [20]:
# Count of agency entries per type of work; the helper returns the leading rows only.
_dla_utils.calculate_data_head(
    df,
    "primary_agency_name",
    'type_of_work',
    aggfunc="count",
)

Unnamed: 0,type_of_work,primary_agency_name
90,Bridge Replacement (tc),250
89,Bridge Replacement,126
307,FTA Transfer,57
82,Bridge Rehabilitation,22
83,Bridge Rehabilitation (tc),22
441,New Engineered Embankment System Will Need To ...,18
49,Bike Path (tc),11
275,Emergency Opening,10
513,Preventative Maintenance,9
488,Pedestrian And Bike Path,9


In [21]:
# Agency / type-of-work combinations that occur at least 8 times, largest first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.type_of_work)
    >> arrange(-_.n)
    >> filter(_.n >= 8)
)

Unnamed: 0,primary_agency_name,type_of_work,n
148,El Dorado County,Bridge Replacement (tc),45
718,Sacramento Regional Transit District,FTA Transfer,30
863,Yuba County,Bridge Replacement (tc),29
315,Nevada County,Bridge Replacement (tc),28
821,Yolo County,Bridge Replacement (tc),27
381,Placer County,Bridge Replacement (tc),25
147,El Dorado County,Bridge Replacement,22
633,Sacramento County,Bridge Replacement (tc),21
749,Sutter County,Bridge Replacement (tc),20
380,Placer County,Bridge Replacement,19


## Most Common Project Locations

In [22]:
# Rows of the top-values table that describe project locations.
(
    df_top
    >> filter(_.variable == 'project_location')
)

Unnamed: 0,value,count,variable
0,Sacog Region,29,project_location
1,Various Locations (see Comments),10,project_location
2,5th Street At 2nd Street And 5th Street Over F...,9,project_location
3,4 Corridors In Western Nevada County,8,project_location
4,Along Newtown Road Between Hwy 49 And Beckvill...,8,project_location
5,El Dorado Trail From Los Trampas Dr To Halcon Rd,8,project_location
6,Nevada City Highway Starting At Banner Lava Ca...,8,project_location
7,Pctpa Region,8,project_location
8,Bradshaw & Sheldon At Laguna Crk Br #24c-0308,7,project_location
9,Ed Hills Csd From The West End Of Ny Trail Eas...,7,project_location


## Number of Obligations by Year

In [23]:
# Rows of the top-values table that describe the prepared year.
(
    df_top
    >> filter(_.variable == 'prepared_y')
)

Unnamed: 0,value,count,variable
0,2019.0,400,prepared_y
1,2018.0,343,prepared_y
2,2014.0,319,prepared_y
3,2017.0,309,prepared_y
4,2016.0,306,prepared_y
5,2020.0,305,prepared_y
6,2015.0,272,prepared_y
7,2021.0,248,prepared_y
8,2013.0,12,prepared_y


In [24]:
# Row counts for every agency / prepared_date combination, largest first.
# NOTE(review): this section is titled by year, but the count here is on the full `prepared_date`;
# confirm whether `prepared_y` was intended.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prepared_date)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prepared_date,n
1617,Sacramento County,2018-10-18,9
1739,Sacramento Regional Transit District,2020-04-16,8
888,Nevada County,2020-10-08,7
863,Nevada County,2019-03-20,6
1357,Sacramento,2016-04-29,6
...,...,...,...
2211,Yuba County,2021-04-30,1
2212,Yuba County,2021-06-30,1
2213,Yuba County,2021-08-05,1
2214,Yuba County,2021-08-19,1


### Chart

In [25]:
# Line chart of the per-year counts taken from the top-values table.
obligations_per_year = df_top >> filter(_.variable == 'prepared_y')
_dla_utils.basic_line_chart(
    obligations_per_year,
    'value',
    'count',
    subset,
)

## Agencies with the most unique project numbers

In [26]:
# Row counts per agency / prefix / project number, keeping combinations that occur more than 5 times.
# NOTE(review): this counts rows per project number; it does not count distinct project numbers per
# agency as the section heading suggests — confirm which was intended.
(
    df
    >> group_by(_.primary_agency_name, _.prefix)
    >> count(_.project_no)
    >> arrange(-_.n)
    >> filter(_.n > 5)
)

Unnamed: 0,primary_agency_name,prefix,project_no,n
370,Nevada County,BRLO,5917(081),9
941,Yuba City,BRLS,5163(027),9
172,El Dorado County,BRLO,5925(095),8
195,El Dorado County,CML,5925(125),8
197,El Dorado County,CML,5925(130),8
250,Elk Grove,BRLS,5479(012),8
369,Nevada County,BRLO,5917(080),8
371,Nevada County,BRLO,5917(082),8
378,Nevada County,CML,5917(070),8
379,Nevada County,CML,5917(090),8


## Average Funds

In [27]:
# Mean `total_requested` for each agency.
_dla_utils.calculate_data_all(
    df,
    'total_requested',
    'primary_agency_name',
    aggfunc="mean",
)

Unnamed: 0,primary_agency_name,total_requested
44,Sacramento Regional Transit District,2585441.0
60,Yuba City,2415526.0
52,University of California - Davis,1916021.0
57,Woodland,1854887.0
26,Marysville,1609720.0
59,Yolo County Transportation District,1541054.0
17,Folsom,1239564.0
43,Sacramento Metropolitan Air Quality Management...,1187539.0
42,Sacramento County,1068965.0
53,West Sacramento,1007069.0


### Top 50 Agencies with highest funds

In [28]:
# Mean `total_requested` per agency, ordered from highest to lowest, first 50 rows.
mean_requested_by_agency = _dla_utils.calculate_data_all(
    df, 'total_requested', 'primary_agency_name', aggfunc="mean"
)
(mean_requested_by_agency >> arrange(-_.total_requested)).head(50)

Unnamed: 0,primary_agency_name,total_requested
44,Sacramento Regional Transit District,2585441.0
60,Yuba City,2415526.0
52,University of California - Davis,1916021.0
57,Woodland,1854887.0
26,Marysville,1609720.0
59,Yolo County Transportation District,1541054.0
17,Folsom,1239564.0
43,Sacramento Metropolitan Air Quality Management...,1187539.0
42,Sacramento County,1068965.0
53,West Sacramento,1007069.0


In [29]:
# Bar chart of the 50 agencies with the highest mean `total_requested`.
highest_avg_requested = (
    _dla_utils.calculate_data_all(df, 'total_requested', 'primary_agency_name', aggfunc="mean")
    >> arrange(-_.total_requested)
).head(50)
_dla_utils.basic_bar_chart(
    highest_avg_requested,
    'primary_agency_name',
    'total_requested',
    'primary_agency_name',
    subset,
)

WARN Symbol legend count exceeds limit, filtering items.


### Bottom 50 Agencies with lowest funds

In [30]:
# Mean `total_requested` per agency, ordered high-to-low; the last 50 rows are the lowest averages.
avg_funds_by_agency = (
    df
    >> group_by(_.primary_agency_name)
    >> summarize(avg_funds=_.total_requested.mean())
    >> arrange(-_.avg_funds)
)
avg_funds_bottom = avg_funds_by_agency.tail(50)



In [31]:
# Display the 50 agencies with the lowest mean `total_requested`.
avg_funds_bottom

Unnamed: 0,primary_agency_name,avg_funds
39,Roseville,877869.0
40,Sacramento,803820.4
7,Capital Southeast Connector,759034.3
12,Davis,750910.5
37,Rancho Cordova,734859.6
22,Lincoln,723955.4
16,Elk Grove,716788.3
6,Caltrans,714147.4
50,Truckee,671955.9
13,Department of Parks and Recreation,670868.6


In [32]:
# Bar chart of the lowest-average agencies computed in `avg_funds_bottom`.
_dla_utils.basic_bar_chart(
    avg_funds_bottom,
    'primary_agency_name',
    'avg_funds',
    'primary_agency_name',
    subset,
)

WARN Symbol legend count exceeds limit, filtering items.


In [33]:
# Same bottom-50 view, built through the project helper: order the per-agency means
# from highest to lowest and keep the last 50 rows.
agency_mean_desc = (
    _dla_utils.calculate_data_all(df, 'total_requested', 'primary_agency_name', aggfunc="mean")
    >> arrange(-_.total_requested)
)
agency_mean_desc.tail(50)

Unnamed: 0,primary_agency_name,total_requested
17,Folsom,1239564.0
43,Sacramento Metropolitan Air Quality Management...,1187539.0
42,Sacramento County,1068965.0
53,West Sacramento,1007069.0
0,Auburn,995759.8
49,Tahoe Transportation District,892835.1
39,Roseville,877869.0
40,Sacramento,803820.4
7,Capital Southeast Connector,759034.3
12,Davis,750910.5


In [34]:
# Bar chart of the 50 agencies with the lowest mean `total_requested`.
# NOTE(review): the fourth argument is 'total_requested' here, while the other basic_bar_chart
# calls in this notebook repeat the column passed as the second argument — confirm this is intentional.
lowest_avg_requested = (
    _dla_utils.calculate_data_all(df, 'total_requested', 'primary_agency_name', aggfunc="mean")
    >> arrange(-_.total_requested)
).tail(50)
_dla_utils.basic_bar_chart(
    lowest_avg_requested,
    'primary_agency_name',
    'total_requested',
    'total_requested',
    subset,
)

## Look Up Obligations by Prefix and by Agency

### By Agency:

In [35]:
# Render the helper's interactive view keyed on `primary_agency_name`;
# the saved output shows a dropdown whose options are agency names.
_dla_utils.interactive_widget(df, 'primary_agency_name')

Dropdown(description='Primary_Agency_Name', options=('Auburn', 'Biggs', 'Butte County', 'Butte County Associat…

Output()

### By Prefix:

In [36]:
# Render the helper's interactive view keyed on `prefix`;
# the saved output shows a dropdown whose options are prefix codes.
_dla_utils.interactive_widget(df, 'prefix')

Dropdown(description='Prefix', options=('ACSTER', 'ACSTP', 'ATPCML', 'ATPL', 'ATPLNI', 'ATPSB1L', 'ATPSTC', 'A…

Output()