# Obligations in District 10

This notebook uses data from the Division of Local Assistance [Obligation list](https://dot.ca.gov/programs/local-assistance/reports/e-76-obligated).

In [4]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import clean_data
import _dla_utils

In [3]:
# Show floats with thousands separators and two decimals in displayed tables.
pd.set_option("display.float_format", "{:,.2f}".format)

In [5]:
# Load the obligations table from a local parquet file.
# NOTE(review): the commented-out call below references `_clean_data`, but the import
# cell imports the module as `clean_data` — fix the name before re-enabling it.
#df = _clean_data.make_clean_data()
df= pd.read_parquet("dla_df.parquet")

In [6]:
#parameters cell
# District number: used below to filter `df` on its `dist` column and passed to the chart helpers.
subset = 10

In [7]:
# Peek at one row to check the available columns. A fixed seed keeps the
# displayed row the same on every Restart & Run All.
df.sample(n=1, random_state=0)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
18522,Obligated,STPCML,5924(222),Sacramento County,2021-03-03,2021-03-03,2021-03-03,2021-03-03,2021-03-05,0.0,...,SAC24625,Hazel Ave Btwn Curragh Downs Drive And Sunset ...,Road Widening And Ped/bike Facilities,2,2021-02-25,2021-02-26,SACOG,5924,2021.0,Sacramento County


In [8]:
# Keep only the rows whose district matches the `subset` parameter.
df = df >> filter(_.dist == subset)

In [9]:
# Summary table built by the _dla_utils helper from the district subset; the
# displayed result has one row per `prepared_y` with sums, means and unique counts.
df_years = _dla_utils.count_all_years(df)

In [10]:
df_years

Unnamed: 0,prepared_y,dist,ac_requested_sum,fed_requested_sum,total_requested_sum,ac_requested_mean,fed_requested_mean,total_requested_mean,unique_mpo,unique_prefix,unique_primary_agency_name,unique_project_location,unique_project_no,unique_type_of_work
8,2021,10,2157720.7,41198040.68,52975882.79,10373.66,198067.5,254691.74,6,25,35,167,171,147
7,2020,10,-3478058.76,85299546.72,350173497.15,-13124.75,321885.08,1321409.42,5,25,38,219,234,196
6,2019,10,9112268.45,128604727.82,146082897.87,32428.0,457668.07,519867.96,6,31,44,242,246,205
0,2018,10,5279122.45,69297099.88,90357990.92,15758.57,206857.01,269725.35,6,27,44,243,257,216
1,2017,10,15847048.0,87032288.52,115556868.5,57209.56,314195.99,417172.81,5,26,39,220,225,174
2,2016,10,-20717404.0,125715796.11,123437079.25,-75336.01,457148.35,448862.11,4,23,39,243,254,196
3,2015,10,-2522131.0,55833316.99,76113056.95,-9663.34,213920.75,291620.91,11,29,40,226,230,186
4,2014,10,23239535.0,70565097.42,114193397.2,78511.94,238395.6,385788.5,11,30,42,258,263,205
5,2013,10,0.0,339817.0,243494.0,0.0,67963.4,48698.8,2,4,5,5,5,5


In [9]:
# Long-format table from the _dla_utils helper; the displayed result has the columns
# `value`, `count` and `variable`, and later cells filter it on `variable`.
df_top = _dla_utils.find_top(df)

In [10]:
df_top

Unnamed: 0,value,count,variable
0,CML,755,prefix
1,STPL,377,prefix
2,HSIPL,219,prefix
3,BRLO,207,prefix
4,BRLS,103,prefix
...,...,...,...
16,Oakdale,31,primary_agency_name
17,Patterson,31,primary_agency_name
18,Gustine,22,primary_agency_name
19,Jackson,22,primary_agency_name


## Which Agencies have Obligations

### How many Unique Agencies

In [11]:
# Number of distinct agency names in the district subset.
_dla_utils.calculate_data_all(
    df,
    "primary_agency_name",
    aggfunc="nunique",
)

Unnamed: 0,dist,primary_agency_name
0,10,56


In [49]:
# Row count per agency, largest first.
(
    df
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,n
38,San Joaquin County,266
44,Stockton,261
29,Modesto,243
43,Stanislaus County,202
9,Calaveras County,120
27,Merced County,119
51,Turlock,87
11,Ceres,86
49,Tuolumne County,82
26,Merced,62


### Agencies with the most Obligations

In [12]:
# Rows of the top-values table that describe agencies.
df_top >> filter(_.variable == "primary_agency_name")

Unnamed: 0,value,count,variable
0,San Joaquin County,266,primary_agency_name
1,Stockton,261,primary_agency_name
2,Modesto,243,primary_agency_name
3,Stanislaus County,202,primary_agency_name
4,Calaveras County,120,primary_agency_name
5,Merced County,119,primary_agency_name
6,Turlock,87,primary_agency_name
7,Ceres,86,primary_agency_name
8,Tuolumne County,82,primary_agency_name
9,Merced,62,primary_agency_name


### Agencies with the least Obligations

In [13]:
# Same per-agency counts sorted descending; the last ten rows are the smallest counts.
(
    df
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
).tail(10)

Unnamed: 0,primary_agency_name,n
30,Modoc County,3
52,"U.S. Forest Service, Pacific Southwest Region",3
55,Yuba County,3
1,Amador City,2
3,Amador County Transportation Commission,2
4,Amador Rapid Transit System,2
17,Ione,2
22,Lodi Unified School District,2
50,Tuolumne County Transportation Council,2
10,Caltrans,1


In [14]:
# Types of work under the CML prefix that appear fewer than four times.
(
    df
    >> filter(_.prefix == "CML")
    >> select(_.primary_agency_name, _.type_of_work)
    >> count(_.type_of_work)
    >> arrange(-_.n)
    >> filter(_.n < 4)
)

Unnamed: 0,type_of_work,n
19,Class I Bike Trail(tc),3
22,Construct A Right Turn Lane,3
31,"Construct Curb, Gutter, Sidewalk, Drainage, An...",3
35,Construct New Traffic Signals (tc),3
40,Construct Pedestrian Infills,3
...,...,...
280,Transit Fare Subsidy Program.,1
281,Transit Fare Subsidy(tfs) Start/max Employee R...,1
284,Transportation Management Center Equipment. - ...,1
290,Upgrade The Atms Software And Communications F...,1


## Transit Funding

In [99]:
# Rows whose type of work mentions a transit-related keyword (case-insensitive;
# rows with a missing type of work are excluded by na=False).
# NOTE(review): with case=False the `bus|Bus` alternatives are equivalent, and
# `bus` also matches inside longer words — confirm this is intended.
transit = df.loc[
    df["type_of_work"].str.contains(
        "Transit|tranist|bus|Bus|Yarts|Rideshare", case=False, na=False
    )
]

In [1]:
# NOTE(review): the saved output of this cell is a NameError recorded at execution
# count 1, i.e. it was last run before `df` existed. Re-run the notebook top to
# bottom to refresh it.
print(f"Out of {len(df)} obligations, {len(transit)} are transit related")

NameError: name 'df' is not defined

In [101]:
len(transit)

126

In [102]:
# Transit-related rows per agency, largest first.
(
    transit
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,n
4,Modesto,23
9,Stanislaus County,22
8,Stanislaus Council of Governments,18
13,Yosemite Area Regional Transportation System JPA,13
10,Stockton,12
11,Transit Joint Powers Authority For Merced County,12
6,San Joaquin Council of Governments,8
7,Sonora,4
12,Tuolumne County,4
2,Mariposa County,3


In [103]:
#transit>>group_by(_.prefix)>>count(_.type_of_work)>>arrange(-_.n)

In [113]:
# Transit-related rows per type of work, largest first.
(
    transit
    >> count(_.type_of_work)
    >> arrange(-_.n)
)

Unnamed: 0,type_of_work,n
17,Regional Rideshare Program,13
13,"Public Outreach And Marketing For ""the Bus"" (tc)",9
23,Rideshare Program,6
24,Rideshare Program (tc),6
43,Yarts Public Outreach And Marketing (tc),6
1,"Const Ped,transit Facility And Traffic Improv",4
27,San Joaquin County And Merced County Regional ...,4
6,Install Bus Turnout,3
7,Install Bus Turnout (tc),3
10,New Stanislaus Regional Transit Commuter Expre...,3


In [105]:
# Transit-related rows per prefix code, largest first.
(
    transit
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,prefix,n
0,CML,104
3,HSIPL,9
1,CMLNI,8
4,RPSTPL,4
2,DEM06U,1


In [106]:
# Rows whose agency name mentions transit or rail (case-insensitive; rows with a
# missing agency name are excluded by na=False).
transit_agencies = df.loc[
    df["primary_agency_name"].str.contains(
        "Transit|tranist|Rail", case=False, na=False
    )
]

In [107]:
# Row count for each transit/rail agency.
transit_agencies >> count(_.primary_agency_name)

Unnamed: 0,primary_agency_name,n
0,Amador Rapid Transit System,2
1,San Joaquin Regional Rail Commission,9
2,San Joaquin Regional Transit District,6
3,Transit Joint Powers Authority For Merced County,15


In [114]:
# Used to determine how many FTA transfers there are among the transit/rail agencies.
(
    transit_agencies
    >> count(_.type_of_work)
    >> arrange(-_.n)
)

Unnamed: 0,type_of_work,n
1,FTA Transfer,17
6,"Public Outreach And Marketing For ""the Bus"" (tc)",9
0,Accuring Bus Wraps For Public Outreach(tc),1
2,"In Stockton, Near The Robert J. Cabral Station...",1
3,"In Stockton, Near The Robert J. Cabral Station...",1
4,Install Decorative Bike Racks At Brt Stops,1
5,"Public Outreach & Marketing For ""the Bus"" Tran...",1
7,"Tjpamc: Public Outreach & Marketing For ""the B...",1


In [110]:
# Mean total requested amount for each transit/rail agency.
_dla_utils.calculate_data_all(
    transit_agencies,
    "total_requested",
    aggregate_by="primary_agency_name",
    aggfunc="mean",
)

Unnamed: 0,primary_agency_name,total_requested
2,San Joaquin Regional Transit District,3748224.0
1,San Joaquin Regional Rail Commission,2244211.0
3,Transit Joint Powers Authority For Merced County,272259.9
0,Amador Rapid Transit System,211130.0


### Charting

In [23]:
# Bar chart of the agency rows from the top-values table.
_dla_utils.basic_bar_chart(
    df_top >> filter(_.variable == "primary_agency_name"),
    "value",
    "count",
    "value",
    subset,
    "Agencies With The Most Obligations",
)

In [24]:
# Rows of the top-values table that describe prefix codes.
df_top >> filter(_.variable == "prefix")

Unnamed: 0,value,count,variable
0,CML,755,prefix
1,STPL,377,prefix
2,HSIPL,219,prefix
3,BRLO,207,prefix
4,BRLS,103,prefix
5,ER,82,prefix
6,HRRRL,76,prefix
7,BPMPL,61,prefix
8,ATPL,31,prefix
9,RPSTPL,31,prefix


In [25]:
# Row count for every (agency, prefix) pair, largest first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prefix,n
107,Modesto,CML,138
197,Stockton,HSIPL,90
192,Stockton,CML,84
157,San Joaquin County,STPL,75
177,Stanislaus County,CML,66
...,...,...,...
216,Tracy,DEM03L,1
217,Tracy,FTACML,1
220,Transit Joint Powers Authority For Merced County,CMLNI,1
223,Tuolumne County,BPMP,1


## Number of Unique Prefix Codes

In [26]:
# Distinct prefix codes per agency (the displayed result has columns
# `primary_agency_name` and `n`).
_dla_utils.get_nunique(
    df,
    "prefix",
    "primary_agency_name",
)

Unnamed: 0,primary_agency_name,n
44,Stockton,23
29,Modesto,17
43,Stanislaus County,15
9,Calaveras County,11
27,Merced County,11
38,San Joaquin County,11
25,Mariposa County,10
49,Tuolumne County,10
2,Amador County,8
11,Ceres,7


In [27]:
# Bar chart of the first 30 rows of the unique-prefix-per-agency table.
_dla_utils.basic_bar_chart(
    _dla_utils.get_nunique(df, "prefix", "primary_agency_name").head(30),
    "primary_agency_name",
    "n",
    "primary_agency_name",
    subset,
    "Agencies With The Most Unique Prefix Codes",
)

## Most Common Types of Work by Agency

In [28]:
# Distinct agencies per type of work (the displayed result has columns
# `type_of_work` and `n`).
_dla_utils.get_nunique(
    df,
    "primary_agency_name",
    "type_of_work",
)

Unnamed: 0,type_of_work,n
58,Bridge Replacement,14
59,Bridge Replacement (tc),10
182,FTA Transfer,8
47,Bridge Preventive Maintenance,6
51,Bridge Rehabilitation,5
...,...,...
806,"Widening Road, C&g, Lights, Landscape, Restris...",1
807,Yarts Public Outreach And Marketing (tc),1
808,Yarts: Public Outreach And Marketing (yosemite...,1
809,Yosemite Area Regional Transportation System (...,1


In [29]:
# Count of agency entries per type of work, via the `calculate_data_head` helper.
_dla_utils.calculate_data_head(
    df,
    "primary_agency_name",
    "type_of_work",
    aggfunc="count",
)

Unnamed: 0,type_of_work,primary_agency_name
59,Bridge Replacement (tc),171
58,Bridge Replacement,59
47,Bridge Preventive Maintenance,43
414,Pavement Rehabilitation (tc),32
182,FTA Transfer,24
671,Roundabout,15
563,Repairs To Existing Scour Countermeasures,15
506,Regional Rideshare Program,13
54,Bridge Rehabilitation (tc),13
413,Pavement Rehabilitation,12


In [30]:
# (agency, type of work) pairs that occur at least eight times, largest first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.type_of_work)
    >> arrange(-_.n)
    >> filter(_.n >= 8)
)

Unnamed: 0,primary_agency_name,type_of_work,n
271,Merced County,Bridge Replacement (tc),34
466,San Joaquin County,Bridge Replacement (tc),28
570,Stanislaus County,Bridge Replacement (tc),28
461,San Joaquin County,Bridge Preventive Maintenance,25
53,Calaveras County,Bridge Replacement (tc),22
352,Modesto,Pavement Rehabilitation (tc),21
512,San Joaquin County,Repairs To Existing Scour Countermeasures,15
802,Tuolumne County,Bridge Replacement (tc),15
225,Mariposa County,Bridge Replacement (tc),14
270,Merced County,Bridge Replacement,14


## Most Common Project Locations

In [31]:
# Rows of the top-values table that describe project locations.
df_top >> filter(_.variable == "project_location")

Unnamed: 0,value,count,variable
0,In Stanislaus County,12,project_location
1,Stanislaus County,12,project_location
2,City Of Atwater,11,project_location
3,City Of Modesto,10,project_location
4,Throughout Stanislaus County,10,project_location
5,Wb D Street To Nb 9th Street (sr 132),9,project_location
6,Tully Rd. From Santa Fe To Whitmore Ave.,8,project_location
7,11th Street - East Tracy Overhead (bridge 29c0...,7,project_location
8,Baxter Road Over Deadman Creek (bridge 39c0230),7,project_location
9,In Merced County,7,project_location


## Number of Obligations by Year

In [32]:
# Rows of the top-values table that describe the prepared year.
df_top >> filter(_.variable == "prepared_y")

Unnamed: 0,value,count,variable
0,2018.0,335,prepared_y
1,2014.0,296,prepared_y
2,2019.0,281,prepared_y
3,2017.0,277,prepared_y
4,2016.0,275,prepared_y
5,2020.0,265,prepared_y
6,2015.0,261,prepared_y
7,2021.0,208,prepared_y
8,2013.0,5,prepared_y


In [33]:
# Row count for every (agency, prepared date) pair, largest first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prepared_date)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prepared_date,n
166,Calaveras County,2020-11-04,15
702,Modesto,2018-04-09,12
708,Modesto,2018-05-08,11
715,Modesto,2018-06-05,9
719,Modesto,2018-06-18,6
...,...,...,...
1843,Yosemite Area Regional Transportation System JPA,2019-07-31,1
1844,Yosemite Area Regional Transportation System JPA,2020-01-08,1
1845,Yuba County,2018-08-15,1
1846,Yuba County,2019-04-25,1


### Chart

In [34]:
# Line chart of the prepared-year rows from the top-values table.
_dla_utils.basic_line_chart(
    df_top >> filter(_.variable == "prepared_y"),
    "value",
    "count",
    subset,
    "Obligations by Year",
)

## Agencies with the most unique project numbers

In [35]:
# Row count for every (agency, prefix, project number) combination, keeping those
# with more than five rows.
# NOTE(review): this ranks individual project numbers by row count; it does not
# count distinct project numbers per agency, which is what the section heading
# describes — confirm which is intended.
(
    df
    >> group_by(_.primary_agency_name, _.prefix)
    >> count(_.project_no)
    >> arrange(-_.n)
    >> filter(_.n > 5)
)

Unnamed: 0,primary_agency_name,prefix,project_no,n
283,Merced County,BRLS,5939(078),9
341,Modesto,CML,5059(198),9
161,Hughson,STPL,5411(014),8
273,Merced County,BRLO,5939(084),7
347,Modesto,CML,5059(207),7
443,Plymouth,HPLUL,5236(003),7
793,Tracy,BHLS,5192(020),7
841,Tuolumne County,HSIPL,5932(083),7
9,Amador County,BRLO,5926(031),6
119,Ceres,CML,5241(056),6


## Funding Amounts

In [36]:
# Mean total requested amount per agency.
_dla_utils.calculate_data_all(
    df,
    "total_requested",
    "primary_agency_name",
    aggfunc="mean",
)

Unnamed: 0,primary_agency_name,total_requested
40,San Joaquin Regional Transit District,3748224.0
10,Caltrans,2964646.0
45,Stockton Port District,2760150.0
39,San Joaquin Regional Rail Commission,2244211.0
43,Stanislaus County,2180271.0
19,Lathrop,1689527.0
21,Lodi,1390685.0
47,Tracy,1329912.0
22,Lodi Unified School District,1148058.0
37,San Joaquin Council of Governments,923653.7


In [132]:
df.describe()

Unnamed: 0,fed_requested,ac_requested,total_requested,dist,dist_processing_days,hq_processing_days,fhwa_processing_days,seq,prepared_y
count,2203.0,2203.0,2203.0,2203.0,2023.0,2194.0,2194.0,2203.0,2203.0
mean,301355.3,13126.69,485308.29,10.0,13.05,16.56,4.82,3.01,2017.37
std,1221377.7,909865.61,5817909.82,0.0,36.21,26.78,5.52,1.91,2.23
min,-4494094.27,-18568334.0,-22294827.23,10.0,-99.0,0.0,0.0,1.0,2013.0
25%,0.0,0.0,0.0,10.0,0.0,0.0,2.0,2.0,2015.0
50%,12000.0,0.0,40000.0,10.0,3.0,8.0,4.0,3.0,2017.0
75%,229775.0,0.0,272790.0,10.0,12.0,17.0,7.0,4.0,2019.0
max,20000000.0,23239535.0,259547213.0,10.0,1096.0,261.0,141.0,14.0,2021.0


In [131]:
transit.describe()

Unnamed: 0,fed_requested,ac_requested,total_requested,dist,dist_processing_days,hq_processing_days,fhwa_processing_days,seq,prepared_y
count,126.0,126.0,126.0,126.0,112.0,126.0,126.0,126.0,126.0
mean,129595.99,5119.05,159279.34,10.0,7.14,14.6,4.53,2.0,2017.66
std,317638.37,128866.22,439094.89,0.0,10.91,23.45,3.93,1.11,2.19
min,-183102.22,-911000.0,-1297265.0,10.0,0.0,0.0,0.0,1.0,2014.0
25%,0.0,0.0,0.0,10.0,0.0,0.0,2.0,1.0,2016.0
50%,6000.0,0.0,9037.35,10.0,3.0,6.0,4.0,2.0,2018.0
75%,100000.0,0.0,111250.0,10.0,8.0,16.75,6.0,2.0,2019.0
max,2032749.0,911000.0,2296116.0,10.0,51.0,122.0,30.0,7.0,2021.0


### Top 50 Agencies with highest funds

In [37]:
# Mean total requested per agency, sorted descending; keep the first 50 rows.
(
    _dla_utils.calculate_data_all(
        df, "total_requested", "primary_agency_name", aggfunc="mean"
    )
    >> arrange(-_.total_requested)
).head(50)

Unnamed: 0,primary_agency_name,total_requested
40,San Joaquin Regional Transit District,3748224.0
10,Caltrans,2964646.0
45,Stockton Port District,2760150.0
39,San Joaquin Regional Rail Commission,2244211.0
43,Stanislaus County,2180271.0
19,Lathrop,1689527.0
21,Lodi,1390685.0
47,Tracy,1329912.0
22,Lodi Unified School District,1148058.0
37,San Joaquin Council of Governments,923653.7


In [39]:
# Bar chart of the 30 agencies with the highest mean total requested amount.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, "total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).head(30),
    "primary_agency_name",
    "total_requested",
    "primary_agency_name",
    subset,
    "Average Total Requested Funds by Agency",
)

In [98]:
# Same chart restricted to the transit-related rows.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            transit, "total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).head(30),
    "primary_agency_name",
    "total_requested",
    "primary_agency_name",
    subset,
    "Average Total Requested Funds by Agency: Transit Related Funding",
)

In [112]:
# Bar chart of the mean total requested amount for each transit/rail agency.
_dla_utils.basic_bar_chart(
    _dla_utils.calculate_data_all(
        transit_agencies,
        "total_requested",
        aggregate_by="primary_agency_name",
        aggfunc="mean",
    ),
    "primary_agency_name",
    "total_requested",
    "primary_agency_name",
    subset,
    "Transit Agencies Average Funding",
)



In [56]:
# Types of work recorded for San Joaquin Regional Transit District.
(
    df
    >> filter(_.primary_agency_name == "San Joaquin Regional Transit District")
    >> count(_.type_of_work)
)

Unnamed: 0,type_of_work,n
0,FTA Transfer,5
1,Install Decorative Bike Racks At Brt Stops,1


In [59]:
# Prefix codes recorded for San Joaquin Regional Transit District.
(
    df
    >> filter(_.primary_agency_name == "San Joaquin Regional Transit District")
    >> count(_.prefix)
)

Unnamed: 0,prefix,n
0,FTACML,5
1,RPSTPL,1


### Bottom 50 Agencies with lowest funds

In [40]:
# Mean total requested per agency, sorted descending; the last 50 rows are the
# agencies with the lowest averages.
avg_funds_bottom = (
    df
    >> group_by(_.primary_agency_name)
    >> summarize(avg_funds=_.total_requested.mean())
    >> arrange(-_.avg_funds)
).tail(50)



In [41]:
avg_funds_bottom

Unnamed: 0,primary_agency_name,avg_funds
21,Lodi,1390685.0
47,Tracy,1329912.0
22,Lodi Unified School District,1148058.0
37,San Joaquin Council of Governments,923653.7
7,Calabasas,885466.7
0,Alpine County,790063.4
24,Manteca,775062.2
46,Sutter Creek,594334.9
34,Plymouth,528235.0
38,San Joaquin County,438396.7


In [43]:
# Bar chart of the 40 lowest averages from `avg_funds_bottom`.
_dla_utils.basic_bar_chart(
    avg_funds_bottom.tail(40),
    "primary_agency_name",
    "avg_funds",
    "primary_agency_name",
    subset,
    "Lowest Average Total Funds by Agency",
)

WARN Symbol legend count exceeds limit, filtering items.


In [44]:
# Mean total requested per agency, sorted descending; keep the last 50 rows.
mean_total_desc = None  # placeholder removed below; kept out of the namespace
del mean_total_desc
(
    _dla_utils.calculate_data_all(
        df, "total_requested", "primary_agency_name", aggfunc="mean"
    )
    >> arrange(-_.total_requested)
).tail(50)

Unnamed: 0,primary_agency_name,total_requested
10,Caltrans,2964646.0
45,Stockton Port District,2760150.0
39,San Joaquin Regional Rail Commission,2244211.0
43,Stanislaus County,2180271.0
19,Lathrop,1689527.0
21,Lodi,1390685.0
47,Tracy,1329912.0
22,Lodi Unified School District,1148058.0
37,San Joaquin Council of Governments,923653.7
7,Calabasas,885466.7


In [42]:
# Bar chart of the 50 lowest per-agency mean total requested amounts, coloured by
# the amount itself. Every other bar chart in this notebook passes a title as the
# final argument; this call omitted it, so one is supplied here so the figure
# stands on its own.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, "total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).tail(50),
    "primary_agency_name",
    "total_requested",
    "total_requested",
    subset,
    "Lowest Average Total Requested Funds by Agency",
)

In [65]:
# Peek at one row of the district subset. A fixed seed keeps the displayed row
# the same on every Restart & Run All.
df.sample(n=1, random_state=0)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,ac_requested,total_requested,status_comment,locode,dist,status,dist_processing_days,hq_processing_days,fhwa_processing_days,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
14830,Obligated,STPL,6089(076),Stanislaus Council Of Governments,2019-08-29,2019-08-29,2019-08-29,2019-08-29,2019-09-04,0.0,0.0,118038.44,Authorized,6089,10,E-76 approved on,7.0,0.0,6.0,05STA002R,In Stanilaus County,"Planning, Programming, And Monitoring",2,2019-08-22,2019-08-23,STANCOG,6089,2019.0,Stanislaus Council of Governments


## Prefix Code Funding 

In [69]:
# Bar chart of the 30 prefixes with the highest mean total requested amount.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(df, "total_requested", "prefix", aggfunc="mean")
        >> arrange(-_.total_requested)
    ).head(30),
    "prefix",
    "total_requested",
    "prefix",
    subset,
    "Average Total Requested Funds by Prefix",
)

In [67]:
# Bar chart of the 30 prefixes with the highest mean federal requested amount.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(df, "fed_requested", "prefix", aggfunc="mean")
        >> arrange(-_.fed_requested)
    ).head(30),
    "prefix",
    "fed_requested",
    "prefix",
    subset,
    "Average Federal Requested Funds by Prefix",
)

In [70]:
# Bar chart of the 30 prefixes with the highest mean AC requested amount.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(df, "ac_requested", "prefix", aggfunc="mean")
        >> arrange(-_.ac_requested)
    ).head(30),
    "prefix",
    "ac_requested",
    "prefix",
    subset,
    "Average Advanced Construction Requested Funds by Prefix",
)

## Functions to Look Up Obligations by Prefix and by Agency

### By Agency:

In [45]:
# Interactive lookup keyed on agency name; the saved output is a dropdown whose
# options are agency names, followed by an output area.
_dla_utils.interactive_widget(df, 'primary_agency_name')

Dropdown(description='Primary_Agency_Name', options=('Alpine County', 'Amador City', 'Amador County', 'Amador …

Output()

### By Prefix:

In [46]:
# Interactive lookup keyed on prefix code; the saved output is a dropdown whose
# options are prefix codes, followed by an output area.
_dla_utils.interactive_widget(df, 'prefix')

Dropdown(description='Prefix', options=('ACSTER', 'ACSTP', 'ATPCML', 'ATPL', 'ATPLNI', 'ATPSTPL', 'BDGL', 'BHL…

Output()