# Obligations in District 7

Using data from the Division of Local Assistance [Obligation list](https://dot.ca.gov/programs/local-assistance/reports/e-76-obligated)

In [1]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from shared_utils import altair_utils
alt.themes.enable("fivethirtyeight")

from dla_utils import _dla_utils

from IPython.display import Markdown
from IPython.core.display import display



In [2]:
# Load the E-76 obligations table from the analytics bucket.
df = pd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet"
)

In [3]:
# Peek at a single row to see the available columns. The fixed seed keeps the
# displayed row the same on every Restart & Run All.
df.sample(n=1, random_state=0)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
15859,Obligated,HSIPL,5008(162),Stockton,2020-01-10,2020-01-10,2020-01-17,2020-01-21,2020-01-23,435870.0,...,0.0,Large,0,0,0,1,0,0,0,1


In [14]:
# District number to analyze: used to filter `df` on `dist` and passed to the chart helpers.
subset = 7

In [15]:
# Restrict the working frame to the selected district.
# Note: this rebinds `df`, so every later cell only sees that district's rows.
df = (
    df
    >> filter(_.dist == subset)
)

In [5]:
# Summarize `df` by year via the project helper; the displayed result has one
# row per `prepared_y`.
df_years = _dla_utils.count_all_years(df)

In [6]:
# Show the yearly summary table.
df_years

Unnamed: 0,prepared_y,dist,adjusted_ac_requested_x,adjusted_fed_requested_x,adjusted_total_requested_x,adjusted_ac_requested_y,adjusted_fed_requested_y,adjusted_total_requested_y,unique_mpo,unique_prefix,unique_primary_agency_name,unique_project_location,unique_project_no,unique_type_of_work
11,2022,7,-11026947.22,262002155.55,262454620.78,-119858.12,2847849.52,2852767.62,2.0,22.0,39.0,85.0,86.0,84.0
10,2021,7,-8394078.23,453907980.82,665165684.86,-29247.66,1581560.91,2317650.47,2.0,44.0,63.0,239.0,247.0,227.0
9,2020,7,-30590342.62,439813880.26,1003589707.82,-95894.49,1378726.9,3146049.24,1.0,43.0,60.0,276.0,282.0,266.0
8,2019,7,-13773273.82,355599947.26,388486894.55,-40390.83,1042815.09,1139257.76,1.0,36.0,65.0,287.0,298.0,279.0
0,2018,7,-63834279.07,357599773.31,355523531.33,-191694.53,1073873.19,1067638.23,1.0,39.0,63.0,277.0,291.0,264.0
1,2017,7,-66766131.25,343468449.65,330425191.26,-204804.08,1053584.2,1013574.21,2.0,38.0,73.0,280.0,294.0,265.0
2,2016,7,-105948519.78,474523581.35,532576409.01,-281777.98,1265396.22,1416426.62,2.0,43.0,75.0,307.0,322.0,304.0
3,2015,7,154146238.84,296153503.82,669872521.76,416611.46,800414.88,1810466.28,8.0,41.0,85.0,299.0,312.0,269.0
5,2014,7,268939567.03,333654043.83,669762642.12,625440.85,775939.64,1557587.54,10.0,42.0,77.0,362.0,375.0,323.0
6,2013,7,33210263.09,17037728.82,-26761827.54,1037820.72,532429.03,-836307.11,4.0,20.0,18.0,30.0,32.0,31.0


In [7]:
# Build the table of most frequent values via the project helper; the result
# has `value`, `count`, and `variable` columns (see the output below).
df_top = _dla_utils.find_top(df)

In [8]:
# Show the top-value counts for every summarized column.
df_top

Unnamed: 0,value,count,variable
0,HSIPL,745,prefix
1,STPL,451,prefix
2,CML,290,prefix
3,ER,187,prefix
4,ATPL,150,prefix
...,...,...,...
16,Camarillo,35,primary_agency_name
17,Culver City,32,primary_agency_name
18,Burbank,31,primary_agency_name
19,Whittier,31,primary_agency_name


## Which Agencies have Obligations

### How many Unique Agencies

In [9]:
# Number of distinct `primary_agency_name` values in the district.
_dla_utils.calculate_data_all(df, 'primary_agency_name', aggfunc="nunique")

Unnamed: 0,dist,primary_agency_name
0,7,108


### Agencies with the most Obligations

In [10]:
# Most frequent agencies, taken from the top-value table.
(
    df_top
    >> filter(_.variable == 'primary_agency_name')
)

Unnamed: 0,value,count,variable
0,Los Angeles,508,primary_agency_name
1,Los Angeles County,481,primary_agency_name
2,Ventura County,120,primary_agency_name
3,Los Angeles County Metropolitan Transportation...,88,primary_agency_name
4,Santa Clarita,88,primary_agency_name
5,Long Beach,84,primary_agency_name
6,Oxnard,76,primary_agency_name
7,Pasadena,75,primary_agency_name
8,Downey,64,primary_agency_name
9,Lancaster,63,primary_agency_name


### Agencies with the least Obligations

In [11]:
# Count rows per agency, sort from most to fewest, and keep the last ten rows.
(
    df
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
).tail(10)

Unnamed: 0,primary_agency_name,n
56,Maywood,2
65,Palos Verdes Est,2
71,Port Of Long Beach,2
86,Sierra Madre,2
2,Alameda Corridor Transportation Authority,1
4,Antelope Valley Transit Authority,1
35,Hermosa Beach,1
70,Port Hueneme,1
74,Rolling Hills Estates,1
100,"U.S. Forest Service, Pacific Southwest Region",1


### Charting

In [16]:
# Bar chart of the agencies that appear most often in the data.
_dla_utils.basic_bar_chart(
    df_top >> filter(_.variable == 'primary_agency_name'),
    'value',
    'count',
    'value',
    subset,
    'Agencies With The Most Obligations',
)


In [17]:
# Most frequent prefix codes, taken from the top-value table.
(
    df_top
    >> filter(_.variable == 'prefix')
)

Unnamed: 0,value,count,variable
0,HSIPL,745,prefix
1,STPL,451,prefix
2,CML,290,prefix
3,ER,187,prefix
4,ATPL,150,prefix
5,HPLUL,147,prefix
6,BHLS,100,prefix
7,FTACML,95,prefix
8,SRTSL,71,prefix
9,RPSTPL,63,prefix


In [18]:
# Rows per (agency, prefix) pair, largest counts first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prefix,n
222,Los Angeles County,ER,160
202,Los Angeles,STPL,91
228,Los Angeles County,HSIPL,85
196,Los Angeles,HSIPL,75
185,Los Angeles,CML,69
...,...,...,...
475,Vernon,HSIP,1
476,Vernon,HSIPL,1
478,West Covina,HPLUL,1
482,Westlake Village,HSIPL,1


## Number of Unique Prefix Codes

In [19]:
# Number of distinct `prefix` codes per agency (the displayed result lists the highest `n` first).
_dla_utils.get_nunique(df, 'prefix', 'primary_agency_name')

Unnamed: 0,primary_agency_name,n
49,Los Angeles,35
50,Los Angeles County,31
47,Long Beach,19
51,Los Angeles County Metropolitan Transportation...,18
63,Oxnard,14
...,...,...
89,South Coast Air Quality Management District,1
90,South Coast Area Transit,1
91,South El Monte,1
100,"U.S. Forest Service, Pacific Southwest Region",1


In [20]:
# Chart the first 30 rows of the distinct-prefix count per agency.
_dla_utils.basic_bar_chart(
    _dla_utils.get_nunique(df, 'prefix', 'primary_agency_name').head(30),
    'primary_agency_name',
    'n',
    'primary_agency_name',
    subset,
    'Agencies With The Most Unique Prefix Codes',
)

## Most Common Types of Work by Agency

In [21]:
# Number of distinct agencies (`primary_agency_name`) per `type_of_work`, per the displayed output.
_dla_utils.get_nunique(df, 'primary_agency_name', 'type_of_work')

Unnamed: 0,type_of_work,n
356,FTA Transfer,16
943,Road Rehabilitation (tc),7
320,Emergency Opening,6
95,Bridge Rehabilitation,5
1183,Upgrade Signals,5
...,...,...
1249,"Widening, Right Turn Lanes, La",1
1250,"Widening,install Sidewalks, Traffic Improvements,",1
1251,"Widenning Roadway(install Right Turn Lanes, Etc)",1
1252,Xwalks; Ped Heads; Flash Beacons; Signs; Bulb ...,1


In [22]:
# Count of `primary_agency_name` entries per `type_of_work`.
# NOTE(review): the helper presumably keeps only the highest counts (ten rows shown below) - confirm.
_dla_utils.calculate_data_head(df, "primary_agency_name",'type_of_work', aggfunc="count")

Unnamed: 0,type_of_work,primary_agency_name
356,FTA Transfer,131
320,Emergency Opening,35
107,Bridge Replacement,24
943,Road Rehabilitation (tc),20
95,Bridge Rehabilitation,18
109,Bridge Replacement (tc),15
1120,Traffic Signal Improvements,13
747,Pedestrian Improvements,13
1183,Upgrade Signals,12
61,Bikeway Access Improvements,11


In [24]:
# (agency, type of work) pairs that occur at least 8 times, largest counts first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.type_of_work)
    >> arrange(-_.n)
    >> filter(_.n >= 8)
)

Unnamed: 0,primary_agency_name,type_of_work,n
814,Los Angeles County Metropolitan Transportation...,FTA Transfer,58
665,Los Angeles County,Emergency Opening,26
0,Access Services,FTA Transfer,18
1294,Ventura County Transportation Commission,FTA Transfer,15
630,Los Angeles County,Bridge Replacement,12
359,Los Angeles,Bridge Rehabilitation,11
610,Los Angeles County,Bikeway Access Improvements,11
266,Lancaster,Construct Roundabout Within Existing R/w,10
501,Los Angeles,Pedestrian Improvements,10
631,Los Angeles County,Bridge Replacement (tc),10


## Most Common Project Locations

In [25]:
# Most frequent project locations, taken from the top-value table.
(
    df_top
    >> filter(_.variable == 'project_location')
)

Unnamed: 0,value,count,variable
0,"Sixth Street Viaduct Over La River, Us 101, An...",13,project_location
1,Within The County Of Ventura,9,project_location
2,Citywide,8,project_location
3,Sixth Street Viaduct Over La River And East Sa...,8,project_location
4,Various Locations In Los Angeles County,7,project_location
5,10th Str. West Road Diet And Bikeway Improvem...,6,project_location
6,Amar Road Street Improvements: Officer Chiles ...,6,project_location
7,Community Of Florence-firestone In Los Angeles...,6,project_location
8,Down Town Area Of City Of Culver City,6,project_location
9,Intersection Of Avalon Blvd And Carson St. In ...,6,project_location


## Number of Obligations by Year

In [26]:
# Row counts per prepared year, taken from the top-value table.
(
    df_top
    >> filter(_.variable == 'prepared_y')
)

Unnamed: 0,value,count,variable
0,2014.0,430,prepared_y
1,2016.0,376,prepared_y
2,2015.0,370,prepared_y
3,2019.0,341,prepared_y
4,2018.0,333,prepared_y
5,2017.0,326,prepared_y
6,2020.0,319,prepared_y
7,2021.0,287,prepared_y
8,2022.0,92,prepared_y
9,2013.0,32,prepared_y


In [28]:
# Rows per (agency, prepared date) pair, largest counts first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prepared_date)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prepared_date,n
1488,Los Angeles County Metropolitan Transportation...,2015-07-15,9
1510,Los Angeles County Metropolitan Transportation...,2018-05-31,9
2,Access Services,2016-03-02,7
957,Los Angeles,2019-02-07,7
1669,Oxnard,2016-06-24,6
...,...,...,...
2537,Whittier,2020-09-03,1
2538,Whittier,2021-09-24,1
2539,Whittier,2021-10-20,1
2540,Whittier,2021-12-14,1


### Chart

In [30]:
# Line chart of the yearly row counts.
_dla_utils.basic_line_chart(
    df_top >> filter(_.variable == 'prepared_y'),
    'value',
    'count',
    subset,
    'Obligations by Year',
)

## Project numbers with the most obligations, by agency and prefix

In [31]:
# Rows per (agency, prefix, project number); keep combinations with more than 5.
(
    df
    >> group_by(_.primary_agency_name, _.prefix)
    >> count(_.project_no)
    >> arrange(-_.n)
    >> filter(_.n > 5)
)

Unnamed: 0,primary_agency_name,prefix,project_no,n
377,Los Angeles,BRLSZD,5006(839),9
849,Los Angeles County Metropolitan Transportation...,FTAATPL,6065(225),9
857,Los Angeles County Metropolitan Transportation...,FTACML,6065(199),9
373,Los Angeles,BRLSZD,5006(664),8
2,Access Services,FTASTPL,6312(022),7
62,Burbank,CML,5200(046),6
114,Carson,HSIPL,5403(022),6
146,Culver City,CML,5240(025),6
255,Lancaster,ATPL,5419(049),6
294,Long Beach,CML,5108(181),6


## Average Funds

In [32]:
# Mean `total_requested` per agency (`primary_agency_name`).
_dla_utils.calculate_data_all(df, 'total_requested', 'primary_agency_name', aggfunc="mean")

Unnamed: 0,primary_agency_name,total_requested
0,Access Services,35232367.61
51,Los Angeles County Metropolitan Transportation...,18368332.19
15,Caltrans,15656801.78
95,Southern California Regional Rail Authority,7468825.00
47,Long Beach,4726597.42
...,...,...
35,Hermosa Beach,33179.73
13,Calabasas,7658.33
70,Port Hueneme,6841.71
106,Westlake Village,6042.51


### Top 50 Agencies with highest funds

In [33]:
# Mean total_requested per agency, sorted high to low; keep the first 50 rows.
(
    _dla_utils.calculate_data_all(
        df, 'total_requested', 'primary_agency_name', aggfunc="mean"
    )
    >> arrange(-_.total_requested)
).head(50)

Unnamed: 0,primary_agency_name,total_requested
0,Access Services,35232367.61
51,Los Angeles County Metropolitan Transportation...,18368332.19
15,Caltrans,15656801.78
95,Southern California Regional Rail Authority,7468825.0
47,Long Beach,4726597.42
96,Sunline Transit Agency,4622594.07
80,San Gabriel Valley Council of Governments,2697375.33
4,Antelope Valley Transit Authority,2475103.0
90,South Coast Area Transit,2109462.57
48,Long Beach Transportation Company,1764100.5


In [36]:
# Chart the 30 agencies with the highest mean total_requested.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, 'total_requested', 'primary_agency_name', aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).head(30),
    'primary_agency_name',
    'total_requested',
    'primary_agency_name',
    subset,
    'Average Total Requested Funds by Agency',
)

### Bottom 50 Agencies with lowest funds

In [37]:
# Mean total_requested per agency, sorted high to low; the last 50 rows are kept.
avg_funds_bottom = (
    df
    >> group_by(_.primary_agency_name)
    >> summarize(avg_funds=_.total_requested.mean())
    >> arrange(-_.avg_funds)
).tail(50)



In [38]:
# Show the last 50 agencies by average total_requested.
avg_funds_bottom

Unnamed: 0,primary_agency_name,avg_funds
43,La Verne,242217.88
8,Baldwin Park,242026.12
101,Ventura County,219162.13
16,Camarillo,212175.88
25,Diamond Bar,210080.8
42,La Puente,206681.29
66,Paramount,203748.56
18,Cerritos,183759.84
75,Rosemead,181069.7
86,Sierra Madre,176777.5


In [40]:
# Chart the agencies held in `avg_funds_bottom`.
_dla_utils.basic_bar_chart(
    avg_funds_bottom,
    'primary_agency_name',
    'avg_funds',
    'primary_agency_name',
    subset,
    'Lowest Average Total Funds by Agency',
)

In [41]:
# Mean total_requested per agency from the project helper, sorted high to low;
# keep the last 50 rows.
(
    _dla_utils.calculate_data_all(
        df,
        'total_requested',
        'primary_agency_name',
        aggfunc="mean",
    )
    >> arrange(-_.total_requested)
).tail(50)

Unnamed: 0,primary_agency_name,total_requested
73,Redondo Beach,362835.53
68,Pico Rivera,348192.55
20,Commerce,332105.41
78,San Fernando,331761.47
31,Glendale,325208.9
76,San Buenaventura,292997.63
88,Simi Valley,281380.67
44,Lakewood,279118.16
11,Bellflower,275618.66
59,Monterey Park,266915.49


In [43]:
# Chart the last 50 agencies by mean total_requested (helper-based ranking,
# sorted high to low). Every other bar chart in this notebook passes an explicit
# title as the final argument; this call omitted it, so one is supplied here.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, 'total_requested', 'primary_agency_name', aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).tail(50),
    'primary_agency_name',
    'total_requested',
    'total_requested',
    subset,
    'Lowest Average Total Requested Funds by Agency',
)

In [44]:
# Interesting that these are very different charts.

## Functions to Look Up Obligations by Prefix and by Agency

### By Agency:

In [45]:
# Interactive explorer keyed on agency; renders a Dropdown plus an Output area (see below).
_dla_utils.interactive_widget(df, 'primary_agency_name')

Dropdown(description='Primary_Agency_Name', options=('Access Services', 'Agoura Hills', 'Alameda Corridor Tran…

Output()

### By Prefix:

In [46]:
# Interactive explorer keyed on prefix code; renders a Dropdown plus an Output area (see below).
_dla_utils.interactive_widget(df, 'prefix')

Dropdown(description='Prefix', options=('ACNHPI', 'ACSTER', 'ACSTP', 'ATCMTD', 'ATPCML', 'ATPL', 'ATPLNI', 'AT…

Output()