# Obligations in District 7

Using data from the Division of Local Assistance [Obligation list](https://dot.ca.gov/programs/local-assistance/reports/e-76-obligated)

In [2]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from shared_utils import altair_utils
# Enable the Altair theme registered under the name "fivethirtyeight".
alt.themes.enable("fivethirtyeight")


import clean_data
import _dla_utils

import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
# `display` is part of the public IPython.display API; importing it from
# IPython.core.display is deprecated and warns on newer IPython releases.
from IPython.display import display

In [3]:
# Load the obligations table from the local parquet file.
# NOTE(review): the commented-out alternative below refers to `_clean_data`, but the
# import cell brings the module in as `clean_data` -- confirm the name before re-enabling.
#df = _clean_data.make_clean_data()
df= pd.read_parquet("dla_df.parquet")

In [4]:
# Peek at one randomly chosen row. No random_state is set, so the row shown
# differs from run to run.
df.sample(n=1)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
12160,Obligated,SRTSLNI,5193(034),Taft,2013-12-31,2014-03-25,2014-03-25,2014-04-15,2014-04-15,409650.0,...,KER110602,Various Locations (site Plan Attached),"Install Ramps, Speed Signs, In-pavement Lighting",2,NaT,NaT,KCOG,5193,2013.0,Taft


In [9]:
# Keep only the rows whose `dist` value is 7; every later cell works on this subset.
df = (
    df
    >> filter(_.dist == 7)
)

In [14]:
# Build the per-year summary of the dist == 7 rows with the project helper.
df_years = _dla_utils.count_all_years(df)

In [15]:
# Display the summary: one row per prepared_y with sum, mean and unique-count columns.
df_years

Unnamed: 0,prepared_y,dist,ac_requested_sum,fed_requested_sum,total_requested_sum,ac_requested_mean,fed_requested_mean,total_requested_mean,unique_mpo,unique_prefix,unique_primary_agency_name,unique_project_location,unique_project_no,unique_type_of_work
10,2021.0,7,-19826820.0,444283900.0,633692600.0,-82268.967718,1843502.0,2629430.0,2.0,41.0,62.0,203.0,210.0,196.0
9,2020.0,7,-27852110.0,418932800.0,958805200.0,-87037.856375,1309165.0,2996266.0,1.0,44.0,60.0,277.0,283.0,268.0
8,2019.0,7,-12994940.0,363282700.0,398693500.0,-37449.397695,1046924.0,1148973.0,2.0,37.0,67.0,293.0,304.0,285.0
0,2018.0,7,-59154980.0,333529800.0,334612400.0,-176056.483304,992648.3,995870.1,1.0,39.0,63.0,280.0,294.0,267.0
1,2017.0,7,-60396680.0,311587700.0,299707100.0,-184136.219878,949962.6,913741.1,2.0,39.0,73.0,282.0,296.0,266.0
2,2016.0,7,-93842130.0,430301300.0,490366200.0,-248918.123767,1144418.0,1300706.0,2.0,43.0,76.0,308.0,323.0,304.0
3,2015.0,7,134831500.0,259045000.0,585936400.0,364409.335135,700121.5,1583612.0,8.0,41.0,85.0,299.0,312.0,269.0
5,2014.0,7,234962200.0,291500800.0,585146100.0,546423.818605,677908.9,1360805.0,10.0,42.0,77.0,362.0,375.0,323.0
6,2013.0,7,28551370.0,14647600.0,-23007550.0,892230.4375,457737.4,-718986.1,4.0,20.0,18.0,30.0,32.0,31.0
7,2012.0,7,0.0,-109457.0,-123505.0,0.0,-109457.0,-123505.0,1.0,1.0,1.0,1.0,1.0,1.0


In [18]:
# Collect the most frequent values of several columns into one long table
# (columns in the displayed result: value, count, variable).
df_top = _dla_utils.find_top(df)

In [19]:
# Display the long table of top values; later cells filter it by `variable`.
df_top

Unnamed: 0,value,count,variable
0,HSIPL,705,prefix
1,STPL,441,prefix
2,CML,275,prefix
3,ER,175,prefix
4,HPLUL,143,prefix
...,...,...,...
16,Culver City,32,primary_agency_name
17,Burbank,30,primary_agency_name
18,Glendale,30,primary_agency_name
19,Ventura County Transportation Commission,29,primary_agency_name


## Which Agencies have Obligations

### How many Unique Agencies

In [12]:
# Number of distinct agency names in the filtered data (a single row for dist 7).
_dla_utils.calculate_data_all(
    df,
    "primary_agency_name",
    aggfunc="nunique",
)

Unnamed: 0,dist,primary_agency_name
0,7,108


### Agencies with the most Obligations

In [20]:
# Rows of df_top that describe the agency column, i.e. the agencies with the highest counts.
(
    df_top
    >> filter(_.variable == "primary_agency_name")
)

Unnamed: 0,value,count,variable
0,Los Angeles,490,primary_agency_name
1,Los Angeles County,451,primary_agency_name
2,Ventura County,115,primary_agency_name
3,Los Angeles County Metropolitan Transportation...,85,primary_agency_name
4,Santa Clarita,85,primary_agency_name
5,Long Beach,81,primary_agency_name
6,Oxnard,74,primary_agency_name
7,Pasadena,71,primary_agency_name
8,Downey,63,primary_agency_name
9,Simi Valley,61,primary_agency_name


### Agencies with the least Obligations

In [21]:
# Count rows per agency, sort by count descending, and show the last ten rows
# (the agencies with the fewest rows).
(
    df
    >> count(_.primary_agency_name)
    >> arrange(-_.n)
).tail(10)

Unnamed: 0,primary_agency_name,n
71,Port Of Long Beach,2
86,Sierra Madre,2
2,Alameda Corridor Transportation Authority,1
4,Antelope Valley Transit Authority,1
14,Calaveras County,1
35,Hermosa Beach,1
36,Huntington Park,1
70,Port Hueneme,1
74,Rolling Hills Estates,1
100,"U.S. Forest Service, Pacific Southwest Region",1


### Charting

In [25]:
# Bar chart of the top agencies: agency name (`value`) against row count (`count`).
_dla_utils.basic_bar_chart(
    df_top >> filter(_.variable == "primary_agency_name"),
    "value",
    "count",
)

In [30]:
# Rows of df_top that describe the prefix column, i.e. the most frequent prefix codes.
(
    df_top
    >> filter(_.variable == "prefix")
)

Unnamed: 0,value,count,variable
0,HSIPL,705,prefix
1,STPL,441,prefix
2,CML,275,prefix
3,ER,175,prefix
4,HPLUL,143,prefix
5,ATPL,135,prefix
6,BHLS,101,prefix
7,FTACML,94,prefix
8,SRTSL,72,prefix
9,RPSTPL,63,prefix


In [27]:
# Count rows for each (agency, prefix) pair, largest counts first.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prefix)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prefix,n
219,Los Angeles County,ER,150
199,Los Angeles,STPL,84
225,Los Angeles County,HSIPL,81
193,Los Angeles,HSIPL,71
183,Los Angeles,CML,63
...,...,...,...
468,Vernon,HSIP,1
469,Vernon,HSIPL,1
471,West Covina,HPLUL,1
475,Westlake Village,HSIPL,1


## Number of Unique Prefix Codes

In [32]:
# Per-agency tally of prefix codes; the displayed result has one row per agency with a count `n`.
_dla_utils.get_nunique(
    df,
    "prefix",
    "primary_agency_name",
)

Unnamed: 0,primary_agency_name,n
49,Los Angeles,34
50,Los Angeles County,31
47,Long Beach,18
51,Los Angeles County Metropolitan Transportation...,18
63,Oxnard,14
...,...,...
90,South Coast Area Transit,1
91,South El Monte,1
95,Southern California Regional Rail Authority,1
100,"U.S. Forest Service, Pacific Southwest Region",1


In [35]:
# Bar chart of the first 30 rows of the per-agency prefix tally.
_dla_utils.basic_bar_chart(
    _dla_utils.get_nunique(df, "prefix", "primary_agency_name").head(30),
    "primary_agency_name",
    "n",
)

## Most Common Types of Work by Agency

In [38]:
# Per type_of_work tally of agency names; the displayed result has one row per
# type of work with a count `n`.
_dla_utils.get_nunique(
    df,
    "primary_agency_name",
    "type_of_work",
)

Unnamed: 0,type_of_work,n
342,FTA Transfer,16
920,Road Rehabilitation (tc),7
306,Emergency Opening,6
91,Bridge Rehabilitation,5
1161,Upgrade Signals,5
...,...,...
1225,Widening Wilmington Ave + I-405 Ramp Modification,1
1226,"Widening, Right Turn Lanes, La",1
1227,"Widening,install Sidewalks, Traffic Improvements,",1
1228,"Widenning Roadway(install Right Turn Lanes, Etc)",1


In [44]:
# Row counts per type_of_work, limited to the head of the ranking by the helper.
_dla_utils.calculate_data_head(
    df,
    "primary_agency_name",
    "type_of_work",
    aggfunc="count",
)

Unnamed: 0,type_of_work,primary_agency_name
342,FTA Transfer,130
306,Emergency Opening,34
920,Road Rehabilitation (tc),20
103,Bridge Replacement,19
91,Bridge Rehabilitation,18
1096,Traffic Signal Improvements,13
727,Pedestrian Improvements,13
104,Bridge Replacement (tc),12
1161,Upgrade Signals,12
59,Bikeway Access Improvements,11


In [45]:
# Count rows for each (agency, type_of_work) pair and keep pairs seen at least 8 times.
# `la_df` is never defined in this notebook, so the cell failed on a fresh kernel;
# `df` already holds the dist == 7 rows and is used instead.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.type_of_work)
    >> arrange(-_.n)
    >> filter(_.n >= 8)
)

Unnamed: 0,primary_agency_name,type_of_work,n
801,Los Angeles County Metropolitan Transportation...,FTA Transfer,58
655,Los Angeles County,Emergency Opening,25
0,Access Services,FTA Transfer,17
1270,Ventura County Transportation Commission,FTA Transfer,15
352,Los Angeles,Bridge Rehabilitation,11
603,Los Angeles County,Bikeway Access Improvements,11
496,Los Angeles,Pedestrian Improvements,10
179,Downey,Upgrade Traffic Signals; Install Left-turn Pha...,9
263,Lancaster,Construct Roundabout Within Existing R/w,9
354,Los Angeles,Bridge Rehabilitation/widening,9


## Most Common Project Locations

In [47]:
# Rows of df_top that describe project_location, i.e. the most frequent locations.
(
    df_top
    >> filter(_.variable == "project_location")
)

Unnamed: 0,value,count,variable
0,"Sixth Street Viaduct Over La River, Us 101, An...",13,project_location
1,Within The County Of Ventura,9,project_location
2,Citywide,8,project_location
3,Sixth Street Viaduct Over La River And East Sa...,8,project_location
4,Down Town Area Of City Of Culver City,7,project_location
5,Various Locations In Los Angeles County,7,project_location
6,Amar Road Street Improvements: Officer Chiles ...,6,project_location
7,Community Of Florence-firestone In Los Angeles...,6,project_location
8,Intersection Of Avalon Blvd And Carson St. In ...,6,project_location
9,Pasadena Ave./sr 210 Wb On-ramp At Walnut St.,6,project_location


## Number of Obligations by Year

In [49]:
# Rows of df_top that describe prepared_y, i.e. the row count for each prepared year.
(
    df_top
    >> filter(_.variable == "prepared_y")
)

Unnamed: 0,value,count,variable
0,2014.0,430,prepared_y
1,2016.0,377,prepared_y
2,2015.0,370,prepared_y
3,2019.0,347,prepared_y
4,2018.0,336,prepared_y
5,2017.0,328,prepared_y
6,2020.0,320,prepared_y
7,2021.0,241,prepared_y
8,2013.0,32,prepared_y
9,2012.0,1,prepared_y


In [50]:
# Count rows for each (agency, prepared_date) pair, largest counts first.
# `la_df` is never defined in this notebook, so the cell failed on a fresh kernel;
# `df` already holds the dist == 7 rows and is used instead.
(
    df
    >> group_by(_.primary_agency_name)
    >> count(_.prepared_date)
    >> arrange(-_.n)
)

Unnamed: 0,primary_agency_name,prepared_date,n
1419,Los Angeles County Metropolitan Transportation...,2015-07-15,9
1441,Los Angeles County Metropolitan Transportation...,2018-05-31,9
2,Access Services,2016-03-02,7
926,Los Angeles,2019-02-07,7
1594,Oxnard,2016-06-24,6
...,...,...,...
2432,Whittier,2020-05-21,1
2433,Whittier,2020-08-23,1
2434,Whittier,2020-09-03,1
2435,Whittier,2021-09-24,1


### Chart

In [51]:
# Line chart of row count per prepared year, taken from the prepared_y rows of df_top.
_dla_utils.basic_line_chart(
    df_top >> filter(_.variable == "prepared_y"),
    "value",
    "count",
)

## Agencies with the most unique project numbers

In [25]:
# Count rows for each (agency, prefix, project_no) combination and keep those
# appearing more than 5 times.
# `la_df` is never defined in this notebook, so the cell failed on a fresh kernel;
# `df` already holds the dist == 7 rows and is used instead.
(
    df
    >> group_by(_.primary_agency_name, _.prefix)
    >> count(_.project_no)
    >> arrange(-_.n)
    >> filter(_.n > 5)
)

Unnamed: 0,primary_agency_name,prefix,project_no,n
368,Los Angeles,BRLSZD,5006(839),9
829,Los Angeles County Metropolitan Transportation...,FTAATPL,6065(225),9
837,Los Angeles County Metropolitan Transportation...,FTACML,6065(199),9
364,Los Angeles,BRLSZD,5006(664),8
2,Access Services,FTASTPL,6312(022),7
142,Culver City,CML,5240(025),7
110,Carson,HSIPL,5403(022),6
286,Long Beach,CML,5108(181),6
365,Los Angeles,BRLSZD,5006(811),6
982,Pasadena,HSIPL,5064(075),6


## Average Funds

In [59]:
# Mean total_requested for each agency.
_dla_utils.calculate_data_all(
    df,
    "total_requested",
    "primary_agency_name",
    aggfunc="mean",
)

Unnamed: 0,primary_agency_name,total_requested
0,Access Services,3.301074e+07
51,Los Angeles County Metropolitan Transportation...,1.696266e+07
15,Caltrans,1.552537e+07
95,Southern California Regional Rail Authority,8.633433e+06
37,Industry,6.324183e+06
...,...,...
13,Calabasas,7.658333e+03
70,Port Hueneme,6.841710e+03
106,Westlake Village,6.042513e+03
60,Moorpark,5.751582e+03


### Top 50 Agencies with highest average funds

In [62]:
# Agencies ranked by mean total_requested, highest first; keep the first 50 rows.
(
    _dla_utils.calculate_data_all(
        df, "total_requested", "primary_agency_name", aggfunc="mean"
    )
    >> arrange(-_.total_requested)
).head(50)

Unnamed: 0,primary_agency_name,total_requested
0,Access Services,33010740.0
51,Los Angeles County Metropolitan Transportation...,16962660.0
15,Caltrans,15525370.0
95,Southern California Regional Rail Authority,8633433.0
37,Industry,6324183.0
47,Long Beach,4771285.0
96,Sunline Transit Agency,4622594.0
80,San Gabriel Valley Council of Governments,2697375.0
4,Antelope Valley Transit Authority,2475103.0
90,South Coast Area Transit,2109463.0


In [70]:
# Bar chart of the 50 agencies with the highest mean total_requested.
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, "total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).head(50),
    "primary_agency_name",
    "total_requested",
)

### Bottom 50 Agencies with lowest average funds

In [85]:
# Mean total_requested per agency, sorted highest first; the last 50 rows are
# the agencies with the lowest averages.
avg_funds_bottom = (
    df
    >> group_by(_.primary_agency_name)
    >> summarize(avg_funds=_.total_requested.mean())
    >> arrange(-_.avg_funds)
).tail(50)



In [86]:
# Display the 50 agencies with the lowest mean total_requested.
avg_funds_bottom

Unnamed: 0,primary_agency_name,avg_funds
43,La Verne,267562.6
59,Monterey Park,266915.5
65,Palos Verdes Est,251243.9
39,Irwindale,237291.3
101,Ventura County,216950.0
25,Diamond Bar,210080.8
42,La Puente,206681.3
18,Cerritos,204092.0
66,Paramount,203748.6
58,Montebello,186486.9


In [87]:
# Bar chart of the bottom-50 table: agency name against its mean total_requested.
_dla_utils.basic_bar_chart(
    avg_funds_bottom,
    "primary_agency_name",
    "avg_funds",
)

In [88]:
# Same ranking as the top-50 cell, computed through the helper; keep the last 50 rows.
(
    _dla_utils.calculate_data_all(
        df, "total_requested", "primary_agency_name", aggfunc="mean"
    )
    >> arrange(-_.total_requested)
).tail(50)

Unnamed: 0,primary_agency_name,total_requested
22,Covina,396412.2
78,San Fernando,392081.732727
40,La Canada Flintridge,376627.693
68,Pico Rivera,372655.048
73,Redondo Beach,362835.53
8,Baldwin Park,315742.857143
76,San Buenaventura,292454.708286
12,Burbank,281775.839667
88,Simi Valley,281380.672951
44,Lakewood,279118.16


In [72]:
# Bar chart of the last 50 agencies when ranked by mean total_requested (helper-based).
_dla_utils.basic_bar_chart(
    (
        _dla_utils.calculate_data_all(
            df, "total_requested", "primary_agency_name", aggfunc="mean"
        )
        >> arrange(-_.total_requested)
    ).tail(50),
    "primary_agency_name",
    "total_requested",
)

In [77]:
# Interesting that these are very different charts.

## Functions to look up obligations by Prefix and by Agency

### By Agency:

In [92]:
# Dropdown-driven view of the data keyed on agency name (renders a Dropdown plus an Output area).
_dla_utils.interactive_widget(
    df,
    "primary_agency_name",
)

Dropdown(description='Primary_Agency_Name', options=('Access Services', 'Agoura Hills', 'Alameda Corridor Tran…

Output()

### By Prefix:

In [93]:
# Dropdown-driven view of the data keyed on prefix code (renders a Dropdown plus an Output area).
_dla_utils.interactive_widget(
    df,
    "prefix",
)

Dropdown(description='Prefix', options=('ACNHPI', 'ACSTER', 'ACSTP', 'ATCMTD', 'ATPCML', 'ATPL', 'ATPLNI', 'AT…

Output()