# Obligations in District 7

This notebook uses data from the Division of Local Assistance [Obligation list](https://dot.ca.gov/programs/local-assistance/reports/e-76-obligated) to summarize obligations in District 7.

In [1]:
import numpy as np
import pandas as pd
from siuba import *
import _clean_data
import altair as alt
import altair_saver
from calitp import *
from plotnine import *

from shared_utils import altair_utils
alt.themes.enable("fivethirtyeight")


import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display



In [5]:
import _dla_utils

In [2]:
# df = _clean_data.read_data()
# df = _clean_data.clean_data(df)
# df = _clean_data.prefix_cleaning(df)
# df = _clean_data.clean_agency_names(df)

In [3]:
df= pd.read_parquet("dla_df.parquet")

In [4]:
df.sample()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
14811,Obligated,RPSTCML,6088(068),San Joaquin Council Of Governments,2019-09-04,2019-09-04,2019-09-04,2019-09-04,2019-09-06,6539650.0,...,SJ07-1003,"Near Tracy, I-205 From The Alameda County Line...",I-205 Managed Lanes. Widen From 6 To 8 Lanes W...,1,2019-06-26,2019-06-27,SJCG,6088,2019.0,San Joaquin Council of Governments


In [6]:
df1 = _dla_utils.count_all_years((df>>filter(_.dist==7)))

In [8]:
df1

Unnamed: 0,prepared_y,dist,ac_requested_sum,fed_requested_sum,total_requested_sum,ac_requested_mean,fed_requested_mean,total_requested_mean,unique_mpo,unique_prefix,unique_primary_agency_name,unique_project_location,unique_project_no,unique_type_of_work
10,2021.0,7,-19826820.0,444283900.0,633692600.0,-82268.967718,1843502.0,2629430.0,2.0,41.0,62.0,203.0,210.0,196.0
9,2020.0,7,-27852110.0,418932800.0,958805200.0,-87037.856375,1309165.0,2996266.0,1.0,44.0,60.0,277.0,283.0,268.0
8,2019.0,7,-12994940.0,363282700.0,398693500.0,-37449.397695,1046924.0,1148973.0,2.0,37.0,67.0,293.0,304.0,285.0
0,2018.0,7,-59154980.0,333529800.0,334612400.0,-176056.483304,992648.3,995870.1,1.0,39.0,63.0,280.0,294.0,267.0
1,2017.0,7,-60396680.0,311587700.0,299707100.0,-184136.219878,949962.6,913741.1,2.0,39.0,73.0,282.0,296.0,266.0
2,2016.0,7,-93842130.0,430301300.0,490366200.0,-248918.123767,1144418.0,1300706.0,2.0,43.0,76.0,308.0,323.0,304.0
3,2015.0,7,134831500.0,259045000.0,585936400.0,364409.335135,700121.5,1583612.0,8.0,41.0,85.0,299.0,312.0,269.0
5,2014.0,7,234962200.0,291500800.0,585146100.0,546423.818605,677908.9,1360805.0,10.0,42.0,77.0,362.0,375.0,323.0
6,2013.0,7,28551370.0,14647600.0,-23007550.0,892230.4375,457737.4,-718986.1,4.0,20.0,18.0,30.0,32.0,31.0
7,2012.0,7,0.0,-109457.0,-123505.0,0.0,-109457.0,-123505.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
df= (df>>filter(_.dist==7))

## Adding Catalog Datasets

In [21]:
import intake
import geopandas as gpd

In [22]:
def read_catalog(
    df,
    catalog_path="catalog.yml",
    locode_path='gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/locodes_updated7122021.xlsx',
):
    """Attach locode, city-boundary and county-boundary attributes to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Obligation rows; must contain a ``primary_agency_name`` column.
    catalog_path : str, optional
        Path of the intake catalog that exposes
        ``ca_open_data.city_boundary`` and ``ca_open_data.county_boundary``.
    locode_path : str, optional
        Location of the locode Excel workbook. Every sheet is read and the
        sheets are stacked into a single frame.

    Returns
    -------
    pandas.DataFrame
        ``df`` (without 'Calaveras County' rows) left-merged with the locode
        table on agency name, then with the city boundaries on ``NAME``, then
        left-joined to the county boundaries on ``county_name``.

    Notes
    -----
    All three merges are left joins, so rows of ``df`` are never dropped by
    them; agencies without a match simply get nulls in the added columns.
    """
    catalog = intake.open_catalog(catalog_path)

    # Only the city and county layers are used below; the district and RTPA
    # layers that used to be read here were never referenced.
    city_bound = catalog.ca_open_data.city_boundary.read()
    county_bound = catalog.ca_open_data.county_boundary.read()

    # sheet_name=None returns one frame per sheet; concat stacks them all.
    locode_df = pd.concat(pd.read_excel(locode_path, sheet_name=None), ignore_index=True)
    # presumably normalises the workbook headers to snake_case (the merge
    # below relies on an `agency_name` column) - TODO confirm
    locode_df = to_snakecase(locode_df)

    # County names gain a ' County' suffix before becoming the `county_name`
    # join key; the geometry column is renamed so it cannot collide with the
    # city geometry.
    county_bound['name'] = county_bound['name'] + ' County'
    county_bound.rename(columns={'name': 'county_name', 'geometry': 'geometry2'}, inplace=True)

    # deleting Calaveras County because the location of the project is not in district 7
    # (boolean mask instead of drop-by-label so a non-unique index cannot
    # remove unrelated rows)
    df = df[df["primary_agency_name"] != 'Calaveras County']

    new_df1 = pd.merge(df, locode_df, how='left', left_on=['primary_agency_name'], right_on=['agency_name'])
    new_df2 = pd.merge(new_df1, city_bound, how='left', left_on=['primary_agency_name'], right_on=['NAME'])
    # assumes `county_name` is present after the merges above (not visible
    # from this notebook which input supplies it) - verify
    new_df3 = left_join(new_df2, county_bound, on="county_name")

    return new_df3



In [23]:
df_test = read_catalog(df)



## How Many Agencies are Cities?

In [24]:
df_test.NAME.notnull().value_counts()

True     1971
False    1031
Name: NAME, dtype: int64

### number of unique agency names that are **cities**

In [25]:
len(df_test>>filter(_.NAME.notnull())>>count(_.primary_agency_name))

87

### number of unique agency names that are **not** cities

In [26]:
len(df_test>>filter(_.NAME.isnull())>>count(_.primary_agency_name))

20

In [27]:
df_test>>filter(_.NAME.isnull())>>count(_.primary_agency_name)

Unnamed: 0,primary_agency_name,n
0,Access Services,17
1,Alameda Corridor Transportation Authority,1
2,Antelope Valley Transit Authority,1
3,Caltrans,234
4,Long Beach Transportation Company,2
5,Los Angeles County,451
6,Los Angeles County Metropolitan Transportation...,85
7,Los Angeles Unified School District,2
8,Palos Verdes Est,2
9,Port Of Long Beach,2


### Cities with the most obligations

In [28]:
topten_city =(df_test>>filter(_.NAME.notnull())>>count(_.primary_agency_name)>>arrange(-_.n)).head(10)

In [29]:
topten_city.rename(columns={'primary_agency_name': 'Primary Agency Name', 'n': 'Count'}, inplace=True)

In [30]:
# Caption shown above the rendered table (spelling of "Obligations" fixed).
topten_city.style.set_caption('District 7 Cities with the Most Obligations')

Unnamed: 0,Primary Agency Name,Count
43,Los Angeles,490
69,Santa Clarita,85
42,Long Beach,81
54,Oxnard,74
57,Pasadena,71
21,Downey,63
75,Simi Valley,61
40,Lancaster,57
55,Palmdale,46
58,Pico Rivera,40


In [31]:
# Sum of total_requested for every agency, ordered from largest to smallest.
grouped_by_agency = group_by(df_test, _.primary_agency_name)
agency_totals = summarize(grouped_by_agency, Total_Funds=_.total_requested.sum())
test = arrange(agency_totals, -_.Total_Funds)

In [32]:
test.rename(columns={'primary_agency_name': 'Primary Agency Name', 'Total_Funds': 'Total Funds'}, inplace=True)

In [33]:
test.style

Unnamed: 0,Primary Agency Name,Total Funds
14,Caltrans,3632937185.67
50,Los Angeles County Metropolitan Transportation Authority,1441826242.94
48,Los Angeles,777759360.65
0,Access Services,561182617.0
46,Long Beach,386474097.64
49,Los Angeles County,192452850.83
79,San Gabriel Valley Council of Governments,48552755.85
83,Santa Monica,45649754.7
66,Pasadena,41592589.15
81,Santa Clarita,40362443.14


In [53]:
#test['Total Funds'] = test['Total Funds'].map('${:,.2f}'.format)

In [34]:
test.style.format(precision=2, na_rep='MISSING', thousands=",")

Unnamed: 0,Primary Agency Name,Total Funds
14,Caltrans,3632937185.67
50,Los Angeles County Metropolitan Transportation Authority,1441826242.94
48,Los Angeles,777759360.65
0,Access Services,561182617.0
46,Long Beach,386474097.64
49,Los Angeles County,192452850.83
79,San Gabriel Valley Council of Governments,48552755.85
83,Santa Monica,45649754.7
66,Pasadena,41592589.15
81,Santa Clarita,40362443.14


In [35]:
test.style.set_caption('District 7 Agencies with the most Obligation Amounts')

Unnamed: 0,Primary Agency Name,Total Funds
14,Caltrans,3632937185.67
50,Los Angeles County Metropolitan Transportation Authority,1441826242.94
48,Los Angeles,777759360.65
0,Access Services,561182617.0
46,Long Beach,386474097.64
49,Los Angeles County,192452850.83
79,San Gabriel Valley Council of Governments,48552755.85
83,Santa Monica,45649754.7
66,Pasadena,41592589.15
81,Santa Clarita,40362443.14


In [36]:
# Bar chart of the first 20 rows of `test` (already sorted by total funds).
top_funded = test.head(20)
agency_colors = alt.Scale(range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS)

chart = alt.Chart(top_funded).mark_bar().encode(
    x=alt.X("Primary Agency Name", title="Agency Name"),
    y=alt.Y("Total Funds", title="Total Funds Obligated"),
    color=alt.Color("Primary Agency Name", scale=agency_colors),
).properties(title="District 7 Agencies with the Most Total Funds Obligated")

chart

In [57]:
def calculate_sum(df, col, aggfunc="sum", group_col="primary_agency_name"):
    """Aggregate one column per group and sort the result, largest first.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain ``group_col`` and ``col``.
    col : str
        Column to aggregate.
    aggfunc : str or callable, optional
        Aggregation passed to ``DataFrame.agg`` (default ``"sum"``).
    group_col : str, optional
        Column to group by (default ``"primary_agency_name"``).

    Returns
    -------
    pandas.DataFrame
        Two columns, ``group_col`` and ``col``, sorted by ``col`` descending.
        The input frame is not modified.
    """
    return (
        df.groupby(group_col)
        .agg({col: aggfunc})
        .reset_index()
        .sort_values(col, ascending=False)
    )

# The notebook only does `from shared_utils import altair_utils`, so the bare
# name `shared_utils` is not bound; import the submodule that is needed here.
from shared_utils import geography_utils

# Sum the three requested-amount columns per agency in one pass. The
# placeholder column "some_other" was replaced with real column names, and the
# result is kept in a variable instead of being discarded.
agency_sums = geography_utils.aggregate_by_geography(df_test,
                                                     group_cols=["primary_agency_name"],
                                                     sum_cols=["fed_requested", "ac_requested", "total_requested"],
                                                     nunique_cols=[])

# Federal dollars per agency via the pandas helper.
df2 = calculate_sum(df_test, col="fed_requested", aggfunc="sum")

# The same aggregation with siuba verbs, sorted largest first and relabelled
# for display.
fed_by_agency = summarize(
    group_by(df_test, _.primary_agency_name),
    Total_fed_Funds=_.fed_requested.sum(),
)
test2 = arrange(fed_by_agency, -_.Total_fed_Funds).rename(
    columns={'primary_agency_name': 'Primary Agency Name', 'Total_fed_Funds': 'Total Federal Funds'}
)

In [58]:
test2.style.format(precision=2, na_rep='MISSING', thousands=",")

Unnamed: 0,Primary Agency Name,Total Federal Funds
14,Caltrans,1036315502.69
50,Los Angeles County Metropolitan Transportation Authority,945744425.18
0,Access Services,561182617.0
48,Los Angeles,540610799.85
49,Los Angeles County,126796237.25
46,Long Beach,71900830.44
101,Ventura County Transportation Commission,35855396.56
95,Sunline Transit Agency,32151844.67
83,Santa Monica,29458968.28
81,Santa Clarita,29207795.98


In [61]:
# Sum of ac_requested per agency, sorted largest first and relabelled for
# display.
ac_by_agency = summarize(
    group_by(df_test, _.primary_agency_name),
    Total_ac_Funds=_.ac_requested.sum(),
)
test3 = arrange(ac_by_agency, -_.Total_ac_Funds).rename(
    columns={'primary_agency_name': 'Primary Agency Name', 'Total_ac_Funds': 'Total Advance Construction Funds'}
)

In [62]:
test3.style.format(precision=2, na_rep='MISSING', thousands=",")

Unnamed: 0,Primary Agency Name,Total Advance Construction Funds
14,Caltrans,939055000.0
48,Los Angeles,54175401.08
49,Los Angeles County,20415214.27
102,Vernon,3508709.0
0,Access Services,0.0
1,Agoura Hills,0.0
2,Alameda Corridor Transportation Authority,0.0
3,Alhambra,0.0
4,Antelope Valley Transit Authority,0.0
5,Arcadia,0.0


In [63]:
len(test2)

107

In [64]:
all_sum = full_join(test, test2, on = "Primary Agency Name")

In [65]:
all_sum2 = full_join(all_sum, test3, on = "Primary Agency Name")

In [66]:
# Same table as all_sum2; copy it instead of repeating the identical join so
# the 'Totals' column added later does not leak into all_sum2.
all_sum3 = all_sum2.copy()

In [67]:
# Build the total from all_sum3's own columns rather than from a second frame,
# so the result does not depend on the two frames sharing an index.
# NOTE(review): 'Total Funds' may already include the federal share (for
# Access Services the two columns are equal in the tables shown), in which
# case this sum double-counts - confirm the intended metric.
all_sum3['Totals'] = (
    all_sum3['Total Funds']
    + all_sum3['Total Federal Funds']
    + all_sum3['Total Advance Construction Funds']
)

In [68]:
all_sum3 = all_sum3>>arrange(-_.Totals)

In [69]:
all_sum3.style.format(precision=2, na_rep='MISSING', thousands=",")

Unnamed: 0,Primary Agency Name,Total Funds,Total Federal Funds,Total Advance Construction Funds,Totals
0,Caltrans,3632937185.67,1036315502.69,939055000.0,5608307688.36
1,Los Angeles County Metropolitan Transportation Authority,1441826242.94,945744425.18,0.0,2387570668.12
2,Los Angeles,777759360.65,540610799.85,54175401.08,1372545561.58
3,Access Services,561182617.0,561182617.0,0.0,1122365234.0
4,Long Beach,386474097.64,71900830.44,0.0,458374928.08
5,Los Angeles County,192452850.83,126796237.25,20415214.27,339664302.35
7,Santa Monica,45649754.7,29458968.28,-1543000.0,73565722.98
8,Pasadena,41592589.15,28583452.96,0.0,70176042.11
9,Santa Clarita,40362443.14,29207795.98,0.0,69570239.12
11,Sunline Transit Agency,32358158.46,32151844.67,0.0,64510003.13


In [70]:
all_sum2.style.format(precision=2, na_rep='MISSING', thousands=",")

Unnamed: 0,Primary Agency Name,Total Funds,Total Federal Funds,Total Advance Construction Funds
0,Caltrans,3632937185.67,1036315502.69,939055000.0
1,Los Angeles County Metropolitan Transportation Authority,1441826242.94,945744425.18,0.0
2,Los Angeles,777759360.65,540610799.85,54175401.08
3,Access Services,561182617.0,561182617.0,0.0
4,Long Beach,386474097.64,71900830.44,0.0
5,Los Angeles County,192452850.83,126796237.25,20415214.27
6,San Gabriel Valley Council of Governments,48552755.85,6482174.74,-315303.0
7,Santa Monica,45649754.7,29458968.28,-1543000.0
8,Pasadena,41592589.15,28583452.96,0.0
9,Santa Clarita,40362443.14,29207795.98,0.0


In [72]:
# Bar chart of the first 20 rows of `all_sum3` (already sorted by 'Totals').
top_totals = all_sum3.head(20)
totals_colors = alt.Scale(range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS)

chart = alt.Chart(top_totals).mark_bar().encode(
    x=alt.X("Primary Agency Name", title="Agency Name"),
    y=alt.Y("Totals", title="Sum of Funds Obligated"),
    color=alt.Color("Primary Agency Name", scale=totals_colors),
).properties(title="District 7 Agencies with the Highest Sum of Obligated Funds")

chart

### Non-Cities with the most obligations

In [73]:
topten_noncity =(df_test
 >>filter(_.NAME.isnull())
 >>count(_.primary_agency_name)
 >>arrange(-_.n)).head(10)

In [74]:
topten_noncity.rename(columns={'primary_agency_name': 'Primary Agency Name', 'n': 'Count'}, inplace=True)

In [75]:
# Caption shown above the rendered table (spelling of "Obligations" fixed).
topten_noncity.style.set_caption('District 7 Agencies with the Most Obligations (non-city)')

Unnamed: 0,Primary Agency Name,Count
5,Los Angeles County,451
3,Caltrans,234
18,Ventura County,115
6,Los Angeles County Metropolitan Transportation Authority,85
10,San Buenaventura,35
19,Ventura County Transportation Commission,29
11,San Gabriel Valley Council of Governments,18
0,Access Services,17
14,Southern California Association of Governments,13
13,South Coast Area Transit,7


## OLD Which Agencies have Obligations

### How many Unique Agencies

In [100]:
# `la_df` is used throughout the rest of the notebook but is never assigned in
# it, so these cells fail on a fresh kernel.
# NOTE(review): the sample rows shown for la_df have the obligation columns
# with dist == 7, which matches `df` after the district filter above - confirm
# this is the intended frame (e.g. whether 'Calaveras County' should be
# excluded here as well).
la_df = df

len(la_df>>count(_.primary_agency_name))

107

### Agencies with the most Obligations

In [101]:
(la_df>>count(_.primary_agency_name)>>arrange(-_.n)).head(10)

Unnamed: 0,primary_agency_name,n
48,Los Angeles,490
49,Los Angeles County,451
100,Ventura County,115
50,Los Angeles County Metropolitan Transportation...,85
81,Santa Clarita,85
46,Long Beach,81
62,Oxnard,74
66,Pasadena,71
25,Downey,63
87,Simi Valley,61


### Agencies with the least Obligations

In [102]:
(la_df>>count(_.primary_agency_name)>>arrange(-_.n)).tail(10)

Unnamed: 0,primary_agency_name,n
64,Palos Verdes Est,2
70,Port Of Long Beach,2
85,Sierra Madre,2
2,Alameda Corridor Transportation Authority,1
4,Antelope Valley Transit Authority,1
34,Hermosa Beach,1
35,Huntington Park,1
69,Port Hueneme,1
73,Rolling Hills Estates,1
99,"U.S. Forest Service, Pacific Southwest Region",1


### Charting

In [103]:
la_df_count = (la_df>>count(_.primary_agency_name)>>arrange(-_.n))

In [104]:
la_df_count.sample()

Unnamed: 0,primary_agency_name,n
45,Lawndale,3


In [105]:
# Bar chart of the number of obligations per agency. The title previously read
# "Number of Unique Prefixes ...", copied from the prefix chart, although the
# y-axis here is the obligation count `n`.
chart = (alt.Chart(la_df_count)
         .mark_bar()
         .encode(
             x=alt.X("primary_agency_name", title="Agency Name"),
             y=alt.Y("n", title="Number of Obligations"),
             color = alt.Color("n",
                              scale=alt.Scale(
                                  range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS
                              )
                              )
         )
        .properties(
                       title="Number of Obligations by Agency in District 7")

)

chart

In [106]:
la_df.sample()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,ac_requested,total_requested,status_comment,locode,dist,status,dist_processing_days,hq_processing_days,fhwa_processing_days,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
44,Obligated,HSIPL,5108(178),Long Beach,2018-12-11,2018-12-11,2018-12-13,2018-12-14,2018-12-19,166500.0,0.0,366500.0,Authorized,5108,7,E-76 approved on,0.0,3.0,5.0,SCAG015,Anaheim Street: La River -pacific Coast Hwy,"Install Controller Access Medians, Signal Upgr...",1,2018-12-11,2018-12-11,SCAG,5108,2018.0,Long Beach


In [107]:
(la_df>>count(_.prefix)>>arrange(-_.n)).head(20)

Unnamed: 0,prefix,n
65,HSIPL,705
87,STPL,441
22,CML,275
39,ER,175
60,HPLUL,143
5,ATPL,135
9,BHLS,101
48,FTACML,94
81,SRTSL,72
77,RPSTPL,63


In [114]:
la_df>>group_by(_.primary_agency_name)>>count(_.prefix)>>arrange(-_.n)

Unnamed: 0,primary_agency_name,prefix,n
218,Los Angeles County,ER,150
198,Los Angeles,STPL,84
224,Los Angeles County,HSIPL,81
192,Los Angeles,HSIPL,71
182,Los Angeles,CML,63
...,...,...,...
467,Vernon,HSIP,1
468,Vernon,HSIPL,1
470,West Covina,HPLUL,1
474,Westlake Village,HSIPL,1


## Number of Unique Prefix Codes

In [109]:
la_df >> group_by(_.primary_agency_name) >> summarize(n=_.prefix.nunique()) >> arrange(-_.n)>>filter(_.n>=10)

Unnamed: 0,primary_agency_name,n
48,Los Angeles,34
49,Los Angeles County,31
46,Long Beach,18
50,Los Angeles County Metropolitan Transportation...,18
62,Oxnard,14
81,Santa Clarita,14
66,Pasadena,12
100,Ventura County,12


In [110]:
la_nunique= (la_df >> group_by(_.primary_agency_name) >> summarize(n=_.prefix.nunique()) >> arrange(-_.n)>>filter(_.n>=5))

In [112]:
# Bar chart of distinct prefix codes per agency, using the pre-filtered
# `la_nunique` table; bars are colored by the same count.
prefix_count_colors = alt.Scale(range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS)

chart = alt.Chart(la_nunique).mark_bar().encode(
    x=alt.X("primary_agency_name", title="Agency Name"),
    y=alt.Y("n", title="Number of Unique Prefixes"),
    color=alt.Color("n", scale=prefix_count_colors),
).properties(title="Number of Unique Prefixes by Agency in District 7")

chart

## Most Common Types of Work by Agency

In [117]:
la_df>>group_by(_.prefix)>>count(_.type_of_work)>>arrange(-_.n)>>filter(_.n>=8)

Unnamed: 0,prefix,type_of_work,n
519,FTACML,FTA Transfer,93
435,ER,Emergency Opening,32
525,FTASTPL,FTA Transfer,17
1188,STPL,Road Rehabilitation (tc),17
144,BHLS,Bridge Rehabilitation,15
207,BRLS,Bridge Replacement,14
848,HSIPL,Traffic Signal Improvements,13
518,FTAATPL,FTA Transfer,12
899,HSIPL,Upgrade Signals,12
1187,STPL,Road Rehabilitation,10


In [119]:
la_df>>group_by(_.primary_agency_name)>>count(_.type_of_work)>>arrange(-_.n)>>filter(_.n>=8)

Unnamed: 0,primary_agency_name,type_of_work,n
800,Los Angeles County Metropolitan Transportation...,FTA Transfer,58
654,Los Angeles County,Emergency Opening,25
0,Access Services,FTA Transfer,17
1269,Ventura County Transportation Commission,FTA Transfer,15
351,Los Angeles,Bridge Rehabilitation,11
602,Los Angeles County,Bikeway Access Improvements,11
495,Los Angeles,Pedestrian Improvements,10
178,Downey,Upgrade Traffic Signals; Install Left-turn Pha...,9
262,Lancaster,Construct Roundabout Within Existing R/w,9
353,Los Angeles,Bridge Rehabilitation/widening,9


## Most Common Project Locations

In [120]:
la_df>>group_by(_.primary_agency_name)>>count(_.project_location)>>arrange(-_.n)>>filter(_.n>5)

Unnamed: 0,primary_agency_name,project_location,n
545,Los Angeles,"Sixth Street Viaduct Over La River, Us 101, An...",13
1271,Ventura County Transportation Commission,Within The County Of Ventura,9
544,Los Angeles,Sixth Street Viaduct Over La River And East Sa...,8
137,Culver City,Down Town Area Of City Of Culver City,7
798,Los Angeles County,Various Locations In Los Angeles County,7
105,Carson,Intersection Of Avalon Blvd And Carson St. In ...,6
659,Los Angeles County,Community Of Florence-firestone In Los Angeles...,6
978,Pasadena,Pasadena Ave./sr 210 Wb On-ramp At Walnut St.,6
1276,West Covina,Amar Road Street Improvements: Officer Chiles ...,6


In [121]:
la_df.sample()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,ac_requested,total_requested,status_comment,locode,dist,status,dist_processing_days,hq_processing_days,fhwa_processing_days,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name
2077,Obligated,CML,5006(873),Los Angeles,2018-03-07,2018-03-14,2018-03-22,2018-03-27,2018-04-02,337000.0,0.0,380662.0,Authorized,5006,7,E-76 approved on,27.0,13.0,6.0,LAF3647,Menlo Ave/mlk Vermont Expo Station,"Pedestrian Improvements: Install Sidewaks, Lan...",1,2018-02-15,2018-03-13,SCAG,5006,2018.0,Los Angeles


## Number of Obligations by Year

In [122]:
la_df>>group_by(_.primary_agency_name)>>count(_.prepared_date)>>arrange(-_.n)

Unnamed: 0,primary_agency_name,prepared_date,n
1418,Los Angeles County Metropolitan Transportation...,2015-07-15,9
1440,Los Angeles County Metropolitan Transportation...,2018-05-31,9
2,Access Services,2016-03-02,7
925,Los Angeles,2019-02-07,7
1593,Oxnard,2016-06-24,6
...,...,...,...
2431,Whittier,2020-05-21,1
2432,Whittier,2020-08-23,1
2433,Whittier,2020-09-03,1
2434,Whittier,2021-09-24,1


In [123]:
la_df>>group_by(_.primary_agency_name)>>count(_.prepared_y)>>arrange(-_.n)>>filter(_.n>10)

Unnamed: 0,primary_agency_name,prepared_y,n
252,Los Angeles,2014.0,86
266,Los Angeles County,2019.0,75
254,Los Angeles,2016.0,71
253,Los Angeles,2015.0,69
267,Los Angeles County,2020.0,69
261,Los Angeles County,2014.0,67
265,Los Angeles County,2018.0,64
258,Los Angeles,2020.0,59
264,Los Angeles County,2017.0,57
256,Los Angeles,2018.0,54


In [124]:
la_df_year = (la_df>>group_by(_.primary_agency_name)>>count(_.prepared_y)>>arrange(-_.n)>>filter(_.n>10))

### Chart

In [125]:
# Bars of obligation counts per prepared year, colored by agency
# (title typo "Obliations" fixed).
chart = (alt.Chart(la_df_year)
         .mark_bar()
         .encode(
             x=alt.X("prepared_y", title="Prepared Year"),
             y=alt.Y("n", title="Number of Obligations in each Year"),
             color = alt.Color("primary_agency_name",
                              scale=alt.Scale(
                                  range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS
                              )
                              )
         )
         .properties(
                      title="Number of Obligations per Year")
)

chart

## Project numbers with the most obligations, by agency and prefix

In [126]:
la_df>>group_by(_.primary_agency_name, _.prefix)>>count(_.project_no)>>arrange(-_.n)>>filter(_.n>5)

Unnamed: 0,primary_agency_name,prefix,project_no,n
367,Los Angeles,BRLSZD,5006(839),9
828,Los Angeles County Metropolitan Transportation...,FTAATPL,6065(225),9
836,Los Angeles County Metropolitan Transportation...,FTACML,6065(199),9
363,Los Angeles,BRLSZD,5006(664),8
2,Access Services,FTASTPL,6312(022),7
141,Culver City,CML,5240(025),7
109,Carson,HSIPL,5403(022),6
285,Long Beach,CML,5108(181),6
364,Los Angeles,BRLSZD,5006(811),6
981,Pasadena,HSIPL,5064(075),6


## Average Funds

In [127]:
(la_df>>group_by(_.primary_agency_name)>>summarize(avg_funds=_.total_requested.mean())>>arrange(-_.avg_funds)).head(20)

Unnamed: 0,primary_agency_name,avg_funds
0,Access Services,33010740.0
50,Los Angeles County Metropolitan Transportation...,16962660.0
14,Caltrans,15525370.0
94,Southern California Regional Rail Authority,8633433.0
36,Industry,6324183.0
46,Long Beach,4771285.0
95,Sunline Transit Agency,4622594.0
79,San Gabriel Valley Council of Governments,2697375.0
4,Antelope Valley Transit Authority,2475103.0
89,South Coast Area Transit,2109463.0


### Top 50 Agencies by average funds requested

In [128]:
# Mean total_requested per agency, sorted descending; keep the first 50 rows.
mean_request_desc = arrange(
    summarize(group_by(la_df, _.primary_agency_name), avg_funds=_.total_requested.mean()),
    -_.avg_funds,
)
avg_funds_top = mean_request_desc.iloc[:50]

In [129]:
# Bar chart of average total funds requested for the rows in `avg_funds_top`;
# bars are colored by the same average.
avg_top_colors = alt.Scale(range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS)

chart = alt.Chart(avg_funds_top).mark_bar().encode(
    x=alt.X("primary_agency_name", title="Primary Agency Name"),
    y=alt.Y("avg_funds", title="Average Total Funds Requested"),
    color=alt.Color("avg_funds", scale=avg_top_colors),
).properties(title="Average Total Funds Requested by Agency: Top 50")

chart

### Bottom 50 Agencies by average funds requested

In [130]:
# Mean total_requested per agency, sorted descending; keep the last 50 rows.
mean_request_sorted = arrange(
    summarize(group_by(la_df, _.primary_agency_name), avg_funds=_.total_requested.mean()),
    -_.avg_funds,
)
avg_funds_bottom = mean_request_sorted.iloc[-50:]



In [131]:
# Bar chart of average total funds requested for the rows in
# `avg_funds_bottom`; bars are colored by the same average.
avg_bottom_colors = alt.Scale(range=altair_utils.FIVETHIRTYEIGHT_DIVERGING_COLORS)

chart = alt.Chart(avg_funds_bottom).mark_bar().encode(
    x=alt.X("primary_agency_name", title="Primary Agency Name"),
    y=alt.Y("avg_funds", title="Average Total Funds Requested"),
    color=alt.Color("avg_funds", scale=avg_bottom_colors),
).properties(title="Average Total Funds Requested by Agency: Bottom 50")

chart

## Functions to look by Prefix and by Agency: 

### By Agency:

In [31]:
    @interact
    def dla_get_prefix(place=la_df.primary_agency_name.sort_values().unique().tolist()):
        """Show summary statistics and a prefix bar chart for one agency.

        ``interact`` turns the list default into a dropdown of every
        ``primary_agency_name`` in ``la_df`` and calls this function with the
        selected value.

        Parameters
        ----------
        place : str
            A ``primary_agency_name`` value from ``la_df``.

        Returns
        -------
        plotnine ggplot
            Bar chart of the number of obligations per prefix for ``place``.
        """
        # Filter once; every table and chart below describes the same rows.
        agencies = la_df[la_df.primary_agency_name == place]

        prefix_count_n = agencies >> count(_.prefix)

        display(Markdown(f"**Summary Statistics for {place}**"))
        display(Markdown(f"The number of obligations {place} has is {len(agencies)}"))
        display(Markdown(f"The number of prefix codes {place} uses is {len(prefix_count_n)}"))

        # Funding summary for the selected agency. `place` is a
        # primary_agency_name value, so the rows are taken from `agencies`;
        # filtering on the raw `agency` column can miss rows because the two
        # columns do not always carry the same string.
        pd.set_option("display.max_columns", None)
        display(agencies[['fed_requested','ac_requested','total_requested']].describe())

        display(Markdown(f"**Top Project Types in {place}**"))
        display((agencies >> count(_.type_of_work) >> arrange(-_.n)).head(5))

        # Bar chart of obligations per prefix. No manual fill scale is set
        # because FIVETHIRTYEIGHT_CATEGORY_COLORS doesn't have enough colors
        # for all the prefix values.
        ax1 = (prefix_count_n
            >> ggplot(aes("prefix", "n", fill="prefix"))
               + geom_col()
               + theme(axis_text_x = element_text(angle = 45 , hjust=1))
               + labs(title='Agency Program Codes', x='Program Codes', y='Number of Obligations', fill="Program Type")
        )
        return ax1


interactive(children=(Dropdown(description='place', options=('Access Services', 'Agoura Hills', 'Alameda Corri…

### By Prefix:

In [32]:
@interact
def prefix_all_agencies_4(prefix=la_df.prefix.sort_values().unique().tolist()):
    """Show which agencies use one prefix code.

    ``interact`` turns the list default into a dropdown of every ``prefix``
    in ``la_df`` and calls this function with the selected value.

    Parameters
    ----------
    prefix : str
        A ``prefix`` value from ``la_df``.

    Returns
    -------
    plotnine ggplot
        Bar chart of the 20 agencies with the most obligations under
        ``prefix``.
    """
    prefixes = la_df[la_df.prefix == prefix]

    # One row per agency, most obligations first. The headline number is taken
    # from the full table; it was previously measured after .head(50), which
    # capped the reported count at 50.
    agency_counts = prefixes >> count(_.primary_agency_name) >> arrange(-_.n)

    display(Markdown(f"**The number of agencies using {prefix} is {len(agency_counts)}**"))

    # Show a single example row, since some prefixes only have one entry.
    display(prefixes.sample(1))

    # Only the top 20 agencies are charted to keep the axis readable.
    ax1 = (agency_counts.head(20)
            >> ggplot(aes("primary_agency_name", "n", fill="primary_agency_name"))
                + geom_col()
                + theme(axis_text_x = element_text(angle = 45 , hjust=1))
                + labs(title='Top Agencies using Prefix', x='Agency', y='Number of Obligations', fill="Agency")
            )
    return ax1
              

interactive(children=(Dropdown(description='prefix', options=('ACNHPI', 'ACSTER', 'ACSTP', 'ATCMTD', 'ATPCML',…

## Additional Information