# Interactive Functions 

In [1]:
import pandas as pd
from siuba import *

import numpy as np
from plotnine import *

import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display


from shared_utils import geography_utils, styleguide
from shared_utils import calitp_color_palette as cp 




In [2]:
import _dla_utils

In [3]:
# Notebook-wide display settings: never truncate the column list of wide
# frames, and render floats with two decimal places.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:.2f}".format)

In [4]:
# Load the cached obligations table from the local parquet file.
# NOTE(review): the frame was previously built with
# `_clean_data.make_clean_data()`; that module is not imported in this
# notebook, so the call is left disabled.
# df = _clean_data.make_clean_data()
df = pd.read_parquet("dla_df.parquet")

In [5]:
# Preview five randomly chosen rows (unseeded, so the rows differ per run).
df.sample(n=5)

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,ac_requested,total_requested,status_comment,locode,dist,status,dist_processing_days,hq_processing_days,fhwa_processing_days,ftip_no,project_location,type_of_work,seq,date_request_initiated,date_completed_request,mpo,projectID,prepared_y,primary_agency_name,adjusted_total_requested,adjusted_fed_requested,adjusted_ac_requested
11009,Obligated,CML,5937(200),Santa Clara County,2014-07-31,2014-08-15,2014-08-15,2014-08-15,2014-08-20,838000.0,0.0,946572.0,Authorized,5937,4.0,E-76 approved on E-76 approved on,15.0,0.0,5.0,SCL130021,Ni Srts Education And Encouragement Services,Santa Clara Countywide (tc),1.0,NaT,NaT,MTC,5937,2014.0,Santa Clara County,1083453.5,959181.17,0.0
7604,Obligated,CML,5208(126),Clovis,2016-01-06,2016-01-06,2016-01-06,2016-01-06,2016-01-13,0.0,0.0,6730.9,Authorized,5208,6.0,E-76 approved on Prepared on 10/6/2015,0.0,0.0,7.0,FRE110101,Ashlan Ave & Armstrong Ave Intersection,Traffic Signal - New (tc),3.0,NaT,NaT,CFCG,5208,2016.0,Clovis,7599.24,0.0,0.0
12416,Obligated,HSIPL,5482(016),Rancho Cordova,2014-03-25,2014-03-25,2014-03-25,2014-03-25,2014-03-25,0.0,0.0,9659.87,Authorized,5482,3.0,E-76 approved on E-76 approved on,0.0,0.0,0.0,VAR79010,White Rock Road From Zinfandel Dr To Laurelhur...,Bike/pedestrian Safety Improvements,5.0,NaT,NaT,SACOG,5482,2014.0,Rancho Cordova,11056.76,0.0,0.0
2051,Obligated,BHLO,5450(066),Santa Clarita,2018-03-29,2018-03-29,2018-03-29,2018-03-29,2018-04-04,-57275.53,0.0,-11400.53,Authorized,5450,7.0,E-76 approved on,,0.0,6.0,LA000800,Lost Canyon Road Over Sand Canyon Wash. 53c1024,Rehabilitate And Widen Existing Bridge.,5.0,NaT,NaT,SCAG,5450,2018.0,Santa Clarita,-12302.34,-61806.16,0.0
15080,Obligated,HSIPL,5181(180),Huntington Beach,2019-05-30,2019-10-07,2019-10-09,2019-10-09,2019-10-22,71415.0,0.0,76117.0,Authorized,5181,12.0,E-76 approved on,0.0,2.0,13.0,SCAG015,Intersection Of Newland Street At Slater Avenue,Upgrade Existing Traffic Signal For Protective...,3.0,2019-10-07,2019-10-07,SCAG,5181,2019.0,Huntington Beach,80676.03,75692.41,0.0


## Full Function for Prefix Use by Agency

In [6]:
    @interact
    def dla_get_prefix(place=df.primary_agency_name.sort_values().unique().tolist()):
        """Summarise one agency's obligations and chart its prefix usage.

        Parameters
        ----------
        place : str
            Agency chosen in the dropdown; the options are the sorted unique
            values of ``df.primary_agency_name``.

        Returns
        -------
        plotnine.ggplot
            Column chart of the number of obligations per prefix for the
            selected agency, passed through
            ``styleguide.preset_plotnine_config`` and filled with the
            ``cp.CALITP_CATEGORY_BOLD_COLORS`` palette.

        Side effects
        ------------
        Displays the headline counts, a ``describe()`` table of the requested
        amounts, and the five most frequent ``type_of_work`` values.
        """
        # Filter once on the same column that feeds the dropdown and reuse the
        # result everywhere below, so every table and chart describes the
        # same set of rows.
        agencies = df[df.primary_agency_name == place]

        prefix_count_n = agencies >> count(_.prefix)

        display(Markdown(f"**Summary Statistics for {place}**"))
        display(Markdown(f"The number of obligations {place} has is {len(agencies)}"))
        display(Markdown(f"The number of prefix codes {place} uses is {len(prefix_count_n)}"))

        # Distribution of the requested amounts for this agency. This used to
        # filter on `df.agency`, a different column from the one the dropdown
        # is built from, so it could describe a different (or empty) row set.
        display(agencies[['fed_requested', 'ac_requested', 'total_requested']].describe())

        display(Markdown(f"**Top Project Types in {place}**"))
        display((agencies >> count(_.type_of_work) >> arrange(-_.n)).head(5))

        # graphs
        ax1 = (prefix_count_n
            >> ggplot(aes("prefix", "n", fill="prefix"))
               + geom_col()
               + theme(axis_text_x = element_text(angle = 45 , hjust=1))
               + labs(title='Agency Program Codes', x='Program Codes', y='Number of Obligations', fill="Program Type")
        )

        ax1 = (styleguide.preset_plotnine_config(ax1)
         + scale_fill_manual(values=cp.CALITP_CATEGORY_BOLD_COLORS)
        )

        return ax1



interactive(children=(Dropdown(description='place', options=('Access Services', 'Agoura Hills', 'Alameda', 'Al…

## Function for results by Prefix

In [7]:
@interact
def prefix_all_agencies_4(prefix_unique=df.prefix.sort_values().unique().tolist()):
    """Show which agencies use a given prefix and chart the top 20 of them.

    Parameters
    ----------
    prefix_unique : str
        Prefix chosen in the dropdown; the options are the sorted unique
        values of ``df.prefix``.

    Returns
    -------
    plotnine.ggplot or None
        Column chart of obligations per agency for the 20 agencies with the
        most obligations under the prefix, or ``None`` when no row matches
        the selection.

    Side effects
    ------------
    Displays the number of agencies using the prefix and one randomly
    sampled matching row.
    """
    prefixes = df[df.prefix == prefix_unique]

    # Tally once: the headline needs the full per-agency tally, the chart
    # only its first 20 rows.
    prefix_count_num = prefixes >> count(_.primary_agency_name) >> arrange(-_.n)
    prefix_count = prefix_count_num.head(20)

    display(Markdown(f"**The number of agencies using {prefix_unique} is {len(prefix_count_num)}**"))

    # `.sample(1)` raises ValueError on an empty frame. That happens when the
    # selected option matches nothing, e.g. a missing (NaN) prefix, which
    # never compares equal to itself.
    if prefixes.empty:
        return None

    # Show a single example row; some prefixes only have one entry.
    display(prefixes.sample(1))

    # graphs
    ax1 = (prefix_count
            >> ggplot(aes("primary_agency_name", "n", fill="primary_agency_name"))
                + geom_col()
                + theme(axis_text_x = element_text(angle = 45 , hjust=1))
                + labs(title='Top Agencies using Prefix', x='Agency', y='Number of Obligations', fill="Agency")
            )
    return ax1
              

interactive(children=(Dropdown(description='prefix_unique', options=('6140000', 'ACNHPI', 'ACSTER', 'ACSTP', '…

## Function for Type of Work

In [9]:
# Tally obligations per type of work, order by descending count, and keep
# the first 50 rows; these feed the type-of-work dropdown.
top50 = (
    df
    >> count(_.type_of_work)
    >> arrange(-_.n)
).head(50)

In [11]:
@interact
def findtypeofwork(work=top50.type_of_work.sort_values().unique().tolist()):
    """Rank agencies by how many obligations they have for one type of work.

    ``work`` is picked from a dropdown of the sorted ``type_of_work`` values
    in ``top50``. The function displays a heading and the table of the 20
    agencies with the most matching obligations, then returns a plotnine
    column chart of that same table.
    """
    heading = f"**Number of Obligations the top Agencies have in {work}**"
    display(Markdown(heading))

    # Rows for the selected type of work, tallied per agency.
    matching_rows = df[df.type_of_work == work]
    agency_tally = matching_rows >> count(_.primary_agency_name) >> arrange(-_.n)
    work_count = agency_tally.head(20)
    display(work_count)

    # Assemble the chart specification first, then pipe the table into it.
    bar_chart = (
        ggplot(aes("primary_agency_name", "n", fill="primary_agency_name"))
        + geom_col()
        + theme(axis_text_x=element_text(angle=45, hjust=1))
        + labs(
            title='Agencies using Type of Work',
            x='Agency',
            y='Number of Obligations',
            fill="Agency",
        )
    )
    return work_count >> bar_chart

interactive(children=(Dropdown(description='work', options=('Ac Overlay', 'Ac Overlay (tc)', 'Asphalt Concrete…

## Function to Filter Data

In [12]:
# Narrow the full table to the columns used by the filter widget.
df_query = df[
    [
        'prefix',
        'primary_agency_name',
        'project_no',
        'locode',
        'dist',
        'fed_requested',
        'ac_requested',
        'total_requested',
        'type_of_work',
        'project_location',
        'seq',
        'mpo',
        'prepared_y',
    ]
]

In [13]:
# Filter widget: agency + prefix + prepared year.
@interact
def get_query2(agency=df_query.primary_agency_name.unique().tolist(),
               prefix=df_query.prefix.unique().tolist(),
               year=df_query.prepared_y.unique().tolist()):
    """Show the obligations matching an agency, prefix and year together.

    Each argument is picked from a dropdown of the unique values of the
    corresponding ``df_query`` column. The function displays the number of
    rows matching all three selections, followed by the first rows of the
    match (``DataFrame.head()``). Nothing is returned.
    """
    # One boolean mask per selection; a row must satisfy all three.
    same_agency = df_query.primary_agency_name == agency
    same_prefix = df_query.prefix == prefix
    same_year = df_query.prepared_y == year
    filtering = df_query[same_agency & same_prefix & same_year]

    display(Markdown(f"**The number of obligations {agency} has in {prefix} during the year {year} is {len(filtering)}**"))

    display(filtering.head())
    
    

interactive(children=(Dropdown(description='agency', options=('Humboldt County', 'Mendocino County', 'Sacramen…