# Interactive Functions 

In [6]:
#! pip install plotnine

In [3]:
import pandas as pd
from siuba import *

import numpy as np
from plotnine import *

import ipywidgets as widgets
from ipywidgets import *

In [4]:
pd.set_option('display.max_columns', None)
pd.options.display.float_format = "{:.2f}".format

In [5]:
df = pd.read_csv('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/function_data.csv', low_memory=False).drop('Unnamed: 0', axis=1)



In [6]:
df.sample(5)

Unnamed: 0,prefix,project_no,agency,prepared_date,fed_requested,ac_requested,total_requested,status_comment,locode,dist,ftip_no,project_location,type_of_work,seq,mpo,prepared_y
1648,BHLO,5951(152),Santa Barbara County,2018-03-02,50000.0,0.0,50000.0,Authorized,5951,5,CT14,Alamo Pintado Bridge #51c0081 On Alamo Pintado...,Repair & Construction Of Scour Countermeasure ...,2,SBCG,2018-01-01
7421,BHLS,5060(159),Fresno,2015-11-16,-0.01,0.0,0.0,Authorized,5060,6,,Bridge 42c0136 On Jensen Avenue Over Union Pac...,Bridge Rehabilitation,4,CFCG,2015-01-01
2593,ER,32D0(004),Santa Cruz County,2018-01-19,32579.0,-32579.0,0.0,Authorized,5936,5,,Jamison Creek Road Pm 1.61,"Geotechnical Investigation, Plans, Canitilever...",2,AMBAG,2018-01-01
1320,CML,5441(065),Moreno Valley,2018-01-17,1542000.0,0.0,2262000.0,Authorized,5441,8,RIV151202,Various Intersections Throughout The City Of M...,Construct Its Ethernet Fiber-optic Backbone Sy...,1,SCAG,2018-01-01
13231,CML,5925(144),El Dorado County,2019-04-30,920269.0,300287.0,1225806.0,Authorized,5925,3,ELD19505,El Dorado Hills Bl From Governor Pl To Brittan...,Upgrade Existing Class I Trail And Construct A...,3,SACOG,2019-01-01


In [7]:
(df >> count(_.project_location) >> arrange(-_.n)).head(5)

Unnamed: 0,project_location,n
6925,San Francisco Bay Area,65
8196,Various Locations,31
6846,Sacog Region,29
3028,Golden Gate Bridge,27
7864,Throughout Kern County,22


## Full Function for Prefix

In [8]:
    @interact
    
    def dla_get_prefix(place=df.agency.sort_values().unique().tolist()):
        # for the table- using one as some agencies only have one entry
        pd.set_option("display.max_columns", None)
        from IPython.core.display import display
        display(df[df.agency == place].describe()) 
        display((df[df.agency == place] >> count(_.type_of_work) >> arrange(-_.n)).head(5)) 
        # graphs 
    
        agencies = df[df.agency==place]
    
        prefix_count_n = agencies >> count(_.prefix) 
    
        ax1 = (prefix_count_n
            >> ggplot(aes("prefix", "n", fill="prefix")) 
               + geom_col() 
               + theme(axis_text_x = element_text(angle = 45 , hjust=1))
               + labs(title='Agency Program Codes', x='Program Codes', y='Number of Obligations', fill="Program Type")
        )
        return ax1

        

interactive(children=(Dropdown(description='place', options=('Access Services', 'Agoura Hills', 'Ala-Con Costa…

## Function for results by Prefix

In [9]:
@interact
def prefix_all_agencies_4(prefix=df.prefix.sort_values().unique().tolist()):
    # for the table- using one as some agencies only have one entry
    display(df[(df.prefix == prefix)].sample(1))
    
    # graphs 
    prefixes = df[df.prefix== prefix]
    #prefixes = df[df.prefix==prefix]
    
    prefix_count = (prefixes >> count(_.agency) >> arrange(-_.n)).head(5)
    
    ax1 = (prefix_count
            >> ggplot(aes("agency", "n", fill="agency")) 
                + geom_col() 
                + theme(axis_text_x = element_text(angle = 45 , hjust=1))
                + labs(title='Top 5 Agencies using Prefix', x='Agency', y='Number of Obligations', fill="Agency")
            )    
    return ax1
              

interactive(children=(Dropdown(description='prefix', options=('6140000', 'ACNHPI', 'ACST-ER', 'ACSTP', 'ASCTP'…

## Function for Type of Work

In [10]:
top50 = (df >> count(_.type_of_work) >> arrange(-_.n)).head(50)

In [11]:
@interact
def findtypeofwork(work=top50.type_of_work.sort_values().unique().tolist()):
    
    # graphs 
    worktypes = df[df.type_of_work== work]
    
    work_count = (worktypes >> count(_.agency) >> arrange(-_.n)).head(10)
    display(work_count)

    ax1 = (work_count
            >> ggplot(aes("agency", "n", fill="agency")) 
                + geom_col() 
                + theme(axis_text_x = element_text(angle = 45 , hjust=1))
                + labs(title='Agencies using Type of Work', x='Agency', y='Number of Obligations', fill="Agency")
            )    
    return ax1

interactive(children=(Dropdown(description='work', options=('Ac Overlay', 'Ac Overlay (tc)', 'Asphalt Concrete…

## Function to Filter Data

In [12]:
df_query = df[['prefix', 'agency', 'project_no','locode','dist','fed_requested', 'ac_requested', 'total_requested',
              'type_of_work','project_location', 'seq', 'mpo','prepared_y']]

In [13]:
#adding year
@interact
def get_query2(agency=df_query.agency.unique().tolist(),
             prefix=df_query.prefix.unique().tolist(),
             year=df_query.prepared_y.unique().tolist()):
    
    filtering = df_query[(df_query.agency==agency)&(df_query.prefix==prefix)&(df_query.prepared_y==year)]
    
    
    print("The number of obligations this agency has in this prefix and year is", len(filtering))
    
    display(filtering.head())
    
    

interactive(children=(Dropdown(description='agency', options=('Humboldt County', 'Mendocino County', 'Sacramen…