# Script Work for Generating Chart Outputs

In [1]:
import numpy as np
import pandas as pd
from siuba import *

import altair as alt
import altair_saver
from plotnine import *

from IPython.display import Markdown

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase
import intake

import clean_data
import _dla_utils



In [2]:
# Notebook-wide display settings: never truncate columns when a frame is
# rendered, and print floats with thousands separators and two decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:,.2f}".format)

In [24]:
# Load the dataset from the parquet file in the working directory.
df= pd.read_parquet("dla_df.parquet")

In [25]:
# Row count of the frame as loaded (before any district filter is applied).
len(df)

20117

In [26]:
# District number used to filter `df` in the next cell.
# NOTE(review): `subset` is reassigned to 7 in the "#parameters cell" near the
# end of the notebook, and make_charts reads the global at call time.
subset= 11

In [27]:
# Keep only the rows whose `dist` equals the chosen district.
# NOTE(review): this overwrites `df`, so re-running the cell after changing
# `subset` filters an already-filtered frame; reload the parquet file first.
df = df>>filter(_.dist==subset)

In [7]:
def labeling(word):
    """Turn a snake_case column name into a human-readable chart label.

    Resolution order:

    1. ``"mpo"`` and ``"rtpa"`` are acronyms and are returned upper-cased.
    2. Names listed in ``LABEL_DICT`` return their hand-written label.
    3. Anything else is built generically: a leading ``n_`` is expanded to
       "Number of ", a leading ``unique_`` to "Number of Unique ", remaining
       underscores become spaces, and the result is title-cased (so the
       generic labels read "Number Of ...").

    Parameters
    ----------
    word : str
        Column name to convert.

    Returns
    -------
    str
        The display label.
    """
    # Add specific use cases where it's not just first letter capitalized
    LABEL_DICT = { "prepared_y": "Year",
              "dist": "District",
              "total_requested": "Total Requested",
              "fed_requested":"Fed Requested",
              "ac_requested": "Advance Construction Requested",
              "nunique":"Number of Unique",
              "project_no": "Project Number"}

    if word in ("mpo", "rtpa"):
        return word.upper()
    if word in LABEL_DICT:
        return LABEL_DICT[word]

    # Only expand the markers when they are a real prefix. A plain
    # str.replace also rewrites "n_" in the middle of a name (e.g.
    # "action_type"), and must happen before title-casing because
    # "unique_" no longer matches once the text has become "Unique_".
    if word.startswith("n_"):
        word = "Number of " + word[len("n_"):]
    elif word.startswith("unique_"):
        word = "Number of Unique " + word[len("unique_"):]

    return word.replace("_", " ").title()


In [8]:
def basic_bar_chart_test(df, x_col, y_col, color_col, subset, chart_title=''):
    """Build a styled Altair bar chart of ``y_col`` against ``x_col``.

    Parameters
    ----------
    df : DataFrame handed to ``alt.Chart``.
    x_col, y_col : column names for the x and y encodings; axis titles come
        from ``labeling``. Bars are sorted by descending y value.
    color_col : column mapped to bar color using the Cal-ITP sequential
        palette; its legend title also comes from ``labeling``.
    subset : not used by the live code; only the commented-out save call
        below refers to it.
    chart_title : chart title. An empty string is replaced by
        "<x label> by <y label>".

    Returns
    -------
    The chart after it has been passed through
    ``styleguide.preset_chart_config``.
    """
    # Fall back to an auto-generated title when the caller gave none.
    if chart_title == "":
        chart_title = f"{labeling(x_col)} by {labeling(y_col)}"

    x_encoding = alt.X(x_col, title=labeling(x_col), sort='-y')
    y_encoding = alt.Y(y_col, title=labeling(y_col))
    color_encoding = alt.Color(
        color_col,
        scale=alt.Scale(range=altair_utils.CALITP_SEQUENTIAL_COLORS),
        legend=alt.Legend(title=labeling(color_col)),
    )

    bars = alt.Chart(df).mark_bar().encode(
        x=x_encoding,
        y=y_encoding,
        color=color_encoding,
    )
    bars = bars.properties(title=chart_title)

    styled = styleguide.preset_chart_config(bars)
    # Saving is currently disabled:
    # styled.save(f"./chart_outputs/d{subset}_outputs/bar_{x_col}_by_{y_col}.png")

    return styled


def basic_line_chart_test(df, x_col, y_col, subset, chart_title=''):
    """Build a styled Altair line chart of ``y_col`` against ``x_col``.

    Parameters
    ----------
    df : DataFrame handed to ``alt.Chart``.
    x_col, y_col : column names for the x and y encodings; axis titles come
        from ``labeling``.
    subset : not used by the live code; only the commented-out save call
        below refers to it.
    chart_title : chart title. An empty string is replaced by
        "<x label> by <y label>".

    Returns
    -------
    The chart after it has been passed through
    ``styleguide.preset_chart_config``.
    """
    # Fall back to an auto-generated title when the caller gave none.
    if chart_title == "":
        chart_title = f"{labeling(x_col)} by {labeling(y_col)}"

    x_encoding = alt.X(x_col, title=labeling(x_col))
    y_encoding = alt.Y(y_col, title=labeling(y_col))

    line = alt.Chart(df).mark_line().encode(x=x_encoding, y=y_encoding)
    line = line.properties(title=chart_title)

    styled = styleguide.preset_chart_config(line)
    # Saving is currently disabled:
    # styled.save(f"./chart_outputs/d{subset}_outputs/line_{x_col}_by_{y_col}.png")

    return styled


In [9]:
# Per-year aggregate produced by the project helper. The chart in the next
# cell reads its `prepared_y` and `total_requested_sum` columns.
df_years= _dla_utils.count_all_years(df)

In [10]:
# Bar chart of `total_requested_sum` per `prepared_y`, colored by `prepared_y`.
basic_bar_chart_test(df_years, 'prepared_y', 'total_requested_sum', 'prepared_y', subset, 'Total Requested Funds by Year')


In [11]:
#df = (df>>filter(_.dist==subset))
# Long-format frame from the project helper; the cells below read its
# `variable`, `value`, and `count` columns.
df_top = _dla_utils.find_top(df)

# Bar chart of the counts for the `primary_agency_name` rows, colored by count.
basic_bar_chart_test((df_top>>filter(_.variable=='primary_agency_name')), 'value', 'count', 'count', subset, chart_title='Primary Agency name')

In [12]:
# NOTE(review): identical to the `find_top` call in the previous cell; this
# recomputes the same frame.
df_top = _dla_utils.find_top(df)

In [13]:
# Inspect the `prefix` rows of the top-values frame.
df_top>>filter(_.variable=='prefix')


Unnamed: 0,value,count,variable
0,HSIPL,705,prefix
1,STPL,441,prefix
2,CML,275,prefix
3,ER,175,prefix
4,HPLUL,143,prefix
5,ATPL,135,prefix
6,BHLS,101,prefix
7,FTACML,94,prefix
8,SRTSL,72,prefix
9,RPSTPL,63,prefix


## Transit Agencies

In [14]:
from calitp.tables import tbl
from siuba import *


In [15]:
# Select `itp_id` and `transit_provider` from the `views.transitstacks` table
# and collect the result into a local frame for the merge in the next cell.
ta = (tbl.views.transitstacks())>>select(_.itp_id, _.transit_provider) >> collect()
#ta = ta.rename({'transit_provider': 'primary_agency_name'}, axis=1)



In [16]:
# Left-join on exact agency name: rows of `df` with no matching
# `transit_provider` keep nulls in the columns that come from `ta`.
test = df.merge(ta, left_on='primary_agency_name', right_on='transit_provider', how='left')
#test

In [17]:
# Rows that found a match in `ta`, counted per agency name.
test>>filter(_.transit_provider.notnull())>>count(_.primary_agency_name)

Unnamed: 0,primary_agency_name,n
0,Antelope Valley Transit Authority,1
1,Bell Gardens,25
2,Inglewood,23
3,Ventura County Transportation Commission,29


In [18]:
# Alternative to the exact-name merge above: keep rows whose agency name
# matches the pattern, case-insensitively; missing names count as no match.
# NOTE(review): 'tranist' presumably targets a misspelling in the data — confirm.
transit_agencies = df[df['primary_agency_name'].str.contains(
              'Transit|tranist|Rail',
              case=False, na=False)]

In [19]:
# Number of rows per agency matched by the name pattern.
transit_agencies>>count(_.primary_agency_name)

Unnamed: 0,primary_agency_name,n
0,Antelope Valley Transit Authority,1
1,South Coast Area Transit,7
2,Southern California Regional Rail Authority,3
3,Sunline Transit Agency,7


In [29]:
#Add to chart function
# Scratch check of turning a chart title into a file-name-friendly string.
chart_title='Obligations by Year'
# str.replace returns a new string; `chart_title` itself is left unchanged and
# the underscore version is only displayed as the cell output.
chart_title.replace(" ", "_")

'Obligations_by_Year'

## Charts

In [30]:
def make_charts(df):
    """Display the district report: headline counts, summary tables, charts.

    NOTE(review): the ``df`` argument is discarded. The function immediately
    re-reads ``dla_df.parquet`` and filters it with the module-level
    ``subset``, so the output is controlled by ``subset`` alone.

    Everything is emitted through ``display``; nothing is returned.
    """
    # Start from the full dataset on disk, then narrow to the district.
    df = pd.read_parquet("dla_df.parquet")
    df = df >> filter(_.dist == subset)

    # Helper aggregates (df_years is only used by the disabled section below).
    df_years = _dla_utils.count_all_years(df)
    df_top = _dla_utils.find_top(df)

    # Rows whose type of work matches the transit pattern (case-insensitive).
    work_is_transit = df['type_of_work'].str.contains(
        'Transit|tranist|bus|Bus|Rideshare', case=False, na=False)
    transit = df[work_is_transit]

    # Rows whose agency name matches the transit-agency pattern.
    agency_is_transit = df['primary_agency_name'].str.contains(
        'Transit|tranist|Rail', case=False, na=False)
    transit_agencies = df[agency_is_transit]

    # ---- Headline text ----------------------------------------------------
    display(Markdown(f"# **District {subset} Analysis**"))

    display(Markdown("**Unique Agencies Funding**"))
    agency_total = df.primary_agency_name.nunique()
    display(Markdown(f'There are {agency_total} Unique Agencies'))

    display(Markdown("**Transit Funding**"))
    display(Markdown(f"Out of {len(df)} obligations, {len(transit)} are transit related"))

    display(Markdown("**Transit Agencies**"))
    transit_agency_total = len(transit_agencies >> count(_.primary_agency_name))
    display(Markdown(f'There are {transit_agency_total} Transit Agencies'))

    # ---- Tables -----------------------------------------------------------
    display(Markdown("**Number of Unique Prefix Codes by Agency**"))
    display(_dla_utils.get_nunique(df, 'prefix', 'primary_agency_name').head(5))

    display(Markdown("**Number of Unique Agencies by Prefix Codes**"))
    display(_dla_utils.get_nunique(df, 'primary_agency_name', 'prefix').head(5))

    display(Markdown("**Top 5 Types of Work**"))
    work_ranking = (
        transit
        >> count(_.type_of_work)
        >> arrange(-_.n)
        >> select(_.type_of_work)
    )
    display(work_ranking.head(5))

#     ###charting df_years

#     column_names = list(df_years)

#     for column in column_names:

#         display(Markdown(f"**{labeling(column)} Over Prepared Year**"))
#         display(basic_bar_chart_test(df_years, "prepared_y", column, "prepared_y", subset))


#     ###charting df_tops

#     values = sorted(df_top['variable'].unique())

#     for value in values:
#         display(Markdown(f"**Top Values in {labeling(value)}**"))
#         display(basic_bar_chart_test((df_top>>filter(_.variable==value)), 'value', 'count', 'count', subset,
#                                     chart_title=f'Top 20 {labeling(value)}'))

    # ---- Charts -----------------------------------------------------------

    # Line chart: obligations by year.
    yearly_counts = df_top >> filter(_.variable == 'prepared_y')
    chart1 = basic_line_chart_test(
        yearly_counts, 'value', 'count', subset, 'Obligations by Year')
    display(chart1)

    # Bar chart: agencies with the most obligations.
    agency_counts = df_top >> filter(_.variable == 'primary_agency_name')
    chart2 = basic_bar_chart_test(
        agency_counts, 'value', 'count', 'value', subset,
        'Agencies With The Most Obligations')
    display(chart2)

    # Bar chart: agencies with the most unique prefix codes (first 30 rows).
    prefixes_per_agency = _dla_utils.get_nunique(
        df, 'prefix', 'primary_agency_name').head(30)
    chart3 = basic_bar_chart_test(
        prefixes_per_agency, 'primary_agency_name', 'n', 'primary_agency_name',
        subset, 'Agencies With The Most Unique Prefix Codes')
    display(chart3)

    # Bar chart: highest average total requested funds by agency (top 30).
    avg_by_agency = (
        _dla_utils.calculate_data_all(
            df, 'total_requested', 'primary_agency_name', aggfunc="mean")
        >> arrange(-_.total_requested)
    ).head(30)
    chart4 = basic_bar_chart_test(
        avg_by_agency, 'primary_agency_name', 'total_requested',
        'primary_agency_name', subset,
        'Average Total Requested Funds by Agency')
    display(chart4)

    # Bar chart: lowest average total requested funds by agency. The frame is
    # sorted descending, so the tail holds the smallest averages.
    avg_funds_bottom = (
        df
        >> group_by(_.primary_agency_name)
        >> summarize(avg_funds=_.total_requested.mean())
        >> arrange(-_.avg_funds)
    ).tail(50)
    chart5 = basic_bar_chart_test(
        avg_funds_bottom.tail(40), 'primary_agency_name', 'avg_funds',
        'primary_agency_name', subset,
        'Lowest Average Total Funds by Agency')
    display(chart5)

    # Bar chart: average total requested funds by agency, transit-related
    # work only (top 30).
    transit_avg_by_agency = (
        _dla_utils.calculate_data_all(
            transit, 'total_requested', 'primary_agency_name', aggfunc="mean")
        >> arrange(-_.total_requested)
    ).head(30)
    chart6 = basic_bar_chart_test(
        transit_avg_by_agency, 'primary_agency_name', 'total_requested',
        'primary_agency_name', subset,
        'Average Total Requested Funds by Agency: Transit Related Funding')
    display(chart6)

    # Bar chart: average funding for the name-matched transit agencies.
    transit_agency_avg = _dla_utils.calculate_data_all(
        transit_agencies, 'total_requested',
        aggregate_by='primary_agency_name', aggfunc="mean")
    chart7 = basic_bar_chart_test(
        transit_agency_avg, 'primary_agency_name', 'total_requested',
        'primary_agency_name', subset,
        'Transit Agencies Average Funding')
    display(chart7)

    # Bar chart: average total requested funds by prefix (top 30).
    avg_by_prefix = (
        _dla_utils.calculate_data_all(
            df, 'total_requested', 'prefix', aggfunc="mean")
        >> arrange(-_.total_requested)
    ).head(30)
    chart8 = basic_bar_chart_test(
        avg_by_prefix, 'prefix', 'total_requested', 'prefix', subset,
        'Average Total Requested Funds by Prefix')
    display(chart8)
    

In [33]:
#parameters cell
# District to report on. make_charts reads this global when it runs, so this
# value (not the earlier `subset= 11`) decides which district is rendered.
subset = 7

In [34]:
# NOTE(review): make_charts re-reads "dla_df.parquet" and filters by the global
# `subset`, so the `df` passed here does not influence the output.
make_charts(df)

# **District 7 Analysis**

**Unique Agencies Funding**

There are 108 Unique Agencies

**Transit Funding**

Out of 2787 obligations, 68 are transit related

**Transit Agencies**

There are 4 Transit Agencies

**Number of Unique Prefix Codes by Agency**

Unnamed: 0,primary_agency_name,n
49,Los Angeles,34
50,Los Angeles County,31
47,Long Beach,18
51,Los Angeles County Metropolitan Transportation...,18
63,Oxnard,14


**Number of Unique Agencies by Prefix Codes**

Unnamed: 0,prefix,n
65,HSIPL,60
87,STPL,59
60,HPLUL,34
22,CML,27
81,SRTSL,27


**Top 5 Types of Work**

Unnamed: 0,type_of_work
9,Construct Improvement To The Wells Center Bus ...
13,Create A Multi-modal Transit Plaza To Integrat...
17,Install Bus Shelters At Existing Bus Stops
18,"Install Ped-scale Street Lights,trees,bus Benc..."
1,": Transit/pedestrian Linkages,ped Improv,etc(tc)"
