# Building a school expenditure dashboard using widgets

In this notebook, we build a school expenditure dashboard using ipywidgets.

In [806]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import widgets, interactive
import re
from IPython.display import display, clear_output
%matplotlib inline

In [807]:
# One merged expenditure/demographics/accountability CSV exists per school
# year; load each into a DataFrame keyed by the year (2006 through 2012).
DATA_PATH_TEMPLATE = '../../data/merged_data/expenditure_demo_account_year_{}.csv'

all_years_data = dict()
for year in range(2006, 2013):
    csv_path = DATA_PATH_TEMPLATE.format(year)
    # The first CSV column is used as the row index of the frame.
    all_years_data[year] = pd.read_csv(csv_path, index_col=0)

Now that we've read in the data, we implement our UI for searching.

In [808]:
def search_function(Search, max_results=20):
    """Return the normalised school names that contain the search text.

    Side effect: the ``Name`` column of every frame in ``all_years_data`` is
    rewritten in place with the normalised form (punctuation removed,
    surrounding whitespace stripped, upper-cased). The normalisation is
    idempotent, so repeated searches leave the names unchanged.

    Parameters
    ----------
    Search : object with a ``value`` attribute
        The text widget (or any object exposing ``.value``) holding the query.
    max_results : int, optional
        Maximum number of names returned. When more names match, a hint is
        printed and only the first ``max_results`` (alphabetically) are
        returned.

    Returns
    -------
    list of str
        Matching school names, sorted alphabetically.
    """
    def _normalize(raw_name):
        # Remove punctuation first and strip afterwards: stripping first can
        # leave a trailing space behind (e.g. "Lab !" -> "Lab "), which made
        # the stored names change between the first and second search.
        cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', str(raw_name))
        return cleaned.strip().upper()

    ## Get school_names
    unique_schools = set()
    for year in all_years_data:
        frame = all_years_data[year]
        frame['Name'] = frame['Name'].map(_normalize)
        unique_schools.update(frame['Name'])

    ## Search through school names
    # The query only contains letters, digits and whitespace after
    # normalisation, so a plain substring test is equivalent to a regex search.
    upper_school = _normalize(Search.value)
    matching_schools = sorted(name for name in unique_schools if upper_school in name)

    if len(matching_schools) <= max_results:
        return matching_schools
    print('{} results found- here are {} results. Re-search using more specific search terms.'.format(
        len(matching_schools), max_results))
    return matching_schools[:max_results]

def plot_school(school):
    """Draw the expenditure dashboard for a single school.

    The top panel plots the school's ``Total`` value per year next to the
    mean ``Total`` of all schools sharing its ``Title_1`` status that year.
    The pie charts below it are drawn by ``make_all_subs``.

    Parameters
    ----------
    school : str
        School name; it must equal the value in the ``Name`` column of the
        frames in ``all_years_data`` exactly.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If the school is not present
        in any year, a message is printed and nothing is drawn.
    """
    years = sorted(all_years_data)  # fixed order keeps the legend's year lists sorted

    total_per_student = {}
    mean_by_T1_status = {}
    title_1 = {'Title 1': [], 'non-Title 1': []}
    DBN = []
    for year in years:
        year_data = all_years_data[year]
        school_rows = year_data.loc[year_data['Name'] == school]
        if len(school_rows) == 0:
            continue  # school not in the dataset for this year

        # Store a scalar per year; if several rows share the name the first is used.
        total_per_student[year] = school_rows['Total'].iloc[0]
        DBN.extend(school_rows['School'])

        #Get group mean for comparison
        # .any() gives a well-defined truth value for any number of rows.
        if school_rows['Title_1'].isin([1]).any():
            title_1['Title 1'].append(year)
            mean_by_T1_status[year] = year_data.loc[year_data['Title_1'] == 1, 'Total'].mean()
        elif school_rows['Title_1'].isin([0]).any():
            title_1['non-Title 1'].append(year)
            mean_by_T1_status[year] = year_data.loc[year_data['Title_1'] == 0, 'Total'].mean()

    if not total_per_student:
        # Check before creating a figure so no empty canvas is left behind.
        print('No expenditure data found for {}'.format(school))
        return

    total_per_student = pd.Series(total_per_student, dtype=float).sort_index()
    mean_by_T1_status = pd.Series(mean_by_T1_status, dtype=float).sort_index()

    plt.figure(figsize=(16,12))

    ##Ax one- total expenditure comparison
    ax1 = plt.subplot(3,1,1)
    ax1.plot(total_per_student.index, total_per_student.values, marker='s', markersize=10,
             label='Expenditures per student at {}'.format(school.title()))

    #Make label for comparison plot:
    if len(title_1['Title 1']) == 0:
        comparison_label = 'Average expenditures across all non-Title 1 schools citywide'
    elif len(title_1['non-Title 1']) == 0:
        comparison_label = 'Average expenditures across all Title 1 schools citywide'
    else: ##Need to specify years for comparison
        comparison_label = 'Average expenditures across Title 1 schools (years {})\nand non-Title 1 schools (years {})'.format(str(title_1['Title 1']).strip('[]'),str(title_1['non-Title 1']).strip('[]'))

    # The comparison line only exists when Title_1 was 0 or 1 in some year.
    if len(mean_by_T1_status) > 0:
        ax1.plot(mean_by_T1_status.index, mean_by_T1_status.values, marker='o', markersize=10, label=comparison_label)
    title = 'Total expenditures per student by year'
    ax1.set_title(title)
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Total expenditures per student ($)')
    ax1.set_xlim((years[0], years[-1]))

    # Pad the y-range around all plotted values, ignoring missing ones.
    plotted = np.concatenate([total_per_student.values, mean_by_T1_status.values])
    plotted = plotted[np.isfinite(plotted)]
    if len(plotted) > 0:
        ax1.set_ylim((plotted.min() - 2000, plotted.max() + 1000))
    ax1.ticklabel_format(useOffset=False)
    ax1.legend(loc='lower center', fancybox=True, shadow=True, ncol=2, fontsize='medium')

    # A name can map to more than one 'School' code across years; use the most
    # frequent one (this is simply the only code when there is just one).
    make_all_subs(pd.Series(DBN).value_counts().idxmax())
    plt.show()
    return

def update_dropdown(search):
    """Run a search and rebuild the dashboard below the search box.

    Intended as the submit callback of the search text widget: clears the
    previous output, looks up matching schools, plots the first match and
    puts a dropdown listing all matches into ``container``.

    Parameters
    ----------
    search : object with a ``value`` attribute
        The text widget that triggered the callback.

    Returns
    -------
    None
    """
    clear_output()
    schools = search_function(search)

    if len(schools) == 0:
        # Nothing to plot: show only the search box instead of wrapping
        # plot_school around an empty dropdown.
        print('{} not found- please search for a different school'.format(search.value))
        container.children = [search]
        return

    # One or many matches are handled the same way: plot the first match and
    # let the dropdown switch between the rest.
    plot_school(schools[0])
    select_from = interactive(plot_school, school=widgets.Dropdown(options = schools, description = 'Select school to plot: '))
    container.children = [search, select_from]

In [818]:
import matplotlib.patches as mpatches

def make_pie_plot(year, DBN, min_share=0.025):
    """Draw one school's spending breakdown for one year as a pie chart.

    Every column of that year's frame whose name contains ``_All_Funds`` is
    treated as a spending category. Categories whose share of the total is
    not above ``min_share`` are merged into a single 'Other' wedge. The pie
    is drawn on the current matplotlib axes.

    Parameters
    ----------
    year : key of ``all_years_data``
        Which year's frame to read.
    DBN : index label
        Row label of the school in that frame.
    min_share : float, optional
        Minimum fraction of the total a category needs to get its own wedge.

    Returns
    -------
    tuple
        ``(handles, pie)`` where ``handles`` is a list of legend patches, one
        per wedge, and ``pie`` is the return value of ``plt.pie``.

    Raises
    ------
    KeyError
        If ``DBN`` is not a row label of the frame for ``year``.
    """
    year_data = all_years_data[year]
    matches = [col for col in year_data.columns if re.search(r'_All_Funds', col) is not None]

    def _readable(column_name):
        # Split at capital letters, turn underscores into spaces, then tidy up.
        key = ' '.join(re.findall('[A-Z][^A-Z]*', column_name))
        key = re.sub('_', ' ', key)
        key = re.sub('  ', ' ', key)
        key = re.sub('Srcs', 'Services', key)
        return re.sub(' All Funds', '', key)

    map_cols_to_keys = dict((col, _readable(col)) for col in matches)

    target_school = year_data.loc[DBN, matches]
    target_school.index = [map_cols_to_keys[x] for x in target_school.index]
    target_school = target_school.sort_values(ascending=False)
    target_school.name = ''

    possible_categories = ['Ancillary Support Services', 'Building Services',
                           'Central Administration', 'Central Instructional Support',
                           'Classroom Instruction', 'Field Support',
                           'Instructional Support Services', 'Instructional Support and Administration',
                           'Leadership Supervision Support', 'Other Field Support Costs',
                           'Referral and Evaluation Services', 'Other']
    # Ask for exactly one palette entry per category and index them by
    # integer, so every category gets its own colour.
    palette = plt.get_cmap('Paired', len(possible_categories))
    map_keys_to_colors = dict((category, palette(i)) for i, category in enumerate(possible_categories))
    fallback_color = (0.5, 0.5, 0.5, 1.0)  # grey for categories missing from the table

    shares = target_school / target_school.sum()
    non_negligible = shares[shares > min_share].copy()
    # Floating-point error can push the remainder slightly below zero, and
    # plt.pie rejects negative wedge sizes, so only add a real remainder.
    remainder = 1.0 - non_negligible.sum()
    if remainder > 1e-9:
        # Add to an existing 'Other' entry rather than overwrite it.
        non_negligible['Other'] = non_negligible.get('Other', 0.0) + remainder

    # .get() keeps an unlisted category from raising KeyError, which the
    # caller would interpret as "school not in dataset".
    colors = [map_keys_to_colors.get(x, fallback_color) for x in non_negligible.index]
    handles = [mpatches.Patch(color=color, label=str(x)) for x, color in zip(non_negligible.index, colors)]
    return handles, plt.pie(non_negligible, autopct=(lambda x: str(round(x,1))+'%'), radius=1.2, colors=colors, shadow=True, pctdistance=0.8)

In [819]:
def make_all_subs(DBN):
    """Draw one spending pie chart per year (2006-2012) plus a shared legend.

    The pies occupy cells 5 to 11 of a 3x4 subplot grid on the current
    figure and the legend is placed in cell 12. A year for which
    ``make_pie_plot`` raises ``KeyError`` gets a "Data not available"
    placeholder instead of a pie.

    Parameters
    ----------
    DBN : index label
        School identifier passed through to ``make_pie_plot``.

    Returns
    -------
    None
    """
    handles = []
    # A single loop over all years; the grid position is year - 2001.
    for year in range(2006, 2013):
        ax = plt.subplot(3,4, year-2001)
        ax.set_title(str(year))
        try:
            year_handles, _ = make_pie_plot(year, DBN)
        except KeyError: ##School not in dataset for year
            ax.axis('off')
            ax.text(0.5, 0.5,'Data not available\nfor {}'.format(str(year)), fontsize=16,
                    horizontalalignment='center', verticalalignment='center', transform=ax.transAxes)
        else:
            handles.extend(year_handles)

    legend_corner = plt.subplot(3,4,12)
    legend_corner.axis('off')

    # Keep the first patch seen for each label so the legend has no repeats.
    seen_labels = set()
    unique_handles = []
    for patch in handles:
        label = patch.get_label()
        if label not in seen_labels:
            seen_labels.add(label)
            unique_handles.append(patch)
    plt.legend(handles=unique_handles, fontsize='medium', loc='center', title='Legend')
    

Now let's test it out:

In [821]:
# Search box, pre-filled with an example query. Submitting the text
# (pressing Enter) calls update_dropdown with this widget as its argument.
Search = widgets.Text(description='Search:', value='Bronx Lab')
Search.on_submit(update_dropdown)

# Box that holds the search field; update_dropdown later replaces its
# children to add the school-selection dropdown.
container = widgets.Box(children=[Search])
container.border_width = 3
container.border_style = 'dotted'
container.border_color = 'red'

display(container)