# Data Exploration UI

This program is to provide a UI to explore the Data from Ingelwood Unified School District to find trneds

_*scroll to the bottom to use the UI_

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

#read in the main dataframe
df=pd.read_csv('school_data_2.csv',index_col='Unnamed: 0')
df['Week'] =pd.to_datetime(df.Week).dt.date

## Functions 

This section of code creates the fuctions used for producing different graphs and charts

In [2]:
#a function to both trim df to the slected date range and also return a gourpby of some of the data
def date_trim(start_date, end_date, group_by):
    #add a group by functions for greater control
    df=pd.read_csv('school_data_2.csv',index_col='Unnamed: 0')
    #Send the 'Week' column to date type
    df['Week'] =pd.to_datetime(df.Week).dt.date
    #filter the data set
    df= df[(df['Week'] >= start_date) & (df['Week'] <= end_date)]
    #make the grouped by DataFrame, collapsing all data on the 'Week' values and suming the Absences
    grouped=df.groupby(group_by,as_index=False)['Absences'].sum()
    #rturn the trimmed df and the new grouped DataFrame
    return df, grouped

#a function to graph groupedby Week v Abcecences
def week_grouped(start_date, end_date,grouped, group_by):
    #sort the time series data
    grouped=grouped.sort_values(by=group_by)
    #define plot size
    plt.figure(figsize = (15,10))
    #plot the calender week by the number of Absences
    sns.lineplot(x=group_by,
                 y='Absences',
                 data=grouped).set(title='Total Absentee Over Time Between '+
                                          str(start_date)[:4] +
                                          ' and ' +
                                          str(end_date)[:4])

#a function to grpah a barplot of the groupby School v Absences
def school_grouped(start_date, end_date, grouped):
    #filter out any datapoints that are very small
    grouped=grouped[grouped['Absences']>=200]
    #create to new data frames, one with half the date above the mean and one with the data below the mean
    group_1=grouped[grouped['Absences']<grouped['Absences'].mean()] 
    group_2=grouped[grouped['Absences']>grouped['Absences'].mean()] 
    #create a frame work of sublots to hold the barplots staked on vertically
    fig, ax = plt.subplots(2, 1, figsize=(15,15))
    #plot the below the mean schools as a barplot with School v Absences
    sns.barplot(x="School",
                y="Absences",
                data=group_1,
                ax=ax[0]).set(title='Total Absentee By School Between'+
                                     str(start_date)[:4] +
                                     ' and ' +
                                     str(end_date)[:4])
    #plot the above the mean schools as a barplot with School v Absences
    sns.barplot(x="School",
                y="Absences",
                data=group_2,
                ax=ax[1]).set(title='Total Absentee By School Between'+
                                     str(start_date)[:4] +
                                     ' and ' +
                                     str(end_date)[:4])
    #rotate lables by 80 degrees so that they do not overlap each other
    ax[0].tick_params(rotation=80)
    ax[1].tick_params(rotation=80)
    #make the layout of the subplots so that no lables or charts overlap
    plt.tight_layout()

#a function to grpah a barplot of the groupby Grade v Absences
def grade_grouped(start_date, end_date, grouped):
    #set a figure size for the chart
    plt.figure(figsize = (15,10))
    #Remove all rows that contain a letter or are outside of schooling range
    grouped=grouped[(grouped['Grade'] != 'K')  &
                    (grouped['Grade'] != 'TK') & 
                    (grouped['Grade'] != 'PS') &
                    (grouped['Grade'] != '18')]
    #send the 'Grade' type to interger
    grouped['Grade']=grouped['Grade'].astype(int)
    #create the barplot of Grade v Absences
    sns.barplot(x="Grade",
                y="Absences",
                data=grouped).set(title='Total Absentee by Grade'+
                                         str(start_date)[:4] +
                                         ' and ' +
                                         str(end_date)[:4])

#a function that is used in pct type functions, this function pulls the correct data 
#for pct absentisim and trims the data frame to the rigt dates 
def pct_helper(start_date, end_date):
    #read in the school_date_3.csv
    df2=pd.read_csv('school_date_3.csv', index_col='Unnamed: 0')
    #send the week column to a new column as a date type object
    df2["date"] = pd.to_datetime(df2["Week"]).dt.date
    #trim df2 to the correct dates
    df2=df2[(df2['date'] >=start_date) & (df2['date'] <= end_date)]
    #drop outliners that are less then 1%
    df2=df2[df2['pct_absences']>=1]
    #reset the index for ease of use
    df2=df2.reset_index(drop=True)
    #return df2
    return df2

#a function that displays a chart of pct differnce of absentism with any major linear realtionships scrubbed
#this is done as we are looking for micro trends across multiple years and want to observe difference from an 
#overall trend not the known trends
def typed_grouped_pct_controlled(start_date, end_date):
    #pull a cleaned df2 using the pct helper
    df2=pct_helper(start_date, end_date)
    #assign x as a range from 0 to the length of df2
    x = np.arange(df2.shape[0])
    #make a modle of linear regression to modle the growth of absentisim over time
    fit = np.polyfit(x, df2['pct_absences'], 1)
    fit_fn = np.poly1d(fit)
    #assing an empty column to hold the trend line
    df2['trend_line']=0.0
    #plot the trend line into df2
    for x in range(df2.shape[0]):
        #for each row of df2 find the value of the trendline and send it to df2
        df2.loc[x,'trend_line']=fit_fn(x)     
    #make a zero line for plotting
    df2['zero']=0
    #calculate the distance from the trend line for all rows
    df2['Difference']=df2['pct_absences']-df2['trend_line'] 
    #assing the size of the plot
    plt.figure(figsize = (15,10))
    #plot the distance from the trend line
    plt.plot(df2['date'],df2['Difference'])
    #plot a zero line to "represent" the trend line
    plt.plot(df2['date'],df2['zero'])
    #add an adaptive title to the plot so that it descibes what is happening
    plt.title('pct_absences controlled between: ' 
              + str(start_date)[:4] + 
              ' and ' + 
              str(end_date)[:4])
    #lable the x and y axis
    plt.ylabel('pct_absences')
    plt.xlabel('Week')
    #rotake the x ticks for legibility
    plt.xticks(rotation = 90)
    #display the plot
    plt.show()
    
#a function that displays a lineplot of Week v pct_absences
def typed_grouped_pct(start_date, end_date):
    #pull a cleaned df2 using the pct helper
    df2=pct_helper(start_date, end_date)
    #assing the size of the plot
    plt.figure(figsize = (15,10))
    #plot  Week v pct_absences
    plt.plot(df2['date'],df2['pct_absences'])
    #add an adaptive title to the plot so that it descibes what is happening well
    plt.title('pct_absences controlled between: ' +
              str(start_date)[:4] +
              ' and ' +
              str(end_date)[:4])
    #lable the x and y axis
    plt.ylabel('pct_absences')
    plt.xlabel('Week')
    #rotake the x ticks for legibility
    plt.xticks(rotation = 90)
    #display the plot
    plt.show()

## Widgets 

This section creates the widgets that are used for the actual UI such as dropdowns, radio buttons, and date selectors

In [3]:
#add a drop down menu to controll the gourp_by value
group_by=widgets.Dropdown(
    #make the dorpdown options as the list of clumns of the group
    options=df.columns,
    #Defualt of the key 'Week'
    value='Week',
    #add a disrcription
    description='Gourp By:',
)
#add a secound drop down menu to controll the type_value
type_value=widgets.Dropdown(
    #make the dorpdown options as the list of clumns of the group
    options=['Total','Percent','Percent Controlled'],
    #Defualt of the key 'Week'
    value='Total',
    #add a disrcription
    description='Count Type:',
)

#create a date pickert for the start date to be displayed
date_picker_start = widgets.DatePicker(
    #add a discription
    description='Start Date',
    disabled=False,
    #add the starting value of the farthest back date
    value=df['Week'].min()
)
#create a date pickert for the end date to be displayed
date_picker_end = widgets.DatePicker(
    #add a discription
    description='End Date',
    disabled=False,
    #add the starting date of the most recent date
    value=df['Week'].max()
)


## Master function

This section is the fucntion called to control what graphs are displayed and with what data

In [4]:
#display a fucnition to display the filtered graph
def display_func(group_by, start_date, end_date, type_value):
    #send to the date_trim function
    df, grouped= date_trim(start_date, end_date, group_by)
    #check to see the group type and the type_value value
    if (group_by =='Week') & (type_value =='Total'):
        #send to the helper fucntion to display the total count by Date/Week
        week_grouped(start_date, end_date, grouped, group_by)   
    #check to see if the group type is 'School'
    elif group_by=='School':
        #send to the helper fucntion to display the total count by School
        school_grouped(start_date, end_date, grouped) 
    #check to see if the group type is 'Grade'
    elif group_by == 'Grade':
        #send to the helper fucntion to display the total count by Grade
        grade_grouped(start_date, end_date, grouped)
    #check to see the the group type and the type_value value
    elif (group_by == 'Week') & (type_value =='Percent Controlled'):
        #send to the helper fucntion to display the  normalized pct abcentee by Date
        typed_grouped_pct_controlled(start_date, end_date)
    #check to see the the group type and the type_value value
    elif (group_by == 'Week') & (type_value =='Percent'):
        #send to the helper fucntion to display the pct abcentee by Date
        typed_grouped_pct(start_date, end_date)


## Widget interaction 

This section controls the itneractions of the Widgets with the display function

In [5]:
#stack the realted boxes into vertical gorups
vbox1= widgets.VBox([group_by, type_value])
vbox2=widgets.VBox([date_picker_start, date_picker_end])
#place these vertical boxes next to each other
ui = widgets.HBox([vbox1, vbox2])
#creat the interactive display 
out = widgets.interactive_output(display_func,
                                 {'group_by': group_by,
                                  'type_value': type_value,
                                  'start_date' : date_picker_start,
                                  'end_date' : date_picker_end})

## Data Exploration UI

Here is the UI crerated to explore the data obtained from the Ingelwood Unified School District

### Widget Break Down

* Group By: This tells us how to apply the gorup by funtion, this marks the x-axis and the y-axis is abcentism
* Count Type: This set abcentism as either a sum, a pct of enrolment, or a distance from a linear fit pct of enrolment
* Start Date: This is the first date of the date set 
* End Date: This is the last date of the date set 

In [6]:
display(ui, out)

HBox(children=(VBox(children=(Dropdown(description='Gourp By:', options=('Week', 'School', 'Grade', 'Absences'…

Output()