In [42]:
import pandas as pd
import plotly.express as px
import numpy as np
import dash
from dash import dcc 
from dash import html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.figure_factory as ff

## Data Ingestion and Prep

In [43]:
# Read states_all.csv (path is relative to the working directory) into the
# frame that every later cell filters, extends and plots.
df = pd.read_csv('states_all.csv')

In [44]:
# Keep only the rows whose YEAR lies in 1992..2016 (both ends included).
df = df[df['YEAR'].between(1992, 2016)]

In [45]:
# Per-student figures: each total column divided by enrollment.
# Maps the derived column name to the total it is computed from.
per_student_sources = {
    'AVG_TOTAL_REVENUE_STUDENT': 'TOTAL_REVENUE',
    'AVG_STATE_REVENUE_STUDENT': 'STATE_REVENUE',
    'AVG_LOCAL_REVENUE_STUDENT': 'LOCAL_REVENUE',
    'AVG_FEDERAL_REVENUE_STUDENT': 'FEDERAL_REVENUE',
    'AVG_EXPENDITURE_STUDENT': 'TOTAL_EXPENDITURE',
}

for per_student_column, total_column in per_student_sources.items():
    df[per_student_column] = df[total_column] / df['ENROLL']

## Dash Layout

In [46]:
# Create the Dash application object; the layout and every callback defined
# in the cells below are attached to it.
app = dash.Dash()

In [47]:
# Choices shared by the dropdowns, computed once as plain Python lists so the
# component props are ordinary JSON-serializable values instead of
# pandas/NumPy containers.
numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
state_names = df['STATE'].unique().tolist()


def _numeric_dropdown(component_id, default, multi=False):
    """Return a dropdown whose choices are the numeric columns of ``df``.

    component_id: Dash id the callbacks use to read the selection.
    default: initially selected column name (a list of names when ``multi``).
    multi: whether several columns may be selected at once.
    """
    return dcc.Dropdown(
        id=component_id,
        options=numeric_columns,
        multi=multi,
        value=default
    )


# Page structure, top to bottom:
#   1. state comparison scatter plot with its axis dropdowns, year slider and
#      two hover-driven time-series plots;
#   2. variable comparison scatter plot next to a correlation heatmap;
#   3. per-state line plot.
# NOTE(review): several graph containers use 'padding': '0 20'; the second
# value carries no unit, so browsers may ignore the declaration - confirm the
# intended padding before changing it, since adding a unit alters the layout.
app.layout = html.Div([
    # Dash style dictionaries use camelCased keys (textAlign, not text-align).
    html.H1("Data 608 Final Project", style={'textAlign': 'center'}),
    html.H3("State Comparison Scatter Plot"),
    html.P("Each point on the Scatter plot represents a state. You can configure which x and y axis you'd like to compare. Hovering over a point will filter the two timeseries plots on the right"),

    # Axis selectors for the state comparison scatter plot.
    html.Div([
        html.Div([
            html.P('X-axis'),
            _numeric_dropdown('xaxis', 'FEDERAL_REVENUE')
        ], style={'width': '49%', 'display': 'inline-block', 'padding': '10px 5px'}),

        html.Div([
            html.P('Y-axis'),
            _numeric_dropdown('yaxis', 'STATE_REVENUE')
        ], style={'width': '49%', 'display': 'inline-block', 'padding': '10px 5px'})
    ], style={'padding': '10px 5px'}),

    # Year selector; step=None restricts the slider to the marked years.
    html.Div([
        dcc.Slider(
            df['YEAR'].min(),
            df['YEAR'].max(),
            step=None,
            id='year-slider',
            value=df['YEAR'].max(),
            marks={str(year): str(year) for year in df['YEAR'].unique()}
        )
    ], style={'padding': '15px 15px'}),

    # Scatter plot; the initial hoverData seeds the time-series plots with a
    # state before the user has hovered over anything.
    html.Div([
        dcc.Graph(
            id='scatter',
            hoverData={'points': [{'customdata': 'CALIFORNIA'}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    # Time-series plots driven by the hovered state.
    html.Div([
        dcc.Graph(id='x-time-series'),
        dcc.Graph(id='y-time-series'),
    ], style={'display': 'inline-block', 'width': '49%'}),

    html.H3("Variable Comparison Scatter Plot"),

    # Selectors for the variable comparison scatter plot and the heatmap.
    html.Div([
        html.Div([
            html.P('X-axis'),
            _numeric_dropdown('xaxis2', 'AVG_TOTAL_REVENUE_STUDENT'),
            html.P('State'),
            dcc.Dropdown(
                id="state_filter",
                options=state_names,
                multi=True,
                value=state_names
            )
        ], style={'width': '49%', 'display': 'inline-block', 'padding': '10px 5px'}),

        html.Div([
            html.P('Y-axis'),
            _numeric_dropdown('yaxis2', 'AVG_MATH_4_SCORE'),
            html.P('Correlation Filter'),
            _numeric_dropdown(
                'correlation_filter',
                ['AVG_TOTAL_REVENUE_STUDENT', 'AVG_MATH_4_SCORE'],
                multi=True
            )
        ], style={'width': '49%', 'display': 'inline-block', 'padding': '10px 5px'})
    ], style={'padding': '10px 5px'}),

    html.Div([
        dcc.Graph(id='scatter2')
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    html.Div([
        dcc.Graph(id='corrplot')
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    html.H3("State Comparison Line Plot"),

    html.Div([
        dcc.Graph(id='lineplot')
    ], style={'width': '100%', 'display': 'inline-block', 'padding': '0 20'}),
])

## Callbacks

In [48]:
@app.callback(
    Output(component_id='scatter',component_property='figure'),
    [Input(component_id='xaxis',component_property='value'),
     Input(component_id='yaxis',component_property='value'),
     Input(component_id='year-slider',component_property='value')]
)
def update_graph(xaxis, yaxis, year):
    """Draw the scatter plot of two columns for a single year.

    xaxis: column plotted on the x-axis ('xaxis' dropdown).
    yaxis: column plotted on the y-axis ('yaxis' dropdown).
    year: value of the 'year-slider'; only rows with this YEAR are plotted.
    Returns the Plotly figure for the 'scatter' graph.

    NOTE: a later cell defines another callback that is also named
    ``update_graph``. Both are registered through their decorators, but the
    module-level name ends up bound to the later definition.
    """
    # A cleared dropdown arrives as None; keep the current figure instead of
    # handing Plotly Express an undefined axis.
    if xaxis is None or yaxis is None or year is None:
        raise dash.exceptions.PreventUpdate

    # Boolean indexing already returns a new frame, so copying the whole
    # dataset on every callback is unnecessary.
    dff = df[df['YEAR'] == year]

    fig = px.scatter(
        dff,
        x=xaxis,
        y=yaxis,
        opacity=0.8,
        hover_name='STATE'
    )

    # Attach the state name to every point; the time-series callbacks read it
    # back from hoverData['points'][0]['customdata'].
    fig.update_traces(customdata=dff['STATE'])

    fig.update_layout(margin={'l': 40, 'b': 40, 't': 10, 'r': 0}, hovermode='closest')

    return fig

In [49]:
def time_series(dff, axis, title=None):
    """Build a compact lines-and-markers chart of one column against YEAR.

    dff: frame containing a 'YEAR' column and the column named by ``axis``.
    axis: name of the column plotted on the y-axis.
    title: optional label drawn in the top-left corner of the plot; when
        omitted, the column name in ``axis`` is used.
    Returns the Plotly figure.
    """
    fig = px.scatter(dff, x='YEAR', y=axis)

    fig.update_traces(mode='lines+markers')

    fig.update_xaxes(showgrid=False)

    # An annotation added without any text makes plotly.js fall back to its
    # placeholder label, so always pass explicit text and skip the annotation
    # entirely when there is nothing to show.
    label = axis if title is None else title
    if label is not None:
        fig.add_annotation(x=0, y=0.85, xanchor='left', yanchor='bottom',
                           xref='paper', yref='paper', showarrow=False, align='left',
                           text=str(label))

    fig.update_layout(height=225, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})

    return fig

In [50]:
@app.callback(
    Output('x-time-series', 'figure'),
    Input('scatter', 'hoverData'),
    Input('xaxis', 'value'))
def update_y_timeseries(hoverData, xaxis):
    """Plot the x-axis column over time for the state hovered in 'scatter'.

    hoverData: hover payload of the 'scatter' graph; the state name is read
        from ``hoverData['points'][0]['customdata']``.
    xaxis: column currently selected in the 'xaxis' dropdown.
    Returns the figure for the 'x-time-series' graph.

    NOTE: despite its name, this callback feeds the *x* time-series graph.
    """
    points = (hoverData or {}).get('points')

    # Keep the current figure when the dropdown was cleared (None) or the
    # hover payload carries no point to read a state from.
    if xaxis is None or not points:
        raise dash.exceptions.PreventUpdate

    state = points[0].get('customdata')
    if state is None:
        raise dash.exceptions.PreventUpdate

    dff = df[df['STATE'] == state]
    return time_series(dff, xaxis)


@app.callback(
    Output('y-time-series', 'figure'),
    Input('scatter', 'hoverData'),
    Input('yaxis', 'value'))
def update_x_timeseries(hoverData, yaxis):
    """Plot the y-axis column over time for the state hovered in 'scatter'.

    hoverData: hover payload of the 'scatter' graph; the state name is read
        from ``hoverData['points'][0]['customdata']``.
    yaxis: column currently selected in the 'yaxis' dropdown.
    Returns the figure for the 'y-time-series' graph.

    NOTE: despite its name, this callback feeds the *y* time-series graph.
    """
    points = (hoverData or {}).get('points')

    # Keep the current figure when the dropdown was cleared (None) or the
    # hover payload carries no point to read a state from.
    if yaxis is None or not points:
        raise dash.exceptions.PreventUpdate

    state = points[0].get('customdata')
    if state is None:
        raise dash.exceptions.PreventUpdate

    dff = df[df['STATE'] == state]
    return time_series(dff, yaxis)

In [51]:
@app.callback(
    Output(component_id='scatter2',component_property='figure'),
    [Input(component_id='xaxis2',component_property='value'),
     Input(component_id='yaxis2',component_property='value'),
     Input(component_id='state_filter',component_property='value')]
)
def update_graph(xaxis2, yaxis2, state_filter):
    """Draw the variable comparison scatter plot for the selected states.

    xaxis2: column plotted on the x-axis ('xaxis2' dropdown).
    yaxis2: column plotted on the y-axis ('yaxis2' dropdown).
    state_filter: list of state names to include ('state_filter' dropdown).
    Returns the Plotly figure for the 'scatter2' graph.

    NOTE: this reuses the name ``update_graph`` from an earlier callback.
    Both are registered through their decorators, but the module-level name
    now refers to this definition.
    """
    # A cleared axis dropdown arrives as None; keep the current figure.
    if xaxis2 is None or yaxis2 is None:
        raise dash.exceptions.PreventUpdate

    # isin() rejects None, so treat a missing selection as "no states".
    # Boolean indexing returns a new frame, so no df.copy() is needed.
    dff = df[df['STATE'].isin(state_filter or [])]

    fig = px.scatter(
        dff,
        x=xaxis2,
        y=yaxis2,
        opacity=0.8
    )

    return fig

In [52]:
@app.callback(
    Output(component_id='corrplot',component_property='figure'),
    Input(component_id='correlation_filter',component_property='value')
)
def create_corrplot(correlation_filter):
    """Draw an annotated heatmap of the correlations between chosen columns.

    correlation_filter: list of column names from the 'correlation_filter'
        dropdown; correlations are computed over all rows of ``df``.
    Returns the Plotly figure for the 'corrplot' graph.
    """
    # With nothing selected the correlation matrix is empty and the heatmap
    # factory has nothing to draw, so keep the current figure instead.
    if not correlation_filter:
        raise dash.exceptions.PreventUpdate

    df_corr = df[correlation_filter].corr()
    z = df_corr.to_numpy()

    fig = ff.create_annotated_heatmap(
        z,
        x=list(df_corr.columns),
        y=list(df_corr.index),
        # Cell labels show the coefficients rounded to two decimals.
        annotation_text=np.around(z, decimals=2),
        hoverinfo='z',
        colorscale='Viridis'
    )

    return fig

In [53]:
@app.callback(
    Output(component_id='lineplot',component_property='figure'),
    [Input(component_id='xaxis2',component_property='value'),
     Input(component_id='state_filter',component_property='value')]
)
def create_lineplot(xaxis2, state_filter):
    """Draw one line per selected state showing a column over the years.

    xaxis2: column from the 'xaxis2' dropdown; it is plotted on the y-axis
        of this chart, with YEAR on the x-axis.
    state_filter: list of state names to include ('state_filter' dropdown).
    Returns the Plotly figure for the 'lineplot' graph.
    """
    # A cleared column dropdown arrives as None; keep the current figure.
    if xaxis2 is None:
        raise dash.exceptions.PreventUpdate

    # isin() rejects None, so treat a missing selection as "no states".
    # Boolean indexing returns a new frame, so no df.copy() is needed.
    dff = df[df['STATE'].isin(state_filter or [])]

    fig = px.line(
        dff,
        x='YEAR',
        y=xaxis2,
        color='STATE'
    )

    return fig

In [54]:
# Start the Dash server in debug mode, but only when this code runs as the
# main program. The auto-reloader is switched off explicitly.
# NOTE(review): presumably because the reloader re-executes the script, which
# does not work from inside a notebook kernel - confirm.
if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
