In [1]:
# Run the cells of this notebook in order, then
# visit http://127.0.0.1:8050/ in your web browser.
from jupyter_dash import JupyterDash
from dash import Dash, html, dcc
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from dash import Dash, dcc, html, Input, Output  # pip install dash
from preprocess import preprocess
import os.path

# Application instance; the layout and the graph callback are attached to it.
app = JupyterDash(__name__)


In [2]:
# Load the data set: reuse the cached preprocessed CSV when it exists,
# otherwise preprocess the raw file once and cache the result.
file_name = "atussum_0321.csv"
processed_file_name = "processed_atussum_0321.csv"

is_processed_file_exists = os.path.exists(processed_file_name)
if is_processed_file_exists:
    df = pd.read_csv(processed_file_name)
else:
    df = preprocess(file_name)
    # NOTE(review): the index is written as well, so a later reload presumably
    # gains an extra "Unnamed: 0" column - confirm whether index=False is wanted.
    df.to_csv(processed_file_name)

# dropna() returns a new frame instead of modifying df in place, so the result
# must be assigned; the bare call previously left every missing value in df.
df = df.dropna()

# Page layout: title, category dropdown, message area, bin-count slider and
# the main figure. The component ids are referenced by the update_graph callback.
app.layout = html.Div([

    html.H1("A Web App for the Subgroup Analysis of the ATUS Dataset", style={'text-align': 'center'}),

    # Category selector; each option's value is the position of the matching
    # column name in the list used by the callback.
    html.Div([
        "Select up to four categories, continuous variables are labeled (c), discrete variables are labeled (d)",
        dcc.Dropdown(id="slct_ctgr",
                 options=[
                     {"label": "(d): Labor Status", "value": 0},
                     {"label": "(d): Education Level", "value": 1},
                     {"label": "(d): Sex", "value": 2},
                     {"label": "(d): Hispanic", "value": 3},
                     {"label": "(d): Metropolitan Status", "value": 4},
                     {"label": "(c): Age", "value": 5},
                     {"label": "(c): Weekly Income", "value": 6}],
                 multi=True,
                 value=[0,1],
                 style={'width': "80%", 'margin': 'auto'}
                 ),

    # The dict used to list 'margin' twice ('auto', then '2px'); only the last
    # value took effect, so the dead first entry has been removed.
    ], style={'text-align': 'center', 'margin': '2px'}
    ),

    # Status / warning text written by the callback.
    html.Div(id='output_container', children=[],
             style={'margin': 'auto', 'color': 'red', 'text-align': 'center'}
    ),
    html.Br(),

    # Number of bins applied to the continuous variables (2 to 10, step 1).
    html.Div([
        "Select the number of bins for continuous variables",
        dcc.Slider(2, 10, 1,
               value=5,
               id='my-slider'
            )
        ],
        style={'width': "40%",
               'margin': 'auto',
              'text-align': 'center'}
    ),

    # Figure produced by the callback.
    html.Div(
        dcc.Graph(id='main_fig', figure={}),
        style={
                'margin': 'auto',
                'margin-left': 'auto',
                'margin-right': 'auto',
                'display': 'block',
                'margin-bottom': '3vw'
         }
    )
], style={'font-family': 'Garamond, Arial, sans-serif'})


# Column plotted for each dropdown option; the list index is the option's value.
_COLUMN_NAMES = ['labor_status', 'education_level', 'sex', 'Hispanic',
                 'Metropolitan_status', 'age', 'weekly_income']
# Dropdown values of the continuous variables, which are binned before counting.
_CONTINUOUS_OPTIONS = (5, 6)
# Options (education level, sex) shown when the selection cannot be plotted.
_DEFAULT_OPTIONS = [1, 2]


def _count_subgroups(option_ids, n_bins):
    """Count the rows of ``df`` for every combination of the chosen variables.

    Args:
        option_ids: dropdown values (indices into ``_COLUMN_NAMES``).
        n_bins: number of ``pd.cut`` bins used for the continuous variables.

    Returns:
        A DataFrame with one column per chosen variable, cast to ``str``,
        followed by a ``count`` column holding the group sizes.
    """
    group_keys = []
    for option in option_ids:
        column = _COLUMN_NAMES[option]
        if option in _CONTINUOUS_OPTIONS:
            # Group by the binned Series; it keeps the column's name, so the
            # resulting count column is still addressed by that name.
            group_keys.append(pd.cut(df[column], bins=n_bins))
        else:
            group_keys.append(column)

    counts = df.groupby(group_keys).size().reset_index(name='count')
    # Every column except the trailing 'count' becomes a string label so the
    # figures can treat it as a category.
    for column in counts.columns[:-1]:
        counts[column] = counts[column].astype(str)
    return counts


@app.callback(
    [Output(component_id='output_container', component_property='children'),
     Output(component_id='main_fig', component_property='figure')],
    [Input(component_id='slct_ctgr', component_property='value'),
    Input(component_id='my-slider', component_property='value')]
)
def update_graph(option_slctd,bin_slctd):
    """Rebuild the status message and the figure for the current selection.

    Args:
        option_slctd: list of selected dropdown values (may be empty or None).
        bin_slctd: number of bins chosen on the slider.

    Returns:
        A ``(message, figure)`` pair: a histogram for one variable, a bubble
        chart for two or three (the third as colour), a 3D bubble chart for
        four, and a default education-level/sex chart otherwise.
    """
    # Sort a copy (highest value first, so continuous variables lead) instead
    # of mutating the callback argument; a cleared dropdown may deliver None.
    selected = sorted(option_slctd or [], reverse=True)
    n_selected = len(selected)
    columns = [_COLUMN_NAMES[option] for option in selected]

    if n_selected == 1:
        return "", px.histogram(df, x=columns[0])

    if n_selected == 0 or n_selected > 4:
        if n_selected == 0:
            container = "A default graph is showing during transitioning ,The categories chosen was: None!"
        else:
            # More than four variables cannot be drawn; say so instead of
            # claiming that nothing was selected.
            container = "Too many categories selected, a default graph is showing. Please select at most four."
        cnt = _count_subgroups(_DEFAULT_OPTIONS, bin_slctd)
        fig = px.scatter(cnt, x='education_level', y='sex',size='count', size_max=60)
        fig.update_xaxes(type='category')
        fig.update_yaxes(type='category')
        return container, fig

    cnt = _count_subgroups(selected, bin_slctd)

    if n_selected == 4:
        container = "Category limit reached, you cannot add more"
        fig = px.scatter_3d(cnt, x=columns[0], y=columns[1], z=columns[2],
                            size='count', size_max=60,
                            color=columns[3],
                            color_discrete_sequence=px.colors.qualitative.Plotly)
        # 3D axes belong to the scene, so update_xaxes/update_yaxes do not
        # reach them; set the category type on the scene axes instead.
        fig.update_scenes(xaxis_type='category', yaxis_type='category',
                          zaxis_type='category')
        return container, fig

    # Two or three variables: 2D bubble chart, third variable shown as colour.
    colour_args = {}
    if n_selected == 3:
        colour_args = dict(color=columns[2],
                           color_discrete_sequence=px.colors.qualitative.Plotly)
    fig = px.scatter(cnt, x=columns[0], y=columns[1], size='count', size_max=60,
                     **colour_args)
    fig.update_xaxes(type='category')
    fig.update_yaxes(type='category')
    return "", fig

In [3]:
# Start the Dash server, with debug mode enabled, when run as the main module.
if __name__ == '__main__':
    app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/
