# **Health Data Dashboard**

In [2]:
# Importing necessary packages
import pandas as pd
import numpy as np
import plotly.express as px

from dash import Dash, html, dcc, callback, Output, Input
import dash_ag_grid as dag
import dash_bootstrap_components as dbc

## Logistic Regression


## KNN
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
import plotly.express as px
from pandas.core.groupby.indexing import GroupByIndexingMixin

## Hierarchical Clustering



In [3]:
# Importing and cleaning the data

# Read the CSV, keep rows whose IYEAR is not 2024, drop rows with any missing
# value, and remove the 'Unnamed: 0' column.
df = (
    pd.read_csv('CDC-2019-2021-2023-DATA.csv', low_memory=False)
    .query("IYEAR != 2024")
    .dropna()
    .drop(columns='Unnamed: 0')
)
# Recode the Yes/No target column as 1.0/0.0.
df['ADDEPEV3'] = df['ADDEPEV3'].replace({'Yes': 1, 'No': 0}).astype(float)
df.head()

Unnamed: 0,BIRTHSEX,MENTHLTH,POORHLTH,ADDEPEV3,DECIDE,DIFFALON,ACEDEPRS,ACEDRINK,ACEDRUGS,ACEPRISN,ACEDIVRC,ACEPUNCH,ACEHURT1,ACESWEAR,ACETOUCH,ACETTHEM,ACEHVSEX,IYEAR
309121,Male,0.0,3.0,0.0,No,No,No,Yes,Yes,Yes,No,More than once,More than once,More than once,Never,Never,Never,2019
309123,Male,0.0,1.0,0.0,No,No,No,No,No,No,No,Never,Never,Never,Never,Never,Never,2019
309124,Male,1.0,0.0,0.0,No,No,No,No,No,No,No,Never,More than once,More than once,Never,Never,Never,2019
309126,Female,0.0,0.0,0.0,No,No,No,No,No,No,No,Never,Never,Never,More than once,Never,Never,2019
309133,Female,1.0,0.0,0.0,No,No,No,No,No,No,No,Never,Once,Never,Never,Never,Never,2019


In [5]:
def do_logit(test_sz, thresh):
    """Placeholder for the logistic regression model (not implemented yet).

    Both arguments are accepted but currently ignored; the function always
    returns None.
    """
    return None

In [None]:
def do_knn(test_sz=0.2, k_values=range(1, 41, 2)):
    """Tune a KNN classifier for ADDEPEV3 and plot CV balanced accuracy against k.

    Reads the module-level ``df``. The data are split with a stratified
    train/test split (``random_state=42``); only the training portion is used.
    A 5-fold ``GridSearchCV`` scored on balanced accuracy is run over the
    requested neighbor counts, so with the defaults this fits 20 candidates
    x 5 folds and can take a while on a large frame.

    Parameters
    ----------
    test_sz : float, default 0.2
        Passed to ``train_test_split`` as ``test_size``.
    k_values : iterable of int, default ``range(1, 41, 2)``
        Candidate values for ``n_neighbors``. Duplicates are removed and the
        values are searched in ascending order.

    Returns
    -------
    plotly.graph_objects.Figure
        Line chart of mean cross-validated balanced accuracy per k, with the
        best k highlighted by an extra marker.

    Raises
    ------
    ValueError
        If ``k_values`` is empty.
    """
    k_grid = sorted({int(k) for k in k_values})
    if not k_grid:
        raise ValueError("k_values must contain at least one neighbor count")

    nums = ['POORHLTH', 'MENTHLTH']
    cats = ['IYEAR', 'BIRTHSEX', 'ACEDEPRS', 'DECIDE', 'DIFFALON', 'ACEDRINK', 'ACEDRUGS', 'ACEPRISN', 'ACEDIVRC', 'ACEPUNCH',
            'ACEHURT1', 'ACESWEAR', 'ACETOUCH', 'ACETTHEM', 'ACEHVSEX']

    y = df['ADDEPEV3']
    # The ColumnTransformer selects columns by name, so their order in X is irrelevant.
    X = df[cats + nums]

    # sparse_threshold=0 forces a dense result. With the default (0.3) the
    # transformer may emit a sparse matrix when the one-hot block is sparse
    # enough, and the StandardScaler below (which centers by default) rejects
    # sparse input.
    preprocess = ColumnTransformer(
        transformers=[('encoder', OneHotEncoder(drop='first'), cats),
                      ('numeric', 'passthrough', nums)],
        sparse_threshold=0,
    )

    # The held-out portion is not used here; the split is kept so the grid
    # search sees the same training rows as before.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_sz, random_state=42, stratify=y
    )

    pipe = Pipeline([("preprocess", preprocess),
                     ("scaler", StandardScaler()),
                     ("knn", KNeighborsClassifier(weights="distance"))
                    ])

    param_grid = {"knn__n_neighbors": k_grid}
    grid = GridSearchCV(pipe, param_grid, cv=5, scoring="balanced_accuracy", n_jobs=-1)
    grid.fit(X_train, y_train)

    results_df = pd.DataFrame(grid.cv_results_)
    # cv_results_ stores the parameter column as objects; make k numeric for the axis.
    results_df["k"] = results_df["param_knn__n_neighbors"].astype(int)
    results_df["mean_score"] = results_df["mean_test_score"]

    best_k = grid.best_params_["knn__n_neighbors"]
    best_score = grid.best_score_

    fig = px.line(
        results_df,
        x="k",
        y="mean_score",
        title=f"Cross-Validated Balanced Accuracy vs. K (best k = {best_k})",
        markers=True,
        labels={"k": "Number of Neighbors (k)", "mean_score": "Mean CV Balanced Accuracy"}
    )

    # Overlay a single labelled point on the winning k.
    fig.add_scatter(
        x=[best_k],
        y=[best_score],
        mode="markers+text",
        text=[f"Best k = {best_k}"],
        textposition="top center",
        name="Best k"
    )

    fig.update_layout(hovermode="x unified")
    return fig


In [7]:
def do_hier():
    """Placeholder for the hierarchical clustering model (not implemented yet).

    Takes no arguments and always returns None.
    """
    return None

In [24]:
# Create the Dash app with a single external stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: a title, three top-level tabs, and a container ('tabs-content')
# that the render_content callback fills. The 'Models' tab carries its own
# nested tab bar ('subtabs') plus a container ('sub-tabs-content') for the
# selected model's output.
app.layout = html.Div(
    children=[
        html.H1('Behavioral Risk Mental Health Dashboard'),
        dcc.Tabs(
            id='tabs',
            value='tab1',
            children=[
                dcc.Tab(label='README: Project Overview', value='tab1'),
                dcc.Tab(label='Data Table', value='tab2'),
                dcc.Tab(
                    label='Models',
                    value='tab3',
                    children=[
                        dcc.Tabs(
                            id='subtabs',
                            value='knn_tab',
                            children=[dcc.Tab(label='KNN Model', value='knn_tab')],
                        ),
                        html.Div(id='sub-tabs-content'),
                    ],
                ),
            ],
        ),
        html.Div(id='tabs-content'),
    ]
)

@callback(Output('tabs-content', 'children'),
          Input('tabs', 'value'))
def render_content(tab):
    """Build the content shown under the top-level tab bar.

    Parameters
    ----------
    tab : str
        Value of the selected tab in the 'tabs' component.

    Returns
    -------
    dash component or None
        'tab1' -> the README/overview page, 'tab2' -> an AG Grid showing every
        row of the module-level ``df``, 'tab3' -> a placeholder heading.
        Any other value returns None.
    """
    if tab == 'tab1':
        return html.Div([
                html.H2('Behavioral Risk Mental Health Dashboard: Predicting Mental Health with Behavioral Risk Factor Variables'),
                html.P('''This app uses behavioral risk variables from 2019, 2021, and 2023 to predict mental health outcomes,
                          focusing primarily on variables relating to adverse childhood experiences, as well as a few other variables.'''),

                html.H3('About the Dataset'),
                html.P('''This dataset comes from the CDC's Behavioral Risk Factor Surveillance System,
                          a system of comprehensive telephone surveys conducted every year regarding health-related risk behaviors,
                          chronic health conditions, and use of preventative health services for adults in the United States. Each row
                          represents a single respondent with variables including birth sex, the year the survey was taken,
                          self-reported health and functional difficulties, and adverse childhood experiences.
                       ''' ),

                html.H3('Target Variable (in the models that utilize one)'),
                html.P([html.B('ADDEPEV3: '),'''Answer to survey question: (Ever told) (you had) a depressive disorder 
                                                (including depression, major depression, dysthymia, or minor depression)?''']),
                html.H3('Predictor Variables'),
                html.Ul([
                    html.Li([html.B('BIRTHSEX: '),'Assigned sex of respondent at birth']),
                    html.Li([html.B('IYEAR: '), 'Year the respondent took the survey']),
                    html.Li([html.B('POORHLTH: '),'''Answer to survey question: During the past 30 days, for about how many days did poor physical or mental health 
                                                   keep you from doing your usual activities, such as self-care, work, or recreation?''']),
                    html.Li([html.B('MENTHLTH: '), '''Answer to survey question: Now thinking about your mental health, 
                                                     which includes stress, depression, and problems with emotions, 
                                                     for how many days during the past 30 days was your mental health not good?''']),
                    html.Li([html.B('DECIDE: '), '''Answer to survey question: Because of a physical, mental, or emotional condition, 
                                                   do you have serious difficulty concentrating, remembering, or making decisions?''']),
                    html.Li([html.B('DIFFALON: '), '''Answer to survey question: Because of a physical, mental, or emotional condition, 
                                                     do you have difficulty doing errands alone such as visiting a doctor's office or shopping?''']),
                    html.Li([html.B('ACEDEPRS: '), 'Answer to survey question: (As a child) Did you live with anyone who was depressed, mentally ill, or suicidal?']),
                    html.Li([html.B('ACEDRINK: '), 'Answer to survey question: (As a child) Did you live with anyone who was a problem drinker or alcoholic?']),
                    html.Li([html.B('ACEDRUGS: '), 'Answer to survey question: (As a child) Did you live with anyone who used illegal street drugs or who abused prescription medications?']),
                    html.Li([html.B('ACEPRISN: '), 'Answer to survey question: (As a child) Did you live with anyone who served time or was sentenced to serve time in a prison, jail, or other correctional facility?']),
                    html.Li([html.B('ACEDIVRC: '), 'Answer to survey question: (As a child) Were your parents separated or divorced?']),
                    html.Li([html.B('ACEPUNCH: '), 'Answer to survey question: (As a child) How often did your parents or adults in your home ever slap, hit, kick, punch or beat each other up?']),
                    html.Li([html.B('ACEHURT1: '), 'Answer to survey question: (As a child) Not including spanking, (before age 18), how often did a parent or adult in your home ever hit, beat, kick, or physically hurt you in any way?']),
                    html.Li([html.B('ACESWEAR: '), 'Answer to survey question: (As a child) How often did a parent or adult in your home ever swear at you, insult you, or put you down?']),
                    html.Li([html.B('ACETOUCH: '), 'Answer to survey question: (As a child) How often did anyone at least 5 years older than you or an adult, ever touch you sexually?']),
                    html.Li([html.B('ACETTHEM: '), 'Answer to survey question: (As a child) How often did anyone at least 5 years older than you or an adult, try to make you touch them sexually?']),
                    html.Li([html.B('ACEHVSEX: '), 'Answer to survey question: (As a child) How often did anyone at least 5 years older than you or an adult, force you to have sex?']),
                        ]),

                html.H3('Key Features of Dashboard'),
                html.Ul([
                    html.Li('View rows of the final cleaned dataset in the Data Table Tab'),
                    html.Li('Select from Logistic Regression, K Nearest Neighbor, or Hierarchical Agglomerative Clustering models'),
                    html.Li('Change hyperparameters, such as number of neighbors and train test split, to your liking to view different versions of the model')
                        ]),

                html.H3('Instructions for Use'),
                # TODO: placeholder text; replace with real usage instructions.
                html.P('hello'),

                html.H3('Authors'),
                html.P('''Randa Ampah, Isabel Delgado, Aysha Hussen, Aniyah McWilliams, 
                          and Jessica Oseghale for the DS 6021 Final Project in the Fall 
                          25 semester of the UVA MSDS program''')

        ])

    if tab == 'tab2':
        # NOTE(review): this serializes every row of df for the grid on each
        # visit to the tab; consider limiting or paginating if df is large.
        return html.Div([
                dag.AgGrid(
                    rowData=df.to_dict('records'),
                    columnDefs=[{"field": i} for i in df.columns]
                          )
        ])

    if tab == 'tab3':
        # Placeholder; the Models tab's own children hold the model sub-tabs.
        return html.H3('sidebar with choices here?')

    # Unrecognized tab value: leave the container empty.
    return None

@callback(Output('sub-tabs-content', 'children'),
          Input('subtabs', 'value'))
def update_subtab(subtabs):
    """Render the panel for the selected model sub-tab.

    Only 'knn_tab' is handled: it calls ``do_knn()`` on every invocation and
    wraps the returned figure in a styled container. Any other value returns
    None. A logistic-regression sub-tab is not wired up yet.
    """
    if subtabs != "knn_tab":
        return None

    palette = {'background': '#7FDBFF', 'text': '#111111'}
    centered_text = {'textAlign': 'center', 'color': palette['text']}

    knn_figure = do_knn()

    heading = html.H1('KNN Classifier Dashboard', style=dict(centered_text))
    caption = html.Div('Model for KNN Classifier', style=dict(centered_text))
    chart = dcc.Graph(figure=knn_figure)

    return html.Div(
        style={'backgroundColor': palette['background']},
        children=[heading, caption, chart],
    )
    
    

# Start the Dash app in debug mode, but only when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=True)