## Dashboard

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings
from dash import Dash, dcc, html, Input, Output, callback

warnings.filterwarnings("ignore")

In [2]:
# ============================================
# 1. Load Data
# ============================================
# Read the cleaned survey extract into the module-level frame `df`, which the
# helper functions further down read directly.
print("Loading dataset...")
df = pd.read_csv("cleaned_data.csv")
print(f"Data loaded! Shape: {df.shape}")
# One call with a newline separator prints the heading and then the preview,
# exactly as two consecutive print() calls would.
print("\nFirst 5 rows:", df.head(), sep="\n")

Loading dataset...
Data loaded! Shape: (2763102, 22)

First 5 rows:
   Year Locationabbr                      Class       Topic  \
0  2023           AK  Chronic Health Indicators  Depression   
1  2023           AK  Chronic Health Indicators  Depression   
2  2023           AK  Chronic Health Indicators  Depression   
3  2023           AK  Chronic Health Indicators  Depression   
4  2023           AK  Chronic Health Indicators  Depression   

                                            Question Response Break_Out  \
0  Ever told you that you have a form of depression?      Yes   Overall   
1  Ever told you that you have a form of depression?       No   Overall   
2  Ever told you that you have a form of depression?      Yes      Male   
3  Ever told you that you have a form of depression?       No      Male   
4  Ever told you that you have a form of depression?      Yes    Female   

  Break_Out_Category  Sample_Size  Data_value  ...  ClassId  TopicId  \
0            Overall          

In [3]:
# ============================================
# 2. Basic Data Exploration
# ============================================
print("\n" + "=" * 50)
print("Basic Dataset Information:")
print("=" * 50)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

print("\nColumn names:")
print(df.columns.tolist())

print("\nMissing Value Summary:")
print(df.isnull().sum())

print("\nBreak_Out value counts:")
print(df["Break_Out"].value_counts())

print("\nNumber of unique survey questions:")
print(f"There are {df['Question'].nunique()} unique questions.")


Basic Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2763102 entries, 0 to 2763101
Data columns (total 22 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   Year                   int64  
 1   Locationabbr           object 
 2   Class                  object 
 3   Topic                  object 
 4   Question               object 
 5   Response               object 
 6   Break_Out              object 
 7   Break_Out_Category     object 
 8   Sample_Size            float64
 9   Data_value             float64
 10  Confidence_limit_Low   float64
 11  Confidence_limit_High  float64
 12  ClassId                object 
 13  TopicId                object 
 14  BreakoutID             object 
 15  BreakOutCategoryID     object 
 16  QuestionID             object 
 17  ResponseID             object 
 18  GeoLocation            object 
 19  QuestionID_original    object 
 20  proportion             float64
 21  persons                flo

In [9]:
# ============================================
# 3. Data Preprocessing Functions
# ============================================


def get_questions():
    """
    Return all unique survey questions in sorted order.

    Rows with a missing ``Question`` are skipped: ``sorted`` cannot compare
    NaN (a float) with question strings and would raise ``TypeError``.

    Returns:
    - list of the distinct non-null values of ``df["Question"]``, sorted
    """
    return sorted(df["Question"].dropna().unique())


def filter_data_by_question(question):
    """
    Return an independent copy of the rows of ``df`` whose ``Question``
    equals the given question.
    """
    matches_question = df["Question"] == question
    selected_rows = df[matches_question]
    return selected_rows.copy()


def get_breakout_data(question_df, breakout_category):
    """
    Select the rows of one breakout grouping and return them as a copy.

    Parameters:
    - question_df: data already filtered down to one question
    - breakout_category: grouping to keep, e.g. 'Sex', 'Age Group', 'Overall'

    Note:
    - Break_Out_Category holds the grouping type (Sex, Age Group, etc.)
    - Break_Out holds the specific value inside it (Male, Female, 18-24, etc.)
    - 'Overall' is matched against Break_Out; every other grouping is matched
      against Break_Out_Category.
    """
    lookup_column = (
        "Break_Out" if breakout_category == "Overall" else "Break_Out_Category"
    )
    keep = question_df[lookup_column] == breakout_category
    return question_df[keep].copy()

# ============================================
# 4. Visualization Functions
# ============================================
def plot_top_bottom_states(data, title, n_states=10):
    """
    Draw a horizontal bar chart comparing the top and bottom ranked states.

    One subplot is drawn per distinct ``Response``. Within a response the rows
    are first averaged per state (``Locationabbr``) so that each state is
    ranked exactly once even when ``data`` holds several rows per state
    (for example one row per survey year). The same mean aggregation is used
    by ``plot_heatmap``. Averaged confidence limits are a display summary,
    not a pooled interval.

    Parameters:
    - data: DataFrame with Response, Locationabbr, Data_value,
      Confidence_limit_Low and Confidence_limit_High columns
    - title: chart title
    - n_states: number of states shown at each end of the ranking (default 10)

    Returns:
    - plotly Figure, or None (after printing a message) when nothing can be
      plotted
    """
    if data.empty:
        print(f"No data: {title}")
        return None

    # Rows without a value or without a response label cannot be ranked.
    data = data.dropna(subset=["Data_value", "Response"])

    if data.empty:
        print(f"No valid data: {title}")
        return None

    # Get all response types
    responses = data["Response"].unique()
    n_rows = len(responses)

    # make_subplots rejects vertical_spacing > 1 / (rows - 1); shrink the gap
    # when there are many responses instead of failing.
    spacing = 0.15 if n_rows == 1 else min(0.15, 0.5 / (n_rows - 1))

    # Create one subplot per response
    fig = make_subplots(
        rows=n_rows,
        cols=1,
        subplot_titles=[f"Response: {resp}" for resp in responses],
        vertical_spacing=spacing,
    )

    value_columns = ["Data_value", "Confidence_limit_Low", "Confidence_limit_High"]
    top_label = f"Top {n_states}"
    bottom_label = f"Bottom {n_states}"

    for idx, response in enumerate(responses, 1):
        # One row per state, highest value first
        ranked = (
            data[data["Response"] == response]
            .groupby("Locationabbr", as_index=False)[value_columns]
            .mean()
            .sort_values("Data_value", ascending=False)
        )

        # Take the bottom group only from rows not already in the top group,
        # so short rankings never show the same state twice.
        top_n = ranked.head(n_states).assign(Category=top_label)
        bottom_n = ranked.iloc[n_states:].tail(n_states).assign(Category=bottom_label)

        # Ascending order puts the largest bar at the top of a horizontal chart
        combined = pd.concat([top_n, bottom_n]).sort_values(
            "Data_value", ascending=True
        )

        # Green for the top group, red for the bottom group
        colors = [
            "#2ecc71" if cat == top_label else "#e74c3c"
            for cat in combined["Category"]
        ]

        # horizontal bar chart
        fig.add_trace(
            go.Bar(
                y=combined["Locationabbr"],
                x=combined["Data_value"],
                orientation="h",
                marker=dict(color=colors),
                text=combined["Data_value"].round(1),
                texttemplate="%{text}%",
                textposition="outside",
                name=response,
                # Error bars are expressed as distances from the bar value
                error_x=dict(
                    type="data",
                    symmetric=False,
                    array=combined["Confidence_limit_High"] - combined["Data_value"],
                    arrayminus=combined["Data_value"]
                    - combined["Confidence_limit_Low"],
                ),
                hovertemplate="<b>%{y}</b><br>"
                + "Value: %{x:.1f}%<br>"
                + "CI: [%{customdata[0]:.1f}%, %{customdata[1]:.1f}%]<br>"
                + "<extra></extra>",
                customdata=np.column_stack(
                    (
                        combined["Confidence_limit_Low"],
                        combined["Confidence_limit_High"],
                    )
                ),
                showlegend=False,
            ),
            row=idx,
            col=1,
        )

        # Label the axes of this subplot
        fig.update_xaxes(title_text="Percentage (%)", row=idx, col=1)
        fig.update_yaxes(title_text="State", row=idx, col=1)

    fig.update_layout(
        title=title,
        height=400 * n_rows,
        template="plotly_white",
        showlegend=False,
    )

    return fig


def plot_bar_with_ci(data, title, x_column="Break_Out", show_text=False):
    """
    Plot a grouped bar chart with confidence-interval error bars.

    One trace is drawn per distinct ``Response``. Rows sharing the same
    (Response, x_column) pair are averaged first, so each bar appears once
    even when ``data`` holds several rows per category (for example one per
    state and year). The same mean aggregation is used by ``plot_heatmap``.
    Averaged confidence limits are a display summary, not a pooled interval.

    Parameters:
    - data: DataFrame with Response, Data_value, Confidence_limit_Low,
      Confidence_limit_High and the column named by x_column
    - title: chart title
    - x_column: the column name used for the X axis
    - show_text: whether to print values above the bars (default False, to
      avoid overlapping labels)

    Returns:
    - plotly Figure, or None (after printing a message) when nothing can be
      plotted
    """
    if data.empty:
        print(f"no data: {title}")
        return None

    value_columns = ["Data_value", "Confidence_limit_Low", "Confidence_limit_High"]

    # A bar needs a value, both interval limits and a response label
    data = data.dropna(subset=value_columns + ["Response"])

    if data.empty:
        print(f"No valid data: {title}")
        return None

    # Keep the order in which responses first appear in the data
    responses = data["Response"].unique()

    # x_column may itself be "Response"; de-duplicate the grouping keys
    group_keys = list(dict.fromkeys(["Response", x_column]))
    summary = data.groupby(group_keys, as_index=False)[value_columns].mean()

    fig = go.Figure()

    for response in responses:
        response_data = summary[summary["Response"] == response].sort_values(
            x_column
        )
        # Responses are normally strings; str() keeps the hover text safe for
        # numeric labels as well.
        label = str(response)

        # Error bars are expressed as distances from the bar value
        error_y_array = (
            response_data["Confidence_limit_High"] - response_data["Data_value"]
        )
        error_y_arrayminus = (
            response_data["Data_value"] - response_data["Confidence_limit_Low"]
        )

        # bar chart
        fig.add_trace(
            go.Bar(
                x=response_data[x_column],
                y=response_data["Data_value"],
                name=label,
                error_y=dict(
                    type="data",
                    symmetric=False,
                    array=error_y_array,
                    arrayminus=error_y_arrayminus,
                    visible=True,
                ),
                # Text labels are optional to avoid overlap
                text=response_data["Data_value"].round(1) if show_text else None,
                textposition="outside" if show_text else None,
                texttemplate="%{text}%" if show_text else None,
                # hover info
                hovertemplate="<b>%{x}</b><br>"
                + label
                + ": %{y:.1f}%<br>"
                + "CI: [%{customdata[0]:.1f}%, %{customdata[1]:.1f}%]<br>"
                + "<extra></extra>",
                customdata=np.column_stack(
                    (
                        response_data["Confidence_limit_Low"],
                        response_data["Confidence_limit_High"],
                    )
                ),
            )
        )

    fig.update_layout(
        title=title,
        xaxis_title=x_column,
        yaxis_title="Percentage (%)",
        height=500,
        barmode="group",  # Grouped bar chart
        hovermode="x unified",
        template="plotly_white",
        showlegend=True,
    )

    return fig


def plot_heatmap(data, title, x_column="Break_Out", simplified=False):
    """
    Plot a heatmap of the mean ``Data_value`` per Response and category.

    Parameters:
    - data: DataFrame with Response, Data_value and the column named by
      x_column
    - title: chart title
    - x_column: column whose values become the heatmap columns
    - simplified: when True and x_column is "Break_Out", the detailed age
      bands are merged into 18-44, 45-64 and 65+ before averaging; for any
      other x_column the flag has no effect

    Returns:
    - plotly Figure; when there is nothing to plot the figure only carries
      an explanatory annotation
    """

    def _message_figure(message):
        # Empty figure with a centred note, used instead of a heatmap
        return go.Figure().add_annotation(
            text=message,
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
        )

    if data.empty:
        return _message_figure("No data available")

    data = data.dropna(subset=["Data_value"])
    if data.empty:
        return _message_figure("No valid data after removing NaN")

    # Age bands are only merged when the simplified view is requested for
    # the Break_Out column.
    merge_age_groups = simplified and x_column == "Break_Out"

    if merge_age_groups:
        merged_bands = {
            "18-24": "18-44",
            "25-34": "18-44",
            "35-44": "18-44",
            "45-54": "45-64",
            "55-64": "45-64",
            "65+": "65+",
            "65 or older": "65+",
        }
        # Values outside the mapping are kept unchanged
        data = data.assign(
            Age_Group_Simplified=data[x_column].map(
                lambda age: merged_bands.get(age, age)
            )
        )
        pivot_column = "Age_Group_Simplified"
        preferred_order = ["18-44", "45-64", "65+"]
    else:
        pivot_column = x_column
        if x_column == "Break_Out":
            preferred_order = [
                "18-24",
                "25-34",
                "35-44",
                "45-54",
                "55-64",
                "65+",
                "65 or older",
            ]
        else:
            preferred_order = []

    # A single pivot with aggfunc="mean" averages every row of a
    # (Response, category) cell; a separate groupby beforehand is not needed.
    pivot_data = data.pivot_table(
        values="Data_value", index="Response", columns=pivot_column, aggfunc="mean"
    )

    # Known age bands first, in their natural order; anything else keeps the
    # pivot's own order after them.
    cols = pivot_data.columns.tolist()
    sorted_cols = [col for col in preferred_order if col in cols]
    sorted_cols += [col for col in cols if col not in sorted_cols]
    pivot_data = pivot_data[sorted_cols]

    # Pre-format the cell labels so empty cells stay blank instead of
    # showing "nan%".
    cell_labels = [
        ["" if pd.isna(value) else f"{value:.1f}%" for value in row]
        for row in pivot_data.values
    ]

    fig = go.Figure(
        data=go.Heatmap(
            z=pivot_data.values,
            x=pivot_data.columns,
            y=pivot_data.index,
            colorscale="RdYlGn_r",
            text=cell_labels,
            texttemplate="%{text}",
            textfont={"size": 14 if merge_age_groups else 12},
            colorbar=dict(title="Percentage (%)"),
            hovertemplate="<b>%{y}</b><br>%{x}<br>Value: %{z:.1f}%<extra></extra>",
        )
    )

    fig.update_layout(
        title=title,
        # Only claim a simplified axis when the merge was actually applied
        xaxis_title="Age Group (Simplified)" if merge_age_groups else x_column,
        yaxis_title="Response",
        height=400,
        template="plotly_white",
        margin=dict(l=50, r=50, t=80, b=50),
    )

    return fig


def plot_confidence_intervals(data, title, x_column="Break_Out"):
    """
    Plot one line per Response with confidence-interval error bars.

    Rows sharing the same x value within a response are averaged first, so
    the line has a single point per x position even when ``data`` holds
    several rows for it (for example every state for a given year). Input
    with unique x values is plotted unchanged. Averaged confidence limits
    are a display summary, not a pooled interval.

    Parameters:
    - data: DataFrame with Response, Data_value, Confidence_limit_Low,
      Confidence_limit_High and the column named by x_column
    - title: plot title
    - x_column: column to use on the X-axis (default: Break_Out)

    Returns:
    - plotly Figure, or None (after printing a message) when data is empty
    """
    if data.empty:
        print(f"No data available to plot: {title}")
        return None

    value_columns = ["Data_value", "Confidence_limit_Low", "Confidence_limit_High"]

    # List all response categories
    responses = data["Response"].unique()

    fig = go.Figure()

    for response in responses:
        # A point needs a value and both interval limits
        response_data = data[data["Response"] == response].dropna(
            subset=value_columns
        )
        if response_data.empty:
            continue

        # One point per x value, ordered along the X-axis
        per_x = (
            response_data.groupby(x_column, as_index=False)[value_columns]
            .mean()
            .sort_values(x_column)
        )

        # Add the line with asymmetric error bars (distances from the value)
        fig.add_trace(
            go.Scatter(
                x=per_x[x_column],
                y=per_x["Data_value"],
                error_y=dict(
                    type="data",
                    symmetric=False,
                    array=per_x["Confidence_limit_High"] - per_x["Data_value"],
                    arrayminus=per_x["Data_value"] - per_x["Confidence_limit_Low"],
                ),
                mode="markers+lines",
                name=response,
                marker=dict(size=8),
            )
        )

    fig.update_layout(
        title=title,
        xaxis_title=x_column,
        yaxis_title="Percentage (%)",
        height=500,
        hovermode="closest",
        template="plotly_white",
    )

    return fig

In [10]:
# # ============================================
# # 5. Interactive Dashboard
# # ============================================


# def create_dashboard():
#     """Create the full interactive dashboard."""

#     # Generate list of questions
#     questions = get_questions()

#     # Dropdown widget for selecting a question
#     question_dropdown = widgets.Dropdown(
#         options=questions,
#         description="Select Question:",
#         style={"description_width": "100px"},
#         layout=widgets.Layout(width="90%"),
#     )

#     # Output display area
#     output = widgets.Output()

#     def on_question_change(change):
#         """Callback when the selected question changes."""
#         with output:
#             clear_output(wait=True)

#             selected_question = change["new"]
#             print(f"\nAnalyzing Question: {selected_question}\n")

#             # Filter data
#             question_df = filter_data_by_question(selected_question)

#             # 1. Overall
#             print("1. Plotting Overall data...")
#             overall_data = get_breakout_data(question_df, "Overall")
#             fig1 = plot_bar_with_ci(
#                 overall_data, "Overall - Confidence Intervals", "Response"
#             )
#             if fig1:
#                 fig1.show()

#             # 2. By Gender (Sex)
#             print("2. Plotting Sex Group data...")
#             gender_data = get_breakout_data(question_df, "Sex")
#             if not gender_data.empty:
#                 fig2 = plot_bar_with_ci(
#                     gender_data, "By Gender - Confidence Intervals", "Break_Out"
#                 )
#                 fig2.show()
#             else:
#                 print("   No sex group data found.")

#             # 3. By Age Group
#             print("3. Plotting Age Group data...")
#             age_data = get_breakout_data(question_df, "Age Group")
#             if not age_data.empty:
#                 fig3 = plot_heatmap(
#                     age_data, "By Age Group - Confidence Intervals", "Break_Out"
#                 )
#                 fig3.show()
#             else:
#                 print("   No age group data found.")

#             # 4. By Location (State/Region)
#             print("4. Plotting Location data...")
#             location_data = question_df[question_df["Break_Out"] == "Overall"].copy()
#             if not location_data.empty:
#                 fig4 = plot_top_bottom_states(
#                     location_data, "By Location - Confidence Intervals"
#                 )
#                 fig4.show()
#             else:
#                 print("   No location data found.")

#             # 5. By Education
#             print("5. Plotting Education Level data...")
#             edu_data = get_breakout_data(question_df, "Education Attained")
#             if not edu_data.empty:
#                 fig5 = plot_bar_with_ci(
#                     edu_data, "By Education Level - Confidence Intervals", "Break_Out"
#                 )
#                 fig5.show()
#             else:
#                 print("   No education data found.")

#             # 6. By Income
#             print("6. Plotting Income Level data...")
#             income_data = get_breakout_data(question_df, "Household Income")
#             if not income_data.empty:
#                 fig6 = plot_bar_with_ci(
#                     income_data,
#                     "By Household Income - Confidence Intervals",
#                     "Break_Out",
#                 )
#                 fig6.show()
#             else:
#                 print("   No income data found.")

#             # 7. Time Trend (Year)
#             print("7. Plotting Time Trend data...")
#             temporal_data = question_df[question_df["Break_Out"] == "Overall"].copy()
#             if not temporal_data.empty:
#                 fig7 = plot_confidence_intervals(
#                     temporal_data, "Temporal Trend - Confidence Intervals", "Year"
#                 )
#                 fig7.show()
#             else:
#                 print("   No temporal data found.")

#             print("\n✅ All plots generated successfully!")

#     # Bind callback
#     question_dropdown.observe(on_question_change, names="value")

#     # Display widgets
#     display(widgets.VBox([question_dropdown, output]))

#     # Auto-select first question
#     if len(questions) > 0:
#         question_dropdown.value = questions[0]

In [11]:
# # ============================================
# # 7. Launch Dashboard
# # ============================================
# print("\n" + "=" * 50)
# print("Launching Interactive Dashboard...")
# print("=" * 50)
# create_dashboard()

In [16]:
# ============================================
# 4. Initialize Dash App
# ============================================

app = Dash(__name__)
app.title = "BRFSS Interactive Dashboard"

# ============================================
# 5. App Layout
# ============================================

# Resolve the question list once: every get_questions() call re-scans and
# sorts the Question column of the full dataset.
available_questions = get_questions()


def _graph_section(graph_id):
    """Wrap a single dcc.Graph in the standard spaced container."""
    return html.Div([dcc.Graph(id=graph_id)], style={"marginBottom": "30px"})


app.layout = html.Div(
    [
        # Header
        html.Div(
            [
                html.H1(
                    "BRFSS Interactive Dashboard",
                    style={
                        "textAlign": "center",
                        "color": "#2c3e50",
                        "marginBottom": "10px",
                    },
                ),
                html.P(
                    "Behavioral Risk Factor Surveillance System - CDC Survey Data Analysis",
                    style={
                        "textAlign": "center",
                        "color": "#7f8c8d",
                        "fontSize": "16px",
                    },
                ),
            ],
            style={
                "backgroundColor": "#ecf0f1",
                "padding": "20px",
                "marginBottom": "30px",
            },
        ),
        # Question Selection
        html.Div(
            [
                html.Label(
                    "Select Survey Question:",
                    style={
                        "fontSize": "18px",
                        "fontWeight": "bold",
                        "marginBottom": "10px",
                    },
                ),
                dcc.Dropdown(
                    id="question-dropdown",
                    options=[{"label": q, "value": q} for q in available_questions],
                    # Preselect the first question when there is one
                    value=available_questions[0] if available_questions else None,
                    style={"width": "100%"},
                ),
            ],
            style={"width": "90%", "margin": "0 auto", "marginBottom": "40px"},
        ),
        # Graphs Container
        html.Div(
            [
                # Overall
                _graph_section("overall-graph"),
                # By Gender
                _graph_section("gender-graph"),
                # By Age Group (Heatmap) with its detail-level selector
                html.Div(
                    [
                        html.Div(
                            [
                                html.Label(
                                    "Age Group Detail Level:",
                                    style={
                                        "fontSize": "16px",
                                        "fontWeight": "bold",
                                        "marginRight": "15px",
                                        "display": "inline-block",
                                    },
                                ),
                                dcc.RadioItems(
                                    id="age-detail-level",
                                    options=[
                                        {"label": " More (7 groups)", "value": "more"},
                                        {"label": " Less (3 groups)", "value": "less"},
                                    ],
                                    value="more",
                                    inline=True,
                                    style={"display": "inline-block"},
                                ),
                            ],
                            style={"marginBottom": "15px", "textAlign": "center"},
                        ),
                        dcc.Graph(id="age-graph"),
                    ],
                    style={"marginBottom": "30px"},
                ),
                # By Location (Top/Bottom)
                _graph_section("location-graph"),
                # By Education (Bar)
                _graph_section("education-graph"),
                # By Income
                _graph_section("income-graph"),
                # Temporal (Key Years)
                _graph_section("temporal-graph"),
            ],
            style={"width": "90%", "margin": "0 auto"},
        ),
        # Footer
        html.Div(
            [
                html.P(
                    "Data Source: CDC BRFSS | Dashboard created with Plotly Dash",
                    style={
                        "textAlign": "center",
                        "color": "#95a5a6",
                        "fontSize": "14px",
                    },
                )
            ],
            style={"marginTop": "50px", "marginBottom": "20px"},
        ),
    ],
    style={"fontFamily": "Arial, sans-serif", "backgroundColor": "#ffffff"},
)

# ============================================
# 6. Callbacks
# ============================================


@callback(
    [
        Output("overall-graph", "figure"),
        Output("gender-graph", "figure"),
        Output("age-graph", "figure"),
        Output("location-graph", "figure"),
        Output("education-graph", "figure"),
        Output("income-graph", "figure"),
        Output("temporal-graph", "figure"),
    ],
    [Input("question-dropdown", "value"), Input("age-detail-level", "value")],
)
def update_all_graphs(selected_question, age_detail_level):
    """
    Rebuild all seven dashboard figures for the selected question.

    Parameters:
    - selected_question: value of the question dropdown (may be empty)
    - age_detail_level: "more" for the detailed age bands, "less" for the
      merged three-band heatmap

    Returns:
    - seven figures, in the order of the callback outputs: overall, gender,
      age, location, education, income, temporal
    """

    def _figure_or_placeholder(fig, title):
        # Several plot helpers return None when there is nothing to draw;
        # give the graph component a real figure with a note instead.
        if fig is not None:
            return fig
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
        )
        placeholder.update_layout(title=title, template="plotly_white")
        return placeholder

    if not selected_question:
        empty_fig = go.Figure()
        return [empty_fig] * 7

    # Filter data
    question_df = filter_data_by_question(selected_question)

    # The "Overall" rows feed the overall, location and temporal charts.
    overall_data = get_breakout_data(question_df, "Overall")

    # 1. Overall
    overall_title = "Overall - Confidence Intervals"
    fig1 = plot_bar_with_ci(overall_data, overall_title, "Response")

    # 2. By Gender
    gender_title = "By Gender - Confidence Intervals"
    gender_data = get_breakout_data(question_df, "Sex")
    fig2 = plot_bar_with_ci(gender_data, gender_title, "Break_Out")

    # 3. By Age Group (Heatmap)
    age_data = get_breakout_data(question_df, "Age Group")
    simplified = age_detail_level == "less"
    title_suffix = " (3 Groups)" if simplified else " (7 Groups)"
    age_title = f"By Age Group - Heatmap{title_suffix}"
    fig3 = plot_heatmap(age_data, age_title, "Break_Out", simplified=simplified)

    # 4. By Location (Top/Bottom 10)
    location_title = "By Location - Top & Bottom 10 States"
    fig4 = plot_top_bottom_states(overall_data, location_title, n_states=10)

    # 5. By Education (Bar)
    education_title = "By Education Level - Bar Chart"
    edu_data = get_breakout_data(question_df, "Education Attained")
    fig5 = plot_bar_with_ci(edu_data, education_title, "Break_Out")

    # 6. By Income
    income_title = "By Income Level - Bar Chart"
    income_data = get_breakout_data(question_df, "Household Income")
    fig6 = plot_bar_with_ci(income_data, income_title, "Break_Out")

    # 7. Temporal (Key Years)
    temporal_title = "Temporal Trend - Confidence Intervals"
    fig7 = plot_confidence_intervals(overall_data, temporal_title, "Year")

    return (
        _figure_or_placeholder(fig1, overall_title),
        _figure_or_placeholder(fig2, gender_title),
        _figure_or_placeholder(fig3, age_title),
        _figure_or_placeholder(fig4, location_title),
        _figure_or_placeholder(fig5, education_title),
        _figure_or_placeholder(fig6, income_title),
        _figure_or_placeholder(fig7, temporal_title),
    )


# ============================================
# 7. Run the App
# ============================================

if __name__ == "__main__":
    # Startup banner; the address printed below is written out literally in
    # the message and is not read from the app configuration.
    print("\n" + "=" * 60)
    print("🚀 Starting BRFSS Dashboard...")
    print("=" * 60)
    print("\n📊 Dashboard will be available at: http://127.0.0.1:8050/")
    print("\n💡 Press Ctrl+C to stop the server\n")

    # debug=True is intended for local development only.
    app.run(debug=True)


üöÄ Starting BRFSS Dashboard...

üìä Dashboard will be available at: http://127.0.0.1:8050/

üí° Press Ctrl+C to stop the server

