## Dashboard

In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings

# Suppress every warning category for the rest of the kernel session.
# NOTE(review): with no category/module argument this filter is global, so it
# also hides warnings raised by pandas while parsing the CSV — consider
# narrowing it to the specific warnings that are known to be noise.
warnings.filterwarnings("ignore")

In [3]:
# ============================================
# 1. Load Data
# ============================================
# Read the CSV into the module-level frame `df`, then report its shape and
# show the leading rows as a quick sanity check.
print("Loading dataset...")
df = pd.read_csv("dataset.csv")
print(
    f"Data loaded! Shape: {df.shape}",
    "\nFirst 5 rows:",
    df.head(),
    sep="\n",
)

Loading dataset...
Data loaded! Shape: (2763102, 27)

First 5 rows:
   Year Locationabbr Locationdesc                      Class       Topic  \
0  2023           AK       Alaska  Chronic Health Indicators  Depression   
1  2023           AK       Alaska  Chronic Health Indicators  Depression   
2  2023           AK       Alaska  Chronic Health Indicators  Depression   
3  2023           AK       Alaska  Chronic Health Indicators  Depression   
4  2023           AK       Alaska  Chronic Health Indicators  Depression   

                                            Question Response Break_Out  \
0  Ever told you that you have a form of depression?      Yes   Overall   
1  Ever told you that you have a form of depression?       No   Overall   
2  Ever told you that you have a form of depression?      Yes      Male   
3  Ever told you that you have a form of depression?       No      Male   
4  Ever told you that you have a form of depression?      Yes    Female   

  Break_Out_Category Sam

In [4]:
# ============================================
# 2. Basic Data Exploration
# ============================================
print("\n" + "=" * 50)
print("Basic Dataset Information:")
print("=" * 50)
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

print("\nColumn names:")
print(df.columns.tolist())

# Per-column count of null cells
print("\nMissing Value Summary:")
print(df.isnull().sum())

# How many rows exist for each specific breakout value (Male, Female, ...)
print("\nBreak_Out value counts:")
print(df["Break_Out"].value_counts())

print("\nNumber of unique survey questions:")
print(f"There are {df['Question'].nunique()} unique questions.")


Basic Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2763102 entries, 0 to 2763101
Data columns (total 27 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   Year                        int64  
 1   Locationabbr                object 
 2   Locationdesc                object 
 3   Class                       object 
 4   Topic                       object 
 5   Question                    object 
 6   Response                    object 
 7   Break_Out                   object 
 8   Break_Out_Category          object 
 9   Sample_Size                 object 
 10  Data_value                  float64
 11  Confidence_limit_Low        float64
 12  Confidence_limit_High       float64
 13  Display_order               object 
 14  Data_value_unit             object 
 15  Data_value_type             object 
 16  Data_Value_Footnote_Symbol  object 
 17  Data_Value_Footnote         object 
 18  DataSource                  object

In [5]:
# ============================================
# 3. Data Preprocessing Functions
# ============================================


def get_questions():
    """Return the distinct survey questions in ascending order.

    Reads the ``Question`` column of the module-level ``df``. Missing values
    are dropped before sorting: a NaN mixed in with the question strings
    would make ``sorted`` raise ``TypeError`` (float vs. str comparison),
    and a null entry is not a selectable question anyway.

    Returns:
        list: unique non-null values of ``df["Question"]``, sorted.
    """
    return sorted(df["Question"].dropna().unique())


def filter_data_by_question(question):
    """Return an independent copy of the rows of ``df`` whose Question equals ``question``."""
    is_selected = df["Question"] == question
    matching_rows = df[is_selected]
    return matching_rows.copy()


def get_breakout_data(question_df, breakout_category):
    """
    Select the rows belonging to one breakout grouping.

    Parameters:
    - question_df: data already filtered down to a single question
    - breakout_category: grouping to keep (e.g., 'Sex', 'Age Group', 'Overall')

    Returns:
    - a copy of the matching rows of question_df

    Note:
    - 'Overall' is matched against the Break_Out column (the specific value,
      such as Male, Female, 18-24)
    - any other name is matched against Break_Out_Category (the grouping type,
      such as Sex or Age Group)
    """
    # Only the column being compared differs between the two cases.
    match_column = (
        "Break_Out" if breakout_category == "Overall" else "Break_Out_Category"
    )
    keep = question_df[match_column] == breakout_category
    return question_df[keep].copy()


# ============================================
# 4. Visualization Functions
# ============================================


def plot_confidence_intervals(data, title, x_column="Break_Out"):
    """
    Build a Plotly figure with one error-bar series per response category.

    Parameters:
    - data: DataFrame providing the columns "Response", "Data_value",
      "Confidence_limit_Low", "Confidence_limit_High" and `x_column`
    - title: text used as the figure title
    - x_column: name of the column placed on the X-axis (default: Break_Out)

    Returns:
    - the assembled go.Figure, or None (after printing a notice) when `data`
      has no rows
    """
    if data.empty:
        print(f"No data available to plot: {title}")
        return None

    # Series are created in the order the responses first appear in `data`
    response_values = data["Response"].unique()

    # A point needs its estimate and both bounds; rows missing any are dropped
    required = ["Data_value", "Confidence_limit_Low", "Confidence_limit_High"]
    plottable = data.dropna(subset=required)

    fig = go.Figure()

    for response in response_values:
        series = plottable[plottable["Response"] == response]
        if series.empty:
            continue

        # Order the points along the X-axis
        series = series.sort_values(x_column)

        estimate = series["Data_value"]
        # Plotly expects bar lengths relative to the point, not absolute bounds
        upper_length = series["Confidence_limit_High"] - estimate
        lower_length = estimate - series["Confidence_limit_Low"]

        trace = go.Scatter(
            x=series[x_column],
            y=estimate,
            error_y={
                "type": "data",
                "symmetric": False,
                "array": upper_length,
                "arrayminus": lower_length,
            },
            mode="markers+lines",
            name=response,
            marker={"size": 8},
        )
        fig.add_trace(trace)

    layout_options = {
        "title": title,
        "xaxis_title": x_column,
        "yaxis_title": "Percentage (%)",
        "height": 500,
        "hovermode": "closest",
        "template": "plotly_white",
    }
    fig.update_layout(**layout_options)

    return fig

In [6]:
# ============================================
# 5. Interactive Dashboard
# ============================================


def create_dashboard():
    """Build and display the interactive question dashboard.

    Shows a dropdown listing every survey question (from ``get_questions``)
    above an output area. Each time a question is selected the output area
    is cleared and seven confidence-interval charts are drawn for it:
    Overall, Sex, Age Group, Location, Education, Income and Time Trend.

    The charts for the initially selected question are rendered explicitly.
    A Dropdown created with options but no value already holds the first
    option, so re-assigning that same value is not a change and would never
    trigger the observer, leaving the dashboard blank until the user picks
    a different question.
    """

    # Generate list of questions
    questions = get_questions()

    # Dropdown widget for selecting a question
    question_dropdown = widgets.Dropdown(
        options=questions,
        description="Select Question:",
        style={"description_width": "100px"},
        layout=widgets.Layout(width="90%"),
    )

    # Output display area
    output = widgets.Output()

    def render_question(selected_question):
        """Redraw every chart for ``selected_question`` inside ``output``."""
        with output:
            clear_output(wait=True)

            print(f"\nAnalyzing Question: {selected_question}\n")

            # Filter data
            question_df = filter_data_by_question(selected_question)

            # The "Overall" rows feed three charts (Overall, Location, Year),
            # so they are selected once and reused; plotting does not modify
            # the frame it is given.
            overall_data = get_breakout_data(question_df, "Overall")

            # One entry per chart:
            # (progress label, data, title, X-axis column, message when empty).
            # A message of None means the empty case is reported by
            # plot_confidence_intervals itself.
            sections = [
                (
                    "Overall",
                    overall_data,
                    "Overall - Confidence Intervals",
                    "Response",
                    None,
                ),
                (
                    "Sex (Gender)",
                    get_breakout_data(question_df, "Sex"),
                    "By Gender - Confidence Intervals",
                    "Break_Out",
                    "   No gender data found.",
                ),
                (
                    "Age Group",
                    get_breakout_data(question_df, "Age Group"),
                    "By Age Group - Confidence Intervals",
                    "Break_Out",
                    "   No age group data found.",
                ),
                (
                    "Location",
                    overall_data,
                    "By Location - Confidence Intervals",
                    "Locationabbr",
                    "   No location data found.",
                ),
                (
                    "Education Level",
                    get_breakout_data(question_df, "Education Attained"),
                    "By Education Level - Confidence Intervals",
                    "Break_Out",
                    "   No education data found.",
                ),
                (
                    "Income Level",
                    get_breakout_data(question_df, "Household Income"),
                    "By Household Income - Confidence Intervals",
                    "Break_Out",
                    "   No income data found.",
                ),
                (
                    "Time Trend",
                    overall_data,
                    "Temporal Trend - Confidence Intervals",
                    "Year",
                    "   No temporal data found.",
                ),
            ]

            for step, section in enumerate(sections, start=1):
                label, section_data, title, x_column, empty_message = section
                print(f"{step}. Plotting {label} data...")

                if section_data.empty and empty_message is not None:
                    print(empty_message)
                    continue

                fig = plot_confidence_intervals(section_data, title, x_column)
                # None signals "nothing to plot"; test it explicitly rather
                # than relying on the truthiness of a Figure object.
                if fig is not None:
                    fig.show()

            print("\n✅ All plots generated successfully!")

    def on_question_change(change):
        """Callback when the selected question changes."""
        render_question(change["new"])

    # Bind callback
    question_dropdown.observe(on_question_change, names="value")

    # Display widgets
    display(widgets.VBox([question_dropdown, output]))

    # Show the first question right away
    if len(questions) > 0:
        if question_dropdown.value != questions[0]:
            # Real change of value: the observer performs the rendering.
            question_dropdown.value = questions[0]
        else:
            # Already selected by default: no change event, so render directly.
            render_question(questions[0])

In [None]:
# ============================================
# 6. Launch Dashboard
# ============================================
# Print a banner, then build and display the widgets.
print(
    "\n" + "=" * 50,
    "Launching Interactive Dashboard...",
    "=" * 50,
    sep="\n",
)
create_dashboard()


Launching Interactive Dashboard...


VBox(children=(Dropdown(description='Select Question:', layout=Layout(width='90%'), options=('About how long h…