# Exported from Jupyter notebook cell In [7].
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output


# Read the BRFSS nutrition / physical-activity / obesity CSV export.
# NOTE: the path is relative to the current working directory.
df = pd.read_csv("Downloads/Nutrition__Physical_Activity__and_Obesity_-_Behavioral_Risk_Factor_Surveillance_System.csv")

# Postal abbreviations of the locations kept for analysis. The list has no
# "HI" entry, so rows for Hawaii (and any abbreviation not listed here) are
# filtered out below.
continental_states = [
    "AK","AL","AZ","AR","CA","CO","CT","DE","DC","FL","GA","ID","IL","IN","IA",
    "KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ",
    "NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT",
    "VA","WA","WV","WI","WY"
]


# filter dataset
# .copy() detaches the filtered rows from the frame returned by read_csv, so
# the column assignments below write to an independent frame instead of a
# slice (which would emit chained-assignment warnings).
df = df[df["LocationAbbr"].isin(continental_states)].copy()
# Rename the bare "Other" stratum so its label says which dimension it is.
df["Stratification1"] = df["Stratification1"].replace({"Other": "Race other"})
# Non-numeric entries become NaN instead of raising.
df["Data_Value"] = pd.to_numeric(df["Data_Value"], errors="coerce")
df["Sample_Size"] = pd.to_numeric(df["Sample_Size"], errors="coerce")
# Rows without a value cannot be plotted; rows with a missing Sample_Size are
# kept (only Data_Value is checked here).
df = df.dropna(subset=["Data_Value"])





# weighted mean aggregation for National
agg_cols = ["YearStart", "Question", "Stratification1"]


def weighted_mean_group(df_group):
    """Return the ``Sample_Size``-weighted mean of ``Data_Value`` for a group.

    Only rows where both ``Data_Value`` and ``Sample_Size`` are present
    contribute. Without that pairing, a row that lacks a value would still add
    its sample size to the denominator and drag the mean towards zero.

    Args:
        df_group: DataFrame with ``Data_Value`` and ``Sample_Size`` columns.

    Returns:
        The weighted mean, or NaN when the usable weights sum to zero
        (including the case where no row has both fields).
    """
    values = df_group["Data_Value"]
    weights = df_group["Sample_Size"]
    usable = values.notna() & weights.notna()

    total_weight = weights[usable].sum()
    if total_weight == 0:
        # Nothing to average over; avoid a 0/0 division and its warning.
        return float("nan")
    return (values[usable] * weights[usable]).sum() / total_weight

# Collapse the per-location rows into one weighted-mean row per
# (year, question, stratification) and label that row set "National".
national_groups = df.groupby(agg_cols, as_index=False)
df_nat = national_groups.apply(
    lambda grp: pd.Series({"Data_Value": weighted_mean_group(grp)})
)
df_nat["LocationDesc"] = "National"
df_nat["Stratification1"] = df_nat["Stratification1"].replace({"Other": "Race other"})

# Stack the per-location rows and the national rows into one frame, then drop
# the "Data not reported" stratum (matched case-insensitively).
df_combined = pd.concat([df, df_nat], ignore_index=True)
keep_mask = df_combined["Stratification1"].str.upper() != "DATA NOT REPORTED"
df_combined = df_combined[keep_mask]



# Short display names for the survey questions.
# Keys are the full question texts as they appear in the "Question" column;
# values are the compact labels used for dropdown options, trace names and the
# plot title. A question missing from this mapping falls back to its full text
# wherever the lookup is done with .get(q, q).
question_shortnames = {
    "Percent of adults aged 18 years and older who have obesity": "Adults w/ Obesity",
    "Percent of adults aged 18 years and older who have an overweight classification": "Adults Overweight",
    "Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic physical activity (or an equivalent combination)": "≥150min Aerobic Activity",
    "Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic physical activity (or an equivalent combination) and engage in muscle-strengthening activities on 2 or more days a week": "≥150min Aerobic + Strength ≥2d",
    "Percent of adults who achieve more than 300 minutes a week of moderate-intensity aerobic physical activity or 150 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination)": ">300min Aerobic Activity",
    "Percent of adults who engage in muscle-strengthening activities on 2 or more days a week": "Strength ≥2d",
    "Percent of adults who engage in no leisure-time physical activity": "No Leisure-Time Physical Activity",
    "Percent of adults who report consuming fruit less than one time daily": "Low Fruit Intake",
    "Percent of adults who report consuming vegetables less than one time daily": "Low Veg Intake"
}

# Stratification labels, one list per demographic dimension. The order inside
# each list is the order in which its members appear in the legend.
income_strats = [
    "Less than $15,000",
    "$15,000 - $24,999",
    "$25,000 - $34,999",
    "$35,000 - $49,999",
    "$50,000 - $74,999",
    "$75,000 or greater",
]
age_strats = [
    "18 - 24",
    "25 - 34",
    "35 - 44",
    "45 - 54",
    "55 - 64",
    "65 or older",
]
race_strats = [
    "American Indian/Alaska Native",
    "Asian",
    "Hispanic",
    "Non-Hispanic Black",
    "Non-Hispanic White",
    "2 or more races",
    "Race other",
    "Hawaiian/Pacific Islander",
]
edu_strats = [
    "Less than high school",
    "High school graduate",
    "Some college or technical school",
    "College graduate",
]
gender_strats = ["Male", "Female"]
total_strats = ["Total"]

# Overall ordering: income, age, race, education, gender, then the total.
legend_order = [
    *income_strats,
    *age_strats,
    *race_strats,
    *edu_strats,
    *gender_strats,
    *total_strats,
]

# Colour for each stratification label. Every label gets its own colour, so
# traces from different stratification groups stay distinguishable when they
# are plotted together.
color_map = {
    # income: reds
    "Less than $15,000": "#e41a1c", "$15,000 - $24,999": "#fb8072",
    # was "#f16913", identical to "35 - 44" (which also shares its marker symbol)
    "$25,000 - $34,999": "#fb6a4a", "$35,000 - $49,999": "#ef3b2c",
    "$50,000 - $74,999": "#a50f15", "$75,000 or greater": "#67000d",

    # age: oranges
    "18 - 24": "#ff7f00", "25 - 34": "#fdae6b", "35 - 44": "#f16913",
    "45 - 54": "#d94801", "55 - 64": "#a63603", "65 or older": "#7f2704",

    # race/ethnicity: purples and browns
    "American Indian/Alaska Native": "#9e9ac8",
    "Asian": "#807dba",
    "Hispanic": "#6a3d9a",
    "Non-Hispanic Black": "#54278f",
    "Non-Hispanic White": "#8c6d31",
    "2 or more races": "#a65628",
    "Race other": "#d95f0e",
    # was "#7f2704", identical to "65 or older"
    "Hawaiian/Pacific Islander": "#bf812d",

    # education: blues
    "Less than high school": "#08519c", "High school graduate": "#2171b5",
    "Some college or technical school": "#4292c6", "College graduate": "#6baed6",

    # gender
    "Male": "#1f78b4",
    "Female": "#e377c2",

    # overall total
    "Total": "#000000"
}

# Marker symbols available to the strata, in assignment order.
symbols = [
    "circle",
    "square",
    "diamond",
    "cross",
    "x",
    "triangle-up",
    "triangle-down",
    "triangle-left",
    "triangle-right",
    "hexagon",
    "pentagon",
]


# Assign marker symbols to strata.
def assign_symbols(strata, symbols):
    """Map each stratum to a symbol, wrapping around when strata outnumber symbols.

    The i-th stratum receives ``symbols[i % len(symbols)]``.
    """
    mapping = {}
    for position, stratum in enumerate(strata):
        mapping[stratum] = symbols[position % len(symbols)]
    return mapping

# Marker symbol for every stratification label. Each group starts again from
# the first symbol, so symbols are unique within a group but repeat across
# groups; "Total" gets its own star.
symbol_map = {
    **assign_symbols(income_strats, symbols),
    **assign_symbols(age_strats, symbols),
    **assign_symbols(race_strats, symbols),
    **assign_symbols(edu_strats, symbols),
    **assign_symbols(gender_strats, symbols),
    **assign_symbols(total_strats, ["star"]),
}








# app
# Option lists for the dropdowns. dropna() keeps sorted() from raising a
# TypeError if a column ever holds NaN next to its string labels.
questions = sorted(df_combined["Question"].dropna().unique())
# "National" is excluded here because the location dropdown lists it
# separately as its first option.
locations = sorted(
    loc for loc in df_combined["LocationDesc"].dropna().unique() if loc != "National"
)
stratifications = sorted(df_combined["Stratification1"].dropna().unique())

# Initial selections. The default question is looked up by its text rather
# than by position in the sorted list, so it does not shift when the set of
# questions changes; if it is absent, fall back to the first available
# question (or to no selection when there are none).
_preferred_question = "Percent of adults aged 18 years and older who have obesity"
if _preferred_question in questions:
    default_question = [_preferred_question]
else:
    default_question = questions[:1]
default_location = ["National"]
default_strat = ["Total"]

app = Dash(__name__)

# Page layout: a heading, three selector groups (questions, locations,
# stratifications) and the line plot underneath. The location and
# stratification groups each pair an "All ..." checkbox with a multi-select
# dropdown.
app.layout = html.Div(
    children=[
        html.H2("Exploratory Analysis: Nutrition, Physical Activity, and Obesity"),

        # --- questions ---
        html.Label("Select Questions:"),
        dcc.Dropdown(
            id="question-dropdown",
            options=[
                {"label": question_shortnames.get(question, question), "value": question}
                for question in questions
            ],
            value=default_question,
            multi=True,
        ),

        # --- locations ---
        html.Br(),
        html.Label("Select States/Locations:"),
        dcc.Checklist(
            id="state-toggle",
            options=[{"label": "All States", "value": "All"}],
            value=[],
        ),
        dcc.Dropdown(
            id="location-dropdown",
            # "National" is listed first, followed by the sorted locations.
            options=[{"label": name, "value": name} for name in ["National", *locations]],
            value=default_location,
            multi=True,
        ),

        # --- stratifications ---
        html.Br(),
        html.Label("Select Stratifications:"),
        dcc.Checklist(
            id="strat-toggle",
            options=[{"label": "All Stratifications", "value": "All"}],
            value=[],
        ),
        dcc.Dropdown(
            id="strat-dropdown",
            options=[{"label": stratum, "value": stratum} for stratum in stratifications],
            value=default_strat,
            multi=True,
        ),

        # --- output ---
        dcc.Graph(id="lineplot", style={"width": "100%", "height": "85vh"}),
    ]
)

def _shorten_list(items, max_items=3):
    """Join *items* with commas, replacing everything past *max_items* with '...'."""
    if len(items) <= max_items:
        return ", ".join(items)
    return ", ".join(items[:max_items]) + ", ..."


# Callback for interactivity: rebuild the figure whenever a dropdown or an
# "All ..." checkbox changes.
@app.callback(
    Output("lineplot", "figure"),
    Input("question-dropdown", "value"),
    Input("location-dropdown", "value"),
    Input("strat-dropdown", "value"),
    Input("state-toggle", "value"),
    Input("strat-toggle", "value"),
)
def update_plot(selected_questions, selected_locations, selected_strats, state_toggle, strat_toggle):
    """Build the line plot for the current selections.

    One trace is drawn per (question, location, stratification) combination
    that has data, with ``YearStart`` on the x axis and ``Data_Value`` on the
    y axis.

    Args:
        selected_questions: Question texts chosen in the question dropdown.
        selected_locations: Location names chosen in the location dropdown.
        selected_strats: Stratification labels chosen in the dropdown.
        state_toggle: Checklist value; contains "All" when every location in
            ``locations`` should be added to the selection.
        strat_toggle: Checklist value; contains "All" when every label in
            ``stratifications`` should be added to the selection.

    Any argument may be ``None`` or empty; both are treated as "nothing
    selected".

    Returns:
        A ``go.Figure``. If no question, location or stratification ends up
        selected, the figure is empty and its title asks for a selection.
    """
    fig = go.Figure()

    # Normalise the inputs first so a None value cannot break the set logic.
    chosen_questions = sorted(selected_questions or [])
    chosen_locations = set(selected_locations or [])
    chosen_strats = set(selected_strats or [])

    # Apply the "All" toggles before checking for emptiness, so a toggle still
    # works when the matching dropdown has been cleared.
    if "All" in (state_toggle or []):
        chosen_locations |= set(locations)
    if "All" in (strat_toggle or []):
        chosen_strats |= set(stratifications)

    if not (chosen_questions and chosen_locations and chosen_strats):
        fig.update_layout(title="Please select questions, states, and stratifications")
        return fig

    # "National" leads the ordering, but only when it is actually selected.
    ordered_locations = sorted(chosen_locations - {"National"})
    if "National" in chosen_locations:
        ordered_locations.insert(0, "National")

    # Follow legend_order; labels it does not know are appended alphabetically
    # instead of being dropped.
    ordered_strats = [s for s in legend_order if s in chosen_strats]
    ordered_strats += sorted(chosen_strats.difference(legend_order))

    # Filter the frame once and split it by trace key, rather than re-masking
    # the whole frame for every question/location/stratum combination.
    wanted_rows = (
        df_combined["Question"].isin(chosen_questions)
        & df_combined["LocationDesc"].isin(ordered_locations)
        & df_combined["Stratification1"].isin(ordered_strats)
    )
    trace_keys = ["Question", "LocationDesc", "Stratification1"]
    series_by_key = {
        key: frame.sort_values("YearStart")
        for key, frame in df_combined[wanted_rows].groupby(trace_keys, sort=False)
    }

    rank_counter = 0
    for q in chosen_questions:
        short_q = question_shortnames.get(q, q)
        for loc in ordered_locations:
            for strat in ordered_strats:
                df_sub = series_by_key.get((q, loc, strat))
                if df_sub is None:
                    # No data for this combination.
                    continue

                colour = color_map.get(strat, "#000000")
                is_total = strat == "Total"
                # Label only the last point of the line with its stratum.
                text_labels = [None] * (len(df_sub) - 1) + [strat]

                fig.add_trace(go.Scatter(
                    x=df_sub["YearStart"], y=df_sub["Data_Value"],
                    mode="lines+markers+text",
                    name=f"{short_q} | {loc} — {strat}",
                    text=text_labels,
                    textposition="middle right",
                    textfont=dict(size=9, color=colour),
                    # Legend entries follow the order in which traces are added.
                    legendrank=rank_counter,
                    line=dict(color=colour, width=2 if is_total else 1.5),
                    marker=dict(symbol=symbol_map.get(strat, "circle"),
                                size=9 if is_total else 7,
                                color=colour)
                ))
                rank_counter += 1

    title_questions = _shorten_list([question_shortnames.get(q, q) for q in chosen_questions])
    title_states = _shorten_list(ordered_locations)
    title_strats = _shorten_list(sorted(chosen_strats))
    graph_title = f"Comparative Trends — {title_questions} | {title_states} | {title_strats}"

    fig.update_layout(
        title=graph_title,
        xaxis_title="Year",
        yaxis_title="Percent",
        width=1400,
        height=900,
        margin=dict(l=70, r=50, t=80, b=220),  # reserve extra space at bottom
        legend_title="Question | Location — Stratification",
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.35,
            xanchor="center",
            x=0.5,
            traceorder="normal",
            bgcolor="rgba(255,255,255,0.85)",
            bordercolor="rgba(0,0,0,0.1)",
            borderwidth=1,
            font=dict(size=10)
        )
    )
    return fig

# Start the Dash development server when this file is executed directly.
if __name__ == "__main__":
    # Extra keyword arguments to app.run are passed on to the underlying
    # Flask/Werkzeug server, whose switch for the auto-reloader is
    # `use_reloader`; `reload` is not an option it accepts.
    app.run(debug=True, use_reloader=False)




