In [36]:
from pathlib import Path
import pandas as pd
from nvi_survey import create_nvi_survey
from rollups import save_excel_sheets

# The survey analysis extract lives in the "output" folder under the current
# working directory.
output_dir = Path.cwd() / "output"
filepath = output_dir / "nvi_2024_analysis_source_2026_2026_20250514.csv"

# Build the survey object from the extract; the cells below read its
# answer_key, indicator_key and survey_data attributes and call its
# compile_* / tabulate_* methods.
nvi = create_nvi_survey(filepath)

In [None]:
# Citywide indicator export.
#
# Answer-key rows without an indicator id are dropped, and the rest are
# bucketed by indicator, response type and tabulate flag.
# indicators can only have one response type (possible error though)
has_indicator = ~nvi.answer_key["indicator_db_id"].isna()
indicator_groups = nvi.answer_key[has_indicator].groupby(
    ["indicator_db_id", "response_type", "tabulate"]
)

tables = []

for (indicator_id, kind, should_tabulate), _ in indicator_groups:
    # Only groups flagged for tabulation are compiled.
    if not should_tabulate:
        continue

    if kind in {"SINGLE", "GROUPED-SINGLE"}:
        compiled = nvi.compile_single_response_indicator(indicator_id, "citywide")
    elif kind == "MULTI-SELECT":
        compiled = nvi.compile_multi_response_indicator(indicator_id, "citywide")
    else:
        # Any other response type contributes nothing to this export.
        continue

    # Tag the compiled rows so the indicator name can be looked up after
    # all tables are stacked.
    tables.append(compiled.assign(indicator_db_id=indicator_id))

# Stack every compiled table, translate indicator ids to names through
# nvi.indicator_key, and discard the helper columns.
citywide_table = (
    pd.concat(tables, axis=0)
    .reset_index()
    .assign(
        indicator_name=lambda df: df["indicator_db_id"].map(nvi.indicator_key)
    )
    .drop(["citywide", "indicator_db_id"], axis=1)
)

export_columns = ["indicator_name", "count", "universe", "percentage"]
citywide_table[export_columns].to_excel(
    "citywide_indicators_20250515.xlsx", index=False
)

In [None]:
# District-level indicator export.
#
# One answer-key row is kept per (indicator, response type) pair; rows
# without an indicator id are excluded first.
# indicators can only have one response type (possible error though)
has_indicator = ~nvi.answer_key["indicator_db_id"].isna()
indicator_groups = nvi.answer_key[has_indicator].drop_duplicates(
    subset=["indicator_db_id", "response_type"]
)

result = []
for _, row in indicator_groups.iterrows():
    # The tabulate flag is read from the single row that survived
    # de-duplication for this indicator.
    if not row["tabulate"]:
        continue

    if row["response_type"] in ("SINGLE", "GROUPED-SINGLE"):
        compile_indicator = nvi.compile_single_response_indicator
    else:
        # Every response type other than the two single-response ones is
        # compiled as a multi-response indicator.
        compile_indicator = nvi.compile_multi_response_indicator

    result.append(
        compile_indicator(row["indicator_db_id"], "district")
        .assign(indicator=nvi.indicator_key[row["indicator_db_id"]])
    )

# Stack the per-indicator tables and write the reporting columns; the frame's
# index is written to the sheet as well (no index=False here).
district_table = pd.concat(result)[["indicator", "count", "universe", "percentage"]]
district_table.to_excel("nvi_survey_indicator_2024_20250515.xlsx")

In [None]:
# Break down for resident opportunity questions

# Distinct (question, group) pairs whose site category is
# "Resident Opportunity".
in_resident_opportunity = nvi.answer_key["site_category"] == "Resident Opportunity"
resident_opportunity_questions = (
    nvi.answer_key[in_resident_opportunity][["question", "group"]]
    .drop_duplicates()
)

# Pair each question with its citywide tabulation; the list of
# (question, table) tuples is what save_excel_sheets receives.
tables = [
    (
        entry["question"],
        nvi.tabulate_question(entry["question"], entry["group"], "citywide"),
    )
    for _, entry in resident_opportunity_questions.iterrows()
]

save_excel_sheets(
    tables,
    Path.cwd() / "output" / "resident_opportunity_breakouts_20250515.xlsx",
)

In [None]:
# Tabulate the "Household_Annual_Income_Before_Taxes" question citywide,
# transpose the result and copy it to the system clipboard.
(
    nvi.tabulate_single_question(
        "citywide",
        "Household_Annual_Income_Before_Taxes",
    )
    .T
    .to_clipboard()
)

In [None]:
# ROLL UP ALL INDICATORS
#
# Compile every indicator at each geography and stack the results into one
# tall table, `indicators_tall`, with `location_id`, `indicator_id` and `year`
# columns alongside whatever the compile methods return.
# NOTE(review): unlike the citywide/district export cells, this roll-up does
# not look at the answer key's `tabulate` flag -- confirm that is intended.

# One row per distinct (indicator, response type) pair; answer-key rows
# without an indicator id are ignored.
indicators = (
    nvi.answer_key[["indicator_db_id", "response_type"]]
    .dropna(subset="indicator_db_id")
    .drop_duplicates()
)

# Geography columns to aggregate over. The list is the same for every
# indicator, so it is built once rather than on each loop iteration.
aggregations = ["citywide", "district", "zone"]

result = []
for _, indicator in indicators.iterrows():
    # Pick the survey method that matches the indicator's response type.
    if indicator["response_type"] in {"GROUPED-SINGLE", "SINGLE"}:
        method = "compile_single_response_indicator"
    elif indicator["response_type"] == "MULTI-SELECT":
        method = "compile_multi_response_indicator"
    else:
        # Single quotes inside the replacement fields keep this f-string
        # valid on Python versions older than 3.12; the message is unchanged.
        raise ValueError(
            f"Indicator {indicator['indicator_db_id']}:'{indicator['response_type']}' "
            "is not a valid response type."
        )

    # getattr is the idiomatic way to resolve a method by name.
    compile_indicator = getattr(nvi, method)

    result.append(
        pd.concat(
            [
                compile_indicator(
                    indicator["indicator_db_id"],
                    agg,
                    readable=False,
                )
                .reset_index()
                # The geography column is renamed so all three aggregation
                # levels share a single `location_id` column when stacked.
                .rename(columns={agg: "location_id"})
                .assign(
                    indicator_id=indicator["indicator_db_id"],
                    year=2024,
                )
                for agg in aggregations
            ]
        )
    )


indicators_tall = pd.concat(result)


# ROLL UP ALL QUESTIONS
#
# Tabulate answer-level counts for every question tied to an indicator, at
# each geography, and stack them into one tall table, `answers_tall`.
# NOTE(review): no `year` column is assigned to these rows, whereas the
# indicator roll-up sets year=2024 -- confirm whether answers need it too.

question_columns = ["group", "question", "response_type", "indicator_db_id"]
has_indicator = nvi.answer_key["indicator_db_id"].notna()

questions = pd.concat([
    # Single-response questions: one row per distinct combination of the
    # selected columns.
    nvi.answer_key[
        has_indicator
        & (nvi.answer_key["response_type"].isin({"SINGLE", "GROUPED-SINGLE"}))
    ][question_columns].drop_duplicates(),
    # Multi-select questions: one row per group (the first answer-key row of
    # each group is the one kept).
    nvi.answer_key[
        has_indicator
        & (nvi.answer_key["response_type"] == "MULTI-SELECT")
    ][question_columns].drop_duplicates(subset=["group", "response_type"]),
])

aggs = ["citywide", "district", "zone"]

result = []
for _, question in questions.iterrows():
    if question["response_type"] in {"SINGLE", "GROUPED-SINGLE"}:
        # All answer-key rows for this question. Kept under its own name so
        # the loop variable `question` is not overwritten mid-iteration.
        answers = nvi.answer_key[
            (nvi.answer_key["group"] == question["group"])
            & (nvi.answer_key["question"] == question["question"])
        ][
            [
                "indicator_db_id",
                "full_column",
                "survey_code",
                "db_question_code",
                "db_answer_code",
                "response_type",
            ]
        ].drop_duplicates()

        # These lookups depend only on the question, not on the geography,
        # so they are computed once per question rather than once per agg.
        indicator_id = answers["indicator_db_id"].iloc[0]
        survey_question_id = answers["db_question_code"].iloc[0]
        column = answers["full_column"].iloc[0]

        # Maps the code stored in the survey data to the database answer code.
        recode = {
            row["survey_code"]: row["db_answer_code"]
            for _, row in answers.iterrows()
        }

        for agg in aggs:
            result.append(
                nvi.survey_data[[agg, column]]
                .groupby(agg)
                .value_counts()
                .reset_index()
                .rename(columns={agg: "location_id"})
                .assign(
                    indicator_id=indicator_id,
                    survey_question_id=survey_question_id,
                    # Codes missing from the answer key map to None.
                    survey_question_option_id=lambda df: df[column].map(recode.get),
                    # Universe = total counted responses within the location.
                    universe=lambda df: df.groupby("location_id")["count"].transform("sum"),
                    percentage=lambda df: df["count"] / df["universe"],
                )
                .drop(column, axis=1)
            )

    elif question["response_type"] == "MULTI-SELECT":
        # Every indicator-linked answer-key row in this group; each row
        # names one survey column.
        group = nvi.answer_key[
            nvi.answer_key["indicator_db_id"].notna()
            & (nvi.answer_key["group"] == question["group"])
        ]

        # Relabel each survey column with its (question code, answer code)
        # pair so the codes can be recovered after stacking.
        rename = {
            row["full_column"]: (row["db_question_code"], row["db_answer_code"])
            for _, row in group.iterrows()
        }

        for agg in aggs:
            result.append(
                nvi.survey_data[[agg] + list(group["full_column"])]
                .rename(columns=rename)
                .groupby(agg)
                # count = non-null entries per column, size = rows in the
                # location (used as the universe).
                .agg(["count", "size"])
                .stack(level=[0, 1], future_stack=True)
                .reset_index()
                # level_1 / level_2 are the default names reset_index gives
                # the two stacked column levels.
                .rename(columns={
                    agg: "location_id",
                    "level_1": "survey_question_id",
                    "level_2": "survey_question_option_id",
                    "size": "universe"
                })
                .assign(
                    # NOTE(review): this is the indicator id of the group's
                    # first answer-key row -- confirm a group never spans
                    # more than one indicator.
                    indicator_id=question["indicator_db_id"],
                    percentage=lambda df: df["count"] / df["universe"]
                )
            )
    else:
        # `questions` is filtered to the three known response types above,
        # so this is a defensive guard. Single quotes inside the replacement
        # field keep the f-string valid before Python 3.12.
        raise ValueError(f"'{question['response_type']}' is not a valid response type.")

answers_tall = pd.concat(result)

In [None]:
# Stack the indicator-level and answer-level tables. The two question-id
# columns are cast to pandas' nullable integer dtype so they can hold
# missing values for the indicator-level rows, which have no such ids.
nullable_id_dtypes = {
    name: pd.Int64Dtype()
    for name in ("survey_question_id", "survey_question_option_id")
}
pd.concat([indicators_tall, answers_tall]).astype(nullable_id_dtypes)

Unnamed: 0,location_id,count,universe,percentage,indicator_id,year,survey_question_id,survey_question_option_id
0,1,2808,4061,0.691455,2,2024.0,,
0,2,431,614,0.701954,2,2024.0,,
1,3,424,605,0.700826,2,2024.0,,
2,4,269,410,0.656098,2,2024.0,,
3,5,425,619,0.686591,2,2024.0,,
...,...,...,...,...,...,...,...,...
171,31,7,189,0.037037,56,,94,107
172,31,10,189,0.05291,56,,94,109
173,31,88,189,0.465608,56,,94,110
174,31,8,189,0.042328,56,,94,108
