In [36]:
from pathlib import Path
import pandas as pd
from nvi_survey import create_nvi_survey
from rollups import save_excel_sheets

# Survey extract to analyse, resolved against the current working directory.
filepath = Path.cwd().joinpath(
    "output",
    "nvi_2024_analysis_source_2026_2026_20250514.csv",
)

# Survey object used by every cell below.
nvi = create_nvi_survey(filepath)

In [None]:
# Citywide indicator table: compile every tabulated indicator and export to Excel.
#
# Answer-key rows without an indicator id are ignored. Indicators are expected to
# carry exactly one response type; an indicator that carries several forms several
# groups below and is therefore compiled once per group (possible data error).
indicator_groups = (
    nvi.answer_key.loc[nvi.answer_key["indicator_db_id"].notna()]
    .groupby(["indicator_db_id", "response_type", "tabulate"])
)

# Response type -> method that compiles the indicator. Response types that are
# not listed here are skipped by the loop below.
compile_by_response_type = {
    "SINGLE": nvi.compile_single_response_indicator,
    "GROUPED-SINGLE": nvi.compile_single_response_indicator,
    "MULTI-SELECT": nvi.compile_multi_response_indicator,
}

tables = []
for (indicator, response_type, tabulate), group in indicator_groups:
    # Only indicators flagged for tabulation are compiled.
    if tabulate:
        compile_indicator = compile_by_response_type.get(response_type)
        if compile_indicator is not None:
            tables.append(
                compile_indicator(indicator, "citywide")
                .assign(indicator_db_id=indicator)
            )

# Stack the per-indicator frames, swap the indicator id for its name, and write
# the four reporting columns to Excel without the index.
(
    pd.concat(tables, axis=0)
    .reset_index()
    .assign(
        indicator_name=lambda stacked: stacked["indicator_db_id"].map(nvi.indicator_key)
    )
    .drop(columns=["citywide", "indicator_db_id"])
    .loc[:, ["indicator_name", "count", "universe", "percentage"]]
    .to_excel("citywide_indicators_20250515.xlsx", index=False)
)

In [None]:
# District-level indicator table: one answer-key row per
# (indicator, response type) pair drives one compiled frame.
#
# Indicators are expected to carry exactly one response type; an indicator that
# carries several survives de-duplication once per type (possible data error).
indicator_groups = (
    nvi.answer_key.loc[nvi.answer_key["indicator_db_id"].notna()]
    .drop_duplicates(subset=["indicator_db_id", "response_type"])
)

result = []
for _, row in indicator_groups.iterrows():
    # The tabulate flag is read from the first row kept for each pair.
    if row["tabulate"]:
        indicator_id = row["indicator_db_id"]

        # NOTE(review): every response type other than SINGLE / GROUPED-SINGLE
        # is compiled as a multi-response indicator here -- confirm that is
        # intended for any type besides MULTI-SELECT.
        compile_indicator = (
            nvi.compile_single_response_indicator
            if row["response_type"] in ("SINGLE", "GROUPED-SINGLE")
            else nvi.compile_multi_response_indicator
        )

        result.append(
            compile_indicator(indicator_id, "district")
            .assign(indicator=nvi.indicator_key[indicator_id])
        )

# Stack all compiled frames and export the reporting columns; the frame index
# is written to the sheet as well (no index=False here).
(
    pd.concat(result)
    .loc[:, ["indicator", "count", "universe", "percentage"]]
    .to_excel("nvi_survey_indicator_2024_20250515.xlsx")
)

In [None]:
# Break down for resident opportunity questions

# Distinct (question, group) pairs among the answer-key rows whose site
# category is "Resident Opportunity".
resident_opportunity_questions = (
    nvi.answer_key.loc[
        nvi.answer_key["site_category"] == "Resident Opportunity",
        ["question", "group"],
    ]
    .drop_duplicates()
)

# One (question, citywide tabulation) pair per distinct question/group row.
tables = [
    (
        row["question"],
        nvi.tabulate_question(row["question"], row["group"], "citywide"),
    )
    for _, row in resident_opportunity_questions.iterrows()
]

# Hand the pairs to the rollups helper together with the workbook path.
save_excel_sheets(
    tables,
    Path.cwd() / "output" / "resident_opportunity_breakouts_20250515.xlsx",
)

In [None]:
# Copy the transposed citywide household-income tabulation to the clipboard.
# NOTE(review): the geography is the first argument here, whereas the
# compile_* / tabulate_question calls in this notebook pass it last --
# confirm against tabulate_single_question's signature.
(
    nvi.tabulate_single_question(
        "citywide",
        "Household_Annual_Income_Before_Taxes",
    )
    .transpose()
    .to_clipboard()
)

In [None]:
# Stack the two tall tables and cast both survey ID columns to pandas'
# nullable integer dtype.
# NOTE(review): indicators_tall and answers_tall are not defined in any cell
# visible here -- confirm they are created earlier so this cell runs on a
# fresh kernel.
pd.concat([indicators_tall, answers_tall]).astype(
    dict.fromkeys(
        ["survey_question_id", "survey_question_option_id"],
        pd.Int64Dtype(),
    )
)

Unnamed: 0,location_id,count,universe,percentage,indicator_id,year,survey_question_id,survey_question_option_id
0,1,2808,4061,0.691455,2,2024.0,,
0,2,431,614,0.701954,2,2024.0,,
1,3,424,605,0.700826,2,2024.0,,
2,4,269,410,0.656098,2,2024.0,,
3,5,425,619,0.686591,2,2024.0,,
...,...,...,...,...,...,...,...,...
171,31,7,189,0.037037,56,,94,107
172,31,10,189,0.05291,56,,94,109
173,31,88,189,0.465608,56,,94,110
174,31,8,189,0.042328,56,,94,108
