In [2]:
# ============================================================
# SHHA EVENT CHECK-IN LIST (1 ROW PER ADDRESS)
# Source: address_users_export.csv (1 row = 1 person tied to an address)
#
# Goal:
# - One row per ADDRESS
# - Leftmost cell contains ALL PEOPLE at that address (from First/Last Name)
# - Include Unit + Membership Status (address-level)
# - Sort by Street Name, then Street Number, then Unit
#
# Output: event_checkin_by_address_from_address_users.csv
# ============================================================

import pandas as pd

# Input export: one row per person tied to an address.
ADDR_USERS_FILE = "address_users_export.csv"

# Read every column as text. With pandas' default dtype inference, a column
# that mixes blanks and numbers (e.g. unit "2" next to empty units) is parsed
# as float, and converting it back to text later yields "2.0" -- which would
# leak into the Address string and the grouping key. Missing cells still
# arrive as NaN and are replaced by "" here.
addr_users_df = pd.read_csv(ADDR_USERS_FILE, dtype=str).fillna("")

def _s(df, col):
    """Return column ``col`` of ``df`` as text: missing -> "", whitespace trimmed."""
    column = df[col]
    as_text = column.fillna("").astype(str)
    return as_text.str.strip()

# Console summary of the load step: title, underline, row count, then a blank spacer line.
print(
    "ADDRESS USERS DATA LOADED",
    "-------------------------",
    f"User rows loaded (1 row = 1 person @ an address): {len(addr_users_df):,}",
    "",
    sep="\n",
)

# -----------------------------
# Build address components
# -----------------------------
# Normalize the three address parts in place to trimmed text ("" when missing).
for _part in ("Street Number", "Street Name", "Street Unit"):
    addr_users_df[_part] = _s(addr_users_df, _part)

# Full address string used for display and as a grouping key:
# "<number> <name> <unit>", with whitespace runs collapsed to one space and
# the ends trimmed (so a blank unit leaves no trailing space).
_joined_parts = (
    addr_users_df["Street Number"]
    + " " + addr_users_df["Street Name"]
    + " " + addr_users_df["Street Unit"]
)
_single_spaced = _joined_parts.str.replace(r"\s+", " ", regex=True)
addr_users_df["Address"] = _single_spaced.str.strip()

# -----------------------------
# Build person name (clean)
# -----------------------------
# Clean each name part: trim, peel off wrapping double quotes, then trim
# again to drop whitespace that sat inside the quotes.
first, last = (
    _s(addr_users_df, name_col).str.strip().str.strip('"').str.strip()
    for name_col in ("First Name", "Last Name")
)

# "First Last" with whitespace runs collapsed; empty when both parts are blank.
_full_name = first + " " + last
addr_users_df["Person"] = _full_name.str.replace(r"\s+", " ", regex=True).str.strip()

# True for rows that actually carry a name.
addr_users_df["has_name"] = ~(addr_users_df["Person"] == "")

# -----------------------------
# Membership flag (address-level)
# If any person record at that address has Is Member == 1, mark address as Member
# -----------------------------
# Coerce the flag to an integer column; anything non-numeric or blank counts as 0.
_member_text = _s(addr_users_df, "Is Member")
_member_number = pd.to_numeric(_member_text, errors="coerce")
addr_users_df["Is Member"] = _member_number.fillna(0).astype(int)

# -----------------------------
# Group to one row per address
# -----------------------------
def join_people(series: pd.Series) -> str:
    """Join the names in ``series`` into a single "; "-separated string.

    Each value is converted to text and trimmed. Missing values (None/NaN)
    and blank strings are dropped, and repeated names are kept only once,
    at the position of their first occurrence.

    Args:
        series: Names for one group (any iterable of scalar values works).

    Returns:
        The distinct names in first-seen order joined by "; ", or "" when
        no usable name is present.
    """
    # Skip missing values before str(): otherwise None/NaN would be listed
    # as the literal names "None"/"nan".
    cleaned = (str(value).strip() for value in series if not pd.isna(value))
    # dict.fromkeys de-duplicates while preserving insertion order.
    unique_names = dict.fromkeys(name for name in cleaned if name)
    return "; ".join(unique_names)

# Grouping keys: the three address components plus the derived display string.
group_cols = ["Street Name", "Street Number", "Street Unit", "Address"]


def _household_size(people: pd.Series) -> int:
    """Count the distinct non-blank names in a group (the names People lists)."""
    return len({str(name).strip() for name in people} - {""})


def _any_member(flags: pd.Series) -> int:
    """Return 1 when any record in the group has Is Member == 1, else 0."""
    return int((flags == 1).any())


# One row per address.
# - HouseholdSize counts distinct names so it always matches the de-duplicated
#   People cell, even when the export repeats a person at the same address.
# - IsMemberAny is always exactly 0 or 1, so the status mapping below can never
#   produce a blank value when an unexpected flag value appears in the data.
checkin_df = (
    addr_users_df
    .groupby(group_cols, dropna=False)
    .agg(
        People=("Person", join_people),
        HouseholdSize=("Person", _household_size),
        IsMemberAny=("Is Member", _any_member),
    )
    .reset_index()
)

# Human-readable membership label; the helper flag column is not exported.
checkin_df["Membership Status"] = checkin_df["IsMemberAny"].map({1: "Member", 0: "Non-member"})
checkin_df = checkin_df.drop(columns=["IsMemberAny"])

# Numeric sort key for Street Number.
# Use the leading run of digits so values such as "12A" or "12-14" sort next
# to 12 rather than being pushed to the end of their street. Values without
# leading digits still get the sentinel 10**9 and sort last.
_leading_digits = (
    checkin_df["Street Number"].astype(str).str.extract(r"^\s*(\d+)", expand=False)
)
checkin_df["_StreetNumberSort"] = (
    pd.to_numeric(_leading_digits, errors="coerce").fillna(10**9).astype(int)
)

# Sort: Street Name, then Street Number (numeric, with the raw text as a
# tie-break so "12" precedes "12A"), then Unit. The helper key is dropped.
checkin_df = (
    checkin_df
    .sort_values(
        ["Street Name", "_StreetNumberSort", "Street Number", "Street Unit", "Address"],
        kind="stable",
    )
    .drop(columns=["_StreetNumberSort"])
)

# Export column order; People comes first so it is the leftmost cell (as requested).
export_cols = ["People", "Address", "Street Unit", "Membership Status", "HouseholdSize"]

# Addresses whose source names were all blank keep an empty People cell.
# They are written out for completeness; filter them downstream if unwanted.
out_path = "event_checkin_by_address_from_address_users.csv"
_export_view = checkin_df.loc[:, export_cols]
_export_view.to_csv(out_path, index=False)

# Final console summary: address count, rows needing review, and output location.
_blank_people_mask = checkin_df["People"].fillna("").str.strip() == ""
print(
    "EVENT CHECK-IN EXPORT (BY ADDRESS, FROM ADDRESS_USERS)",
    "------------------------------------------------------",
    f"Unique addresses exported: {len(checkin_df):,}",
    f"Rows with blank People (needs review): {_blank_people_mask.sum():,}",
    f"Saved to: {out_path}",
    sep="\n",
)

ADDRESS USERS DATA LOADED
-------------------------
User rows loaded (1 row = 1 person @ an address): 3,668

EVENT CHECK-IN EXPORT (BY ADDRESS, FROM ADDRESS_USERS)
------------------------------------------------------
Unique addresses exported: 2,230
Rows with blank People (needs review): 0
Saved to: event_checkin_by_address_from_address_users.csv
