In [1]:
!pip install --upgrade \
    pyreadr \
    mapclassify \
    geopandas \
    shapely \
    folium \
    pandas \
    matplotlib \
    pyproj \
    rtree \
    fiona



In [48]:
import pyreadr, geopandas as gpd

gdf = gpd.read_file("C:/Users/User/Documents/TOOLKIT/clients/arshad/cache/austin_acs5yr_tracts.gpkg")




In [35]:
gdf.columns



Index(['GEOID', 'NAME', 'median_household_income',
       'aggregate_household_income', 'white_alone', 'hispanic_or_latino',
       'bachelors_degree', 'masters_degree', 'total_population',
       'median_age_total', 'median_age_male', 'median_age_female',
       'median_value_pre1939', 'female_living_alone',
       'log_median_household_income', 'log_aggregate_household_income',
       'log_median_value_pre1939', 'median_household_income_clipped',
       'aggregate_household_income_clipped', 'median_value_pre1939_clipped',
       'aggregate_household_income_pct', 'white_alone_pct',
       'hispanic_or_latino_pct', 'bachelors_degree_pct', 'masters_degree_pct',
       'female_living_alone_pct', 'edu_attain_pct', 'pct_male_30_54.y',
       'pct_female_30_54.y', 'male_30_54', 'female_30_54',
       'vacant_residence_elsewhere', 'two_or_more_races', 'income_75k_plus.y',
       'income_125k_plus.y', 'center', 'other', 'nails', 'spa', 'salon', 'tan',
       'urgent', 'wax', 'aesthetics', 'NA

In [49]:
# 1. Rename safely
rename_map = {
    "log_median_household_income": "Median Household Incomes",
    "white_alone_pct":             "Population: White Alone",
    "pct_female_30_54.y":          "Females Living Alone - Age 35‑54",
    "pct_male_30_54.y":            "Males Living Alone - Age 35‑54",
    "total_population":            "Total Population",
    "log_aggregate_household_income": "Aggregate Household Income",
    "masters_degree_pct":             "Educational Attainment_Masters",
    "income_125k_plus.y":             "Incomes $125k+"
}

existing_cols = gdf.columns
missing_cols = [c for c in rename_map if c not in existing_cols]
if missing_cols:
    print("⚠️ Missing columns:", missing_cols)

# Perform safe rename and subset
safe_rename = {k: v for k, v in rename_map.items() if k in existing_cols}
gdf = gdf[list(safe_rename.keys()) +
          [c for c in gdf.columns if c.endswith("_hotspot")] +
          ["geometry"]].rename(columns=safe_rename)

# 2. Define reclassification targets (based on renamed names)
vars_to_class = list(safe_rename.values())

# 3. Reclassify
import mapclassify as mc

for col in vars_to_class:
    if col in gdf.columns:
        scheme = mc.Quantiles(gdf[col], k=5)
        gdf[f"{col}_cls"] = scheme.find_bin(gdf[col]) + 1
        print(f"{col} breaks: {scheme.bins.round(2)}")
    else:
        print(f"⚠️ Column '{col}' missing in reclassification step.")

Median Household Incomes breaks: [11.17 11.37 11.57 11.79 12.43]
Population: White Alone breaks: [0.44 0.56 0.67 0.77 0.92]
Females Living Alone - Age 35‑54 breaks: [ 3.77  6.37  9.05 11.53 21.33]
Males Living Alone - Age 35‑54 breaks: [ 3.83  7.34 10.74 14.12 44.55]
Total Population breaks: [ 2850.2  3791.   4524.8  6009.8 14151. ]
Aggregate Household Income breaks: [18.71 19.07 19.4  19.77 20.61]
Educational Attainment_Masters breaks: [0.05 0.09 0.12 0.17 0.31]
Incomes $125k+ breaks: [ 179.8  328.2  536.4  851.4 2382. ]


In [50]:
# 1–5 scores
score_cols = [f"{c}_cls" for c in vars_to_class]
gdf["stack_score"] = gdf[score_cols].sum(axis=1)

# add +3 for EACH hotspot column that is TRUE
hotspot_cols = [
    "pct_female_30_54.y_hotspot",
    "pct_male_30_54.y_hotspot",
    "white_alone_pct_hotspot",
    "log_median_household_income_hotspot",
    "log_aggregate_household_income_hotspot",
    "income_125k_plus.y_hotspot"
]

bonus = gdf[hotspot_cols].astype(bool).sum(axis=1) * 3
# 👉  If you want “+3 once if ANY hotspot is true” instead:
# bonus = (gdf[hotspot_cols].astype(bool).any(axis=1) * 3)

gdf["stack_score"] += bonus

In [51]:
# Rank the tracts: rank = 1 means highest stack_score
gdf["score_rank"] = gdf["stack_score"].rank(
    method="first",   # break ties by order of appearance
    ascending=False
).astype(int)

def rank_to_tier(r):
    if   1  <= r <=  5:  return "Tier 1"
    elif 6  <= r <= 10:  return "Tier 2"
    elif 11 <= r <= 15:  return "Tier 3"
    elif 15 <= r <= 25:  return "Tier 4"
    else:                return "Other"

gdf["tier"] = gdf["score_rank"].apply(rank_to_tier)

In [52]:
#  -------------------------------------------------
#  📌  Static PDF choropleth of Tier 1‑4 + Other
#  -------------------------------------------------
import matplotlib.pyplot as plt
from pathlib import Path
import geopandas as gpd
import matplotlib.colors as mcolors

# ── 1.  Choose a discrete color map for tiers
tier_colors = {
    "Tier 1": "#800026",   # dark red
    "Tier 2": "#BD0026",
    "Tier 3": "#E31A1C",
    "Tier 4": "#FC4E2A",
    "Other":  "#FFEDA0"    # pale yellow
}
cmap = mcolors.ListedColormap(list(tier_colors.values()))
tier_categories = list(tier_colors.keys())

# ── 2.  Plot
fig, ax = plt.subplots(figsize=(8.5, 11))
gdf.plot(
    column="tier",
    categorical=True,
    categories=tier_categories,
    cmap=cmap,
    linewidth=0.2,
    edgecolor="white",
    ax=ax
)

# ── 3.  Cosmetics
ax.set_title("Austin – Stacked Score Tiers (Top 20 Tracts Highlighted)",
             fontsize=14, pad=15)
ax.axis("off")

# Legend
handles = [
    plt.Rectangle((0,0),1,1,color=tier_colors[tier])  # dummy patches
    for tier in tier_categories
]
ax.legend(handles, tier_categories,
          title="Tier",
          loc="lower left",
          frameon=False)

# ── 4.  Save the PDF
out_pdf = Path(r"C:\Users\User\Documents\TOOLKIT\clients\arshad\outputs\pdf\arshad_tiers_py")
plt.savefig(out_pdf, dpi=300, bbox_inches="tight")
plt.close()

print(f"📄  PDF map saved ➞  {out_pdf.resolve()}")

📄  PDF map saved ➞  C:\Users\User\Documents\TOOLKIT\clients\arshad\outputs\pdf\arshad_tiers_py


In [53]:
from pathlib import Path

out_gpkg = Path(
    r"C:\Users\User\Documents\TOOLKIT\clients\arshad\cache"
    r"\arshad_austin_acs_final_stackscore.gpkg"
)

# make sure the parent folder exists
out_gpkg.parent.mkdir(parents=True, exist_ok=True)

# save — GeoPandas will create (or overwrite) the .gpkg file
gdf.to_file(out_gpkg, driver="GPKG")

print(f"💾  GeoPackage saved ➞  {out_gpkg}")

💾  GeoPackage saved ➞  C:\Users\User\Documents\TOOLKIT\clients\arshad\cache\arshad_austin_acs_final_stackscore.gpkg


In [41]:
from pathlib import Path

# Define output path
out_csv = Path("C:/Users/User/Documents/TOOLKIT/clients/arshad/outputs/composite_summary.csv")
out_csv.parent.mkdir(parents=True, exist_ok=True)

# Select and save
gdf[["stack_score", "score_rank", "tier", "N"]].to_csv(out_csv, index=False)

print(f"📄 CSV saved ➞ {out_csv}")







KeyError: "['NAME'] not in index"