In [4]:
import pandas as pd
import json
from pathlib import Path


# 0) READ CSV
df = pd.read_csv("iowa_2014_precinct_database.csv")


# 1) FILTER TO STATE HOUSE 
house = df[df["RaceTitle"].astype(str).str.startswith("State Rep")].copy()

print(f"State House rows: {len(house)}")
print(f"Unique precincts: {house['shp_idx'].nunique()}")
print(f"Example RaceTitles: {sorted(house['RaceTitle'].unique())[:5]}")


house_for_district = house[house["house_district"].notna()].copy()
house_for_district = house_for_district[house_for_district["house_district"].astype(int).between(1, 100)].copy()


# 2) STANDARDIZE PARTY NAMES
party_map = {
    "Republican Party": "Republican",
    "Democratic Party": "Democratic",
    "Libertarian Party": "Libertarian",
}

house["party"] = house["PoliticalPartyName"].map(party_map).fillna("Other")
house_for_district["party"] = house_for_district["PoliticalPartyName"].map(party_map).fillna("Other")

print("\nParty mapping counts (ALL State Rep rows):")
print(house.groupby(["PoliticalPartyName", "party"]).size().to_string())


# 3) FILE 1
precinct_rshare = {}

for shp_idx, group in house.groupby("shp_idx"):
    total = group["votes"].sum()
    r_votes = group.loc[group["party"] == "Republican", "votes"].sum()
    d_votes = group.loc[group["party"] == "Democratic", "votes"].sum()

    r_share = round(float(r_votes / total * 100), 1) if total > 0 else 0.0
    twoparty_total = r_votes + d_votes
    r_twoparty = round(float(r_votes / twoparty_total * 100), 1) if twoparty_total > 0 else 0.0

    precinct_rshare[str(int(shp_idx))] = {
        "r_share": r_share,
        "r_twoparty": r_twoparty,
        "total_votes": int(total),
    }

print(f"\nprecinct_rshare: {len(precinct_rshare)} precincts")
print(f"Example (shp_idx '0'): {precinct_rshare.get('0')}")

# 4) FILE 2
results_district = {}

district_totals = (
    house_for_district.groupby(["house_district", "CandidateName", "party"])["votes"]
    .sum()
    .reset_index()
)

print(f"\nUnique house districts found: {district_totals['house_district'].nunique()}")

for district, dgroup in district_totals.groupby("house_district"):
    total_d = dgroup["votes"].sum()
    dgroup_sorted = dgroup.sort_values("votes", ascending=False)

    cand_list = []
    for _, row in dgroup_sorted.iterrows():
        cand_list.append({
            "CandidateName": row["CandidateName"],
            "party": row["party"],
            "votes": int(row["votes"]),
            "share": round(float(row["votes"] / total_d * 100), 1) if total_d > 0 else 0.0
        })

    results_district[str(int(district))] = cand_list

some_key = sorted(results_district.keys(), key=lambda x: int(x))[0]
print(f"\nExample district ({some_key}) top candidates:")
for c in results_district[some_key][:3]:
    print(f"  {c['CandidateName']} ({c['party']}): {c['votes']:,} votes ({c['share']}%)")


# 5) FILE 3
results_precinct = {}

for shp_idx, group in house.groupby("shp_idx"):
    total = group["votes"].sum()
    group_sorted = group.sort_values("votes", ascending=False)

    candidates = []
    for _, row in group_sorted.iterrows():
        candidates.append({
            "CandidateName": row["CandidateName"],
            "party": row["party"],
            "votes": int(row["votes"]),
            "share": round(float(row["votes"] / total * 100), 1) if total > 0 else 0.0
        })

    results_precinct[str(int(shp_idx))] = candidates

print(f"\nresults_precinct: {len(results_precinct)} precincts")
print(f"Example (shp_idx '0'): {results_precinct.get('0')}")


# 6) WRITE JSON FILES 
office_name = "state_house"
out_dir = Path("dashboard_data")
out_dir.mkdir(exist_ok=True)

with open(out_dir / f"precinct_rshare_{office_name}.json", "w") as f:
    json.dump(precinct_rshare, f)

with open(out_dir / f"results_district_{office_name}.json", "w") as f:
    json.dump(results_district, f)

with open(out_dir / f"results_precinct_{office_name}.json", "w") as f:
    json.dump(results_precinct, f)

print("\nJSON files created:")
print(f"  {out_dir}/precinct_rshare_{office_name}.json")
print(f"  {out_dir}/results_district_{office_name}.json")
print(f"  {out_dir}/results_precinct_{office_name}.json")


State House rows: 3903
Unique precincts: 1682
Example RaceTitles: ['State Rep. Dist. 1', 'State Rep. Dist. 10', 'State Rep. Dist. 100', 'State Rep. Dist. 11', 'State Rep. Dist. 12']

Party mapping counts (ALL State Rep rows):
PoliticalPartyName  party      
Democratic Party    Democratic     1287
Iowa Green Party    Other            14
Libertarian Party   Libertarian      87
Republican Party    Republican     1321

precinct_rshare: 1682 precincts
Example (shp_idx '0'): {'r_share': 83.8, 'r_twoparty': 100.0, 'total_votes': 173}

Unique house districts found: 100

Example district (1) top candidates:
  John H. Wills (Republican): 9,997 votes (98.9%)
  Write-in (Other): 109 votes (1.1%)

results_precinct: 1682 precincts
Example (shp_idx '0'): [{'CandidateName': 'Megan Hess', 'party': 'Republican', 'votes': 145, 'share': 83.8}, {'CandidateName': 'Terry Manwarren', 'party': 'Other', 'votes': 28, 'share': 16.2}]

JSON files created:
  dashboard_data/precinct_rshare_state_house.json
  dashboa