
# HeapHHApportionment-2020 (Huntington–Hill via Max-Heap)

**Author:** Jack Henney  
**Goal:** Use 2020 Census data to apportion 435 U.S. House seats across 50 states using the Huntington–Hill method (implemented with a max-heap), and verify against the official results provided in `StatesPopulation.csv`.

> 🔗 **GitHub Link:** _After you upload this notebook to your GitHub repo, replace this line with a clickable link to the file in your repo (this is part of the grading!)._


In [None]:

import math
import heapq
import pandas as pd


In [None]:

# 2020 state populations (order aligned with stateNames)
# Parallel-list invariant: statePop[i] is the population of stateNames[i].
# Both lists hold 50 entries in alphabetical order by state name, so any edit
# to one list must be mirrored in the other.
statePop = [
    5024279,733391,7151502,3011524,39538223,5773714,3605944,989948,21538187,10711908,  # Alabama .. Georgia
    1455271,1839106,12812508,6785528,3190369,2937880,4505836,4657757,1362359,6177224,  # Hawaii .. Maryland
    7029917,10077331,5706494,2961279,6154913,1084225,1961504,3104614,1377529,9288994,  # Massachusetts .. NewJersey
    2117522,20201249,10439388,779094,11799448,3959353,4237256,13002700,1097379,5118425,  # NewMexico .. SouthCarolina
    886667,6910840,29145505,3271616,643077,8631393,7705281,1793716,5893718,576851  # SouthDakota .. Wyoming
]

# NOTE: The source list omits the space in the ten multi-word names
# (e.g., NewHampshire). This original list is kept unchanged; a separate
# "fixed" display-name list is built from it for comparison with the CSV.
stateNames = [
    "Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware","Florida","Georgia",
    "Hawaii","Idaho","Illinois","Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland","Massachusetts",
    "Michigan","Minnesota","Mississippi","Missouri","Montana","Nebraska","Nevada","NewHampshire","NewJersey",
    "NewMexico","NewYork","NorthCarolina","NorthDakota","Ohio","Oklahoma","Oregon","Pennsylvania","RhodeIsland",
    "SouthCarolina","SouthDakota","Tennessee","Texas","Utah","Vermont","Virginia","Washington","WestVirginia",
    "Wisconsin","Wyoming"
]

# Lookup from the run-together spellings in stateNames to the spaced
# spellings, so the ten multi-word states match the names used in the CSV.
name_fix = dict(
    NewHampshire="New Hampshire",
    NewJersey="New Jersey",
    NewMexico="New Mexico",
    NewYork="New York",
    NorthCarolina="North Carolina",
    NorthDakota="North Dakota",
    RhodeIsland="Rhode Island",
    SouthCarolina="South Carolina",
    SouthDakota="South Dakota",
    WestVirginia="West Virginia",
)
# Same order as stateNames; names with no entry in name_fix pass through as-is.
stateNamesFixed = [name_fix[raw] if raw in name_fix else raw for raw in stateNames]


In [None]:

# Huntington–Hill via max-heap
def huntington_hill(populations, total_seats):
    """Apportion *total_seats* among states with the Huntington–Hill method.

    Every state starts with one seat. Each remaining seat goes to the state
    with the highest priority value ``P / sqrt(n * (n + 1))``, where ``P`` is
    the state's population and ``n`` is the number of seats it currently holds.
    Equal priorities are resolved in favour of the lower list index.

    Args:
        populations: Sequence of state populations.
        total_seats: Total number of seats to hand out.

    Returns:
        A list of seat counts, positionally aligned with *populations*.

    Raises:
        ValueError: If there are fewer seats than states (each state must
            receive at least one), or seats but no states to receive them.
    """
    n_states = len(populations)
    if total_seats < n_states:
        raise ValueError(
            f"Need at least {n_states} seats to give every state one; got {total_seats}"
        )
    if n_states == 0:
        if total_seats > 0:
            raise ValueError("Cannot apportion seats among zero states")
        return []

    seats = [1] * n_states

    # heapq is a min-heap, so priorities are stored negated to pop the largest
    # first. With one seat held (n = 1) the priority is P / sqrt(1 * 2).
    heap = [(-pop / math.sqrt(2), idx) for idx, pop in enumerate(populations)]
    heapq.heapify(heap)

    for _ in range(total_seats - n_states):
        # The root is the state with the highest current priority.
        _, idx = heap[0]
        seats[idx] += 1
        n = seats[idx]
        next_priority = populations[idx] / math.sqrt(n * (n + 1))
        # Pop the root and push the state's updated priority in one step.
        heapq.heapreplace(heap, (-next_priority, idx))

    return seats


TARGET_SEATS = 435
seats = huntington_hill(statePop, TARGET_SEATS)
total_seats = sum(seats)

apportionment = pd.DataFrame({
    "State": stateNamesFixed,          # use fixed names for display/merge
    "Population_2020": statePop,
    "Representatives_Calc": seats
}).sort_values("State").reset_index(drop=True)

print("Total seats allocated:", apportionment["Representatives_Calc"].sum())
apportionment.head(10)


In [None]:

# Compare to actual from StatesPopulation.csv (in the same folder)
# The path is relative, so the CSV is resolved against the current working
# directory; pandas raises FileNotFoundError if it is not there.
actual = pd.read_csv("StatesPopulation.csv")

# Normalise whitespace (including non-breaking spaces) and trim
def clean(s):
    """Return *s* as a string with whitespace normalised for use as a merge key.

    The value is converted with ``str()``. Every run of whitespace, including
    non-breaking spaces (U+00A0), is then collapsed to a single ordinary
    space, and leading/trailing whitespace is dropped.

    A non-breaking space between two words is therefore turned into a space
    rather than deleted, so ``"New\\xa0Hampshire"`` becomes ``"New Hampshire"``
    instead of ``"NewHampshire"``.

    Args:
        s: Any value; typically a state name read from the CSV.

    Returns:
        The normalised string.
    """
    # str.split() with no separator splits on any Unicode whitespace,
    # which covers U+00A0 as well as spaces and tabs.
    return " ".join(str(s).split())

# Fail fast if the CSV does not carry the official seat-count column.
if "Number of Voting Seats in Congress" not in actual.columns:
    raise ValueError("Expected 'Number of Voting Seats in Congress' in StatesPopulation.csv")

# Tidy the CSV's state names, then give the seat column a short name.
actual["State"] = actual["State"].apply(clean)
actual = actual.rename(
    columns={"Number of Voting Seats in Congress": "Representatives_Actual"}
)

# Both sides expose the state name under the shared join key "StateKey".
lhs = apportionment.rename(columns={"State": "StateKey"}).copy()
rhs = actual.rename(columns={"State": "StateKey"}).loc[
    :, ["StateKey", "Representatives_Actual"]
].copy()

# Left join keeps every computed row; a state absent from the CSV gets a
# missing actual value and therefore compares as a non-match.
merged = lhs.merge(rhs, on="StateKey", how="left")
merged["Match"] = merged["Representatives_Calc"].eq(merged["Representatives_Actual"])
print(f"Matches: {merged['Match'].sum()}/50")
merged.sort_values(by="StateKey").reset_index(drop=True)


In [None]:

# Final sanity checks on the computed table: the House is exactly filled
# and every state has a row.
seat_total = apportionment["Representatives_Calc"].sum()
assert seat_total == 435, "Total seats must be 435"
assert apportionment.shape[0] == 50, "There must be exactly 50 states"
print("Assertions passed. ✅ Add your GitHub link up top after uploading!")
