
# heap-assignment-5 (Huntington-Hill via Max-Heap)

**Author:** Jack Henney  
**Course:** CS315 — Algorithms and Data Structures

**GitHub Repository Link:**  
[Click to view this notebook on GitHub](https://github.com/jackhenney/CS315/blob/main/heap-assignment-5.ipynb)

This notebook computes the U.S. House apportionment using 2020 state populations and the Huntington-Hill (Equal Proportions) method, implemented with a max-heap, and verifies it against `StatesPopulation.csv`.


In [None]:

# ========================
# IMPORTS
# ========================
# math  : for square roots used in the Huntington-Hill priority formula
# heapq : Python's priority queue; it's a MIN-heap, so we will push NEGATIVE priorities to simulate a MAX-heap
# pandas: for tabular data handling and CSV comparison
import math
import heapq
import pandas as pd


In [None]:

# ========================
# 2020 STATE POPULATION DATA
# ========================
# statePop and stateNames are parallel lists: statePop[i] is the 2020 population of
# stateNames[i]. Both follow alphabetical order by state name, so any edit must keep
# the two lists aligned position-for-position.
statePop = [
    5024279,733391,7151502,3011524,39538223,5773714,3605944,989948,21538187,10711908,
    1455271,1839106,12812508,6785528,3190369,2937880,4505836,4657757,1362359,6177224,
    7029917,10077331,5706494,2961279,6154913,1084225,1961504,3104614,1377529,9288994,
    2117522,20201249,10439388,779094,11799448,3959353,4237256,13002700,1097379,5118425,
    886667,6910840,29145505,3271616,643077,8631393,7705281,1793716,5893718,576851
]

# Multi-word names are written here without the internal space (e.g. "NewHampshire");
# the name_fix mapping defined right after this list restores the spaced spelling.
stateNames = [
    "Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware","Florida","Georgia",
    "Hawaii","Idaho","Illinois","Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland","Massachusetts",
    "Michigan","Minnesota","Mississippi","Missouri","Montana","Nebraska","Nevada","NewHampshire","NewJersey",
    "NewMexico","NewYork","NorthCarolina","NorthDakota","Ohio","Oklahoma","Oregon","Pennsylvania","RhodeIsland",
    "SouthCarolina","SouthDakota","Tennessee","Texas","Utah","Vermont","Virginia","Washington","WestVirginia",
    "Wisconsin","Wyoming"
]

# Spaced spellings for the entries of stateNames that were typed without a space.
# Any name not listed here is already in its final form.
name_fix = {
    "NewHampshire": "New Hampshire",
    "NewJersey": "New Jersey",
    "NewMexico": "New Mexico",
    "NewYork": "New York",
    "NorthCarolina": "North Carolina",
    "NorthDakota": "North Dakota",
    "RhodeIsland": "Rhode Island",
    "SouthCarolina": "South Carolina",
    "SouthDakota": "South Dakota",
    "WestVirginia": "West Virginia"
}

# A misspelled key would be silently skipped by the .get(s, s) fallback below and the
# unspaced name would leak into the results, so reject unknown keys up front.
unknown_fix_keys = sorted(set(name_fix) - set(stateNames))
assert not unknown_fix_keys, "name_fix keys not present in stateNames: {}".format(unknown_fix_keys)

stateNamesFixed = [name_fix.get(s, s) for s in stateNames]

# Sanity checks on the hand-entered data: 50 states, lists aligned, no repeated name.
assert len(statePop) == 50
assert len(stateNames) == 50
assert len(stateNamesFixed) == 50
assert len(set(stateNamesFixed)) == len(stateNamesFixed), "State names must be unique"


In [None]:

# ========================
# HELPER FUNCTIONS
# ========================
def hh_priority(population, n):
    """Return Huntington-Hill priority P / sqrt(n(n+1)) for the next seat.

    Args:
        population: The state's population P.
        n: Number of seats the state currently holds; must be at least 1.

    Returns:
        The priority value (float) of the state's claim on seat n + 1.

    Raises:
        ValueError: If n is less than 1. Without this guard n = 0 or -1 fails with
            an opaque ZeroDivisionError and n <= -2 returns a meaningless number.
    """
    if n < 1:
        raise ValueError("n must be >= 1, got {!r}".format(n))
    return population / math.sqrt(n * (n + 1))

def clean_state_name(s):
    """Normalize state name text read from CSV to avoid whitespace/encoding mismatches.

    Non-breaking spaces (U+00A0) are turned into ordinary spaces rather than deleted,
    so a name such as "New\u00a0Hampshire" keeps its word boundary. Leading/trailing
    whitespace is then removed and internal whitespace runs collapse to one space.

    Args:
        s: The raw cell value; it is converted with str() first.

    Returns:
        The cleaned name as a str.
    """
    text = str(s).replace("\u00a0", " ")
    # split() with no argument drops leading/trailing whitespace and splits on runs.
    return " ".join(text.split())


In [None]:

# ========================
# INITIALIZE SEATS AND HEAP
# ========================
# Start with 1 seat per state (constitutional minimum). Sizes are derived from the
# population list instead of repeating the literal 50.
TARGET_SEATS = 435
num_states = len(statePop)
seats = [1] * num_states
total_seats = num_states

# Priority queue (max-heap simulated using negative priorities, because heapq is a
# min-heap). Each entry is (-priority, state_index); with one seat held (n = 1) the
# priority is population / sqrt(2), computed by the shared hh_priority helper so the
# formula lives in one place. Building the list and heapifying once is O(n).
heap = [(-hh_priority(pop, 1), i) for i, pop in enumerate(statePop)]
heapq.heapify(heap)

print("Initial seats:", total_seats, "; heap entries:", len(heap))


In [None]:

# ========================
# ALLOCATE REMAINING SEATS
# ========================
# Every pass hands out exactly one seat. The heap root carries the most negative key,
# i.e. the largest real priority; that state is removed, credited with the seat, and
# re-queued with the priority that applies to its following seat.
for seat_number in range(total_seats, TARGET_SEATS):
    neg_priority, winner = heapq.heappop(heap)
    seats[winner] += 1
    total_seats += 1
    # seats[winner] is already the updated count, which is the n for the next claim.
    upcoming = hh_priority(statePop[winner], seats[winner])
    heapq.heappush(heap, (-upcoming, winner))
print("Final total seats:", total_seats)


In [None]:

# ========================
# BUILD RESULTS TABLE
# ========================
# One row per state: display name, 2020 population, and the seat count computed by the
# allocation loop, sorted alphabetically by state. pandas (pd) comes from the imports
# cell at the top of the notebook, so it is not re-imported here.
apportionment = (
    pd.DataFrame({
        "State": stateNamesFixed,
        "Population_2020": statePop,
        "Representatives_Calc": seats
    })
    .sort_values("State")
    .reset_index(drop=True)
)

print("States:", len(apportionment))
print("Sum seats:", apportionment["Representatives_Calc"].sum())
apportionment.head(10)


In [None]:

# ========================
# VERIFY AGAINST CSV
# ========================
# Load the reference table, rename the seat column, and clean the state names so they
# can be joined against the computed table. Built as one chain so re-running the cell
# always starts from the file on disk.
actual = (
    pd.read_csv("StatesPopulation.csv")
    .rename(columns={"Number of Voting Seats in Congress": "Representatives_Actual"})
    .assign(State=lambda frame: frame["State"].map(clean_state_name))
)

# Left join keeps every computed state; a state absent from the CSV gets NaN in
# Representatives_Actual and therefore Match == False.
official = actual.rename(columns={"State": "StateKey"})[["StateKey", "Representatives_Actual"]]
merged = pd.merge(
    apportionment.rename(columns={"State": "StateKey"}),
    official,
    on="StateKey", how="left"
)
# A state listed more than once in the CSV would duplicate rows here and distort the
# seat totals, so fail early with a specific message.
assert len(merged) == len(apportionment), "StatesPopulation.csv lists a state more than once"

merged["Match"] = merged["Representatives_Calc"] == merged["Representatives_Actual"]
print("Matches:", merged["Match"].sum(), "/", len(merged))
merged.sort_values("StateKey").reset_index(drop=True).head(10)


In [None]:

# ========================
# FINAL ASSERTIONS
# ========================
# Check for unjoined states first: a name mismatch leaves NaN in Representatives_Actual,
# which would otherwise only show up as a generic seat-total failure below.
missing_states = merged.loc[merged["Representatives_Actual"].isna(), "StateKey"].tolist()
assert not missing_states, "States not found in StatesPopulation.csv: {}".format(missing_states)

assert merged["Representatives_Calc"].sum() == 435, "Total calculated seats must be 435"
assert merged["Representatives_Actual"].sum() == 435, "Total official seats must be 435"
assert merged["Match"].all(), "All states must match the official apportionment"
print("All checks passed. Ready to submit.")
