In [1]:
link = '/Users/jck/Documents/MSDS 460/Module 6/Assignment3/Indiana Population Dataset.csv'
import pandas as pd
import pulp

# Load and prepare the cleaned data
data = pd.read_csv(link)  # Replace 'link' with the actual URL or file path

# Ensure 'Total' and '% White' columns are correctly typed
data['Total'] = pd.to_numeric(data['Total'], errors='coerce')
data['% White'] = pd.to_numeric(data['% White'].str.rstrip('%'), errors='coerce') / 100

# Drop any rows where conversions failed (if any)
data.dropna(subset=['Total', '% White'], inplace=True)

# Define the problem
problem = pulp.LpProblem("Indiana_Redistricting", pulp.LpMinimize)

# Create binary variables: x[i, j] is 1 if county i is assigned to district j, else 0
x = pulp.LpVariable.dicts("county_district",
                          ((i, j) for i in data.index for j in range(9)),
                          cat='Binary')

# Pre-calculate the average % White across all counties
avg_white = data['% White'].mean()

# Objective Function: Minimize the difference in racial composition (% White) across districts
problem += pulp.lpSum([x[i, j] * (data.loc[i, '% White'] - avg_white)**2 for i in data.index for j in range(9)])

# Constraints
# Each county must be in exactly one district
for i in data.index:
    problem += pulp.lpSum([x[i, j] for j in range(9)]) == 1, f"One_district_{i}"

# Balance the total population in each district
average_population = data['Total'].sum() / 9
for j in range(9):
    problem += pulp.lpSum([x[i, j] * data.loc[i, 'Total'] for i in data.index]) <= 1.05 * average_population, f"Max_Pop_{j}"
    problem += pulp.lpSum([x[i, j] * data.loc[i, 'Total'] for i in data.index]) >= 0.95 * average_population, f"Min_Pop_{j}"

# Solve the problem
problem.solve()

# Check the status of the solution and print the results if solved successfully
if problem.status == pulp.LpStatusOptimal:
    for v in problem.variables():
        if v.varValue is not None and v.varValue > 0:
            print(v.name, "=", v.varValue)
else:
    print("Problem not solved to optimality. Status:", pulp.LpStatus[problem.status])



Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/jck/anaconda3/lib/python3.11/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/xc/3wsqwp_x79lbx1l9yz7pgf780000gn/T/980da585065842358d479e0dca18b108-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /var/folders/xc/3wsqwp_x79lbx1l9yz7pgf780000gn/T/980da585065842358d479e0dca18b108-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 23 COLUMNS
At line 25 RHS
At line 44 BOUNDS
At line 46 ENDATA
Problem MODEL has 18 rows, 1 columns and 0 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Empty problem - 18 rows, 1 columns and 0 elements
Optimal - objective value 0
Optimal objective 0 - 0 iterations time 0.002
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.01

