# Algorithmic Redisctricting - Group: James, Garo, Manny

### Data Preparation

First, we will set up this notebook so that it will display multiple outputs for each cell if needed, as well as load the necessary libraries.

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [5]:
from pulp import LpProblem, LpVariable, lpSum, LpBinary, PULP_CBC_CMD, LpStatus
import pandas as pd

# Assuming `indiana_population_cleaned` is already loaded and cleaned as previously shown
#indiana_population_cleaned = pd.read_csv('/Users/jck/Documents/MSDS 460/Module 6/Assignment3/Indiana Population Dataset.csv')
# For adjacency, we'll need a data structure like this, assuming it's available:
# adjacency = {'County1': ['County2', 'County3'], ...}


In [7]:
import pandas as pd
import os

os.getcwd()

#set working Directory to where class corpus is saved.
os.chdir('/Users/mhi573/OneDrive - Northwestern University/Documents/MSDS460/A3. Algorithmic Redistricting/Integer-Programming---Algorithmic-Redistricting')
#os.chdir('/Users/mhurt/Documents/MSDS460')

#read in class corpus csv into python
data = pd.read_csv('Indiana Population Dataset.csv')

# show first five rows of the data
data.head()
# show number of columns and rows
data.shape

'C:\\Users\\mhi573\\OneDrive - Northwestern University\\Documents\\MSDS460\\A3. Algorithmic Redistricting\\Integer-Programming---Algorithmic-Redistricting'

Unnamed: 0,County,District,Latitude,Longitude,White Alone,Black Alone,American Ind. or Alaskan Native Alone,Asian Alone,Native Hawaiian and Other Pac. Isl. Alone,Two or More Races,Total,% White
0,"Lake County, IN",1,41.42,-87.47,354449,122399,2813,8614,335,11079,499689,71%
1,"Porter County, IN",1,41.46,-87.06,159440,8665,687,2627,75,3297,174791,91%
2,"Elkhart County, IN",2,41.6,-85.87,184740,12423,1424,2702,192,5409,206890,89%
3,"Fulton County, IN",2,41.05,-86.26,19504,191,197,143,6,286,20327,96%
4,"Kosciusko County, IN",2,41.24,-85.85,76886,1019,405,1250,58,1208,80826,95%


(92, 12)

## Let's setup the problem

In [16]:
import pandas as pd
import pulp

# Read county data into a DataFrame
df = pd.read_csv('Indiana Population Dataset.csv')  

# Define the number of groups
num_groups = 9

# Create a dictionary from the DataFrame for easier access
county_data = {}
for index, row in df.iterrows():
    county_name = row['County']
    population = row['Total']
    latitude = row['Latitude']
    longitude = row['Longitude']
    county_data[county_name] = {'population': population, 'latitude': latitude, 'longitude': longitude}

# Create a PuLP problem
prob = pulp.LpProblem("County_Grouping", pulp.LpMinimize)

# Define decision variables
assign = pulp.LpVariable.dicts("Assign", ((county, group) for county in county_data for group in range(num_groups)), 0, 1, pulp.LpBinary)


In [12]:

# Define objective function (minimize imbalance in population)
prob += pulp.lpSum([county_data[county]['population'] * assign[county][group] for county in county_data for group in range(num_groups)])

# Constraint: Each county must be assigned to exactly one group
for county in county_data:
    prob += pulp.lpSum([assign[county][group] for group in range(num_groups)]) == 1

# Constraint: Population balance across groups
total_population = sum(county_data[county]['population'] for county in county_data)
group_population = total_population // num_groups
for group in range(num_groups):
    prob += pulp.lpSum([county_data[county]['population'] * assign[county][group] for county in county_data]) >= group_population
    prob += pulp.lpSum([county_data[county]['population'] * assign[county][group] for county in county_data]) <= group_population + 1

    
# Constraint: If population is greater than 761,000, assign county to its own group
for county in county_data:
    if county_data[county]['population'] > 761000:
        prob += pulp.lpSum([assign[county][group] for group in range(num_groups)]) == 1

        
# Constraint: Adjacent counties must be in the same group
# Add your adjacency constraints here using latitude and longitude data

# Solve the ILP
prob.solve()

# Output the results
for county in county_data:
    for group in range(num_groups):
        if pulp.value(assign[county][group]) == 1:
            print(f"{county} is assigned to group {group+1}")


KeyError: 'Lake County, IN'

In [9]:

# Define the number of districts
num_districts = 9

# Create the model
model = LpProblem("Indiana_Redistricting", LpBinary)

# Decision variables: x[county][district] == 1 if county is in district, 0 otherwise
x = LpVariable.dicts("county_district", (indiana_population_cleaned['Geography'], range(num_districts)), cat='Binary')

# Objective: None specified, could be minimizing the standard deviation of population across districts
# This would ensure compactness by minimizing the population spread

# Constraints

# Each county must be in exactly one district
for county in indiana_population_cleaned['Geography']:
    model += lpSum(x[county][d] for d in range(num_districts)) == 1, f"one_district_per_county_{county}"

# Population balance across districts
avg_population = indiana_population_cleaned['Total'].sum() / num_districts
for d in range(num_districts):
    model += lpSum(indiana_population_cleaned.loc[indiana_population_cleaned['Geography'] == county, 'Total'].item() * x[county][d] for county in indiana_population_cleaned['Geography']) \
             <= avg_population * 1.05, f"max_population_district_{d}"
    model += lpSum(indiana_population_cleaned.loc[indiana_population_cleaned['Geography'] == county, 'Total'].item() * x[county][d] for county in indiana_population_cleaned['Geography']) \
             >= avg_population * 0.95, f"min_population_district_{d}"

# Adjacency constraint (example for a single district, should be expanded)
# model += x['County1'][0] + x['County2'][0] <= 1, "adjacency_example"

# Solve the problem
solver = PULP_CBC_CMD(msg=True)
model.solve(solver)

# Output results
print("Status:", LpStatus[model.status])
for v in model.variables():
    if v.varValue != 0:
        print(v.name, "=", v.varValue)

# Check outputs and interpret results


KeyError: 'Geography'