In [1]:
# import libraries
import numpy as np
import geopandas as gpd   
import pandas as pd 
from math import pi, pow, sin, cos, asin, sqrt, floor
from pulp import *

In [2]:
def degrees_to_radians(x):
     return((pi / 180) * x)

In [3]:
def lon_lat_distance_miles(lon_a, lat_a, lon_b, lat_b):
    radius_of_earth = 24872 / (2 * pi)
    c = sin((degrees_to_radians(lat_a) - \
    degrees_to_radians(lat_b)) / 2)**2 + \
    cos(degrees_to_radians(lat_a)) * \
    cos(degrees_to_radians(lat_b)) * \
    sin((degrees_to_radians(lon_a) - \
    degrees_to_radians(lon_b))/2)**2
    return(2 * radius_of_earth * (asin(sqrt(c)))) 

In [4]:
def lon_lat_distance_meters (lon_a, lat_a, lon_b, lat_b):
    return(lon_lat_distance_miles(lon_a, lat_a, lon_b, lat_b) * 1609.34)


In [5]:
# read in file with county id, county names, latitudes, longitudes, and populations
file_path = '/Users/Jai/Documents/Git_remote/Decision_analytics/Module6/michigan_counties.xlsx'
michigan_counties = pd.read_excel(file_path, index_col = None)


In [6]:
michigan_counties.head()

Unnamed: 0,count_id,county_names,latitude,longitude,pop2020
0,0,Leelanau,45.151771,-86.038496,22870
1,1,Clinton,42.943652,-84.601517,79748
2,2,Wexford,44.338367,-85.578414,34196
3,3,Branch,41.916119,-85.059044,44531
4,4,Ionia,42.945094,-85.074603,66809


In [7]:
michigan_counties.shape

(83, 5)

In [8]:
# remove population to allow easy joining of long and lat for each county pair
lat_lon = ['county_names', 'latitude', 'longitude']
lat_lon = michigan_counties[lat_lon]


In [9]:
lat_lon

Unnamed: 0,county_names,latitude,longitude
0,Leelanau,45.151771,-86.038496
1,Clinton,42.943652,-84.601517
2,Wexford,44.338367,-85.578414
3,Branch,41.916119,-85.059044
4,Ionia,42.945094,-85.074603
...,...,...,...
78,Lapeer,43.090147,-83.221784
79,Arenac,44.042885,-83.747242
80,Charlevoix,45.502498,-85.373250
81,Alcona,44.683623,-83.129008


In [11]:
# create list of county names for pairing        
county_names = michigan_counties['county_names'].to_numpy()


In [12]:
county_names

array(['Leelanau', 'Clinton', 'Wexford', 'Branch', 'Ionia', 'Mecosta',
       'Keweenaw', 'Isabella', 'Schoolcraft', 'Crawford', 'St. Clair',
       'Missaukee', 'Presque Isle', 'Saginaw', 'Houghton', 'Van Buren',
       'Ottawa', 'Berrien', 'Montmorency', 'Shiawassee', 'Otsego',
       'Lenawee', 'Newaygo', 'Roscommon', 'Marquette', 'Alger', 'Iron',
       'Barry', 'Emmet', 'Osceola', 'Antrim', 'Jackson', 'Manistee',
       'Calhoun', 'Tuscola', 'Gladwin', 'Menominee', 'Ontonagon',
       'Gogebic', 'Macomb', 'Midland', 'Kent', 'St. Joseph', 'Ogemaw',
       'Oceana', 'Iosco', 'Alpena', 'Sanilac', 'Oscoda', 'Washtenaw',
       'Kalamazoo', 'Ingham', 'Dickinson', 'Bay', 'Benzie', 'Huron',
       'Clare', 'Luce', 'Genesee', 'Montcalm', 'Cheboygan', 'Eaton',
       'Chippewa', 'Lake', 'Kalkaska', 'Mason', 'Mackinac', 'Oakland',
       'Monroe', 'Allegan', 'Wayne', 'Muskegon', 'Gratiot',
       'Grand Traverse', 'Baraga', 'Delta', 'Hillsdale', 'Cass', 'Lapeer',
       'Arenac', 'Charlevoi

In [13]:
# create each unique pair
pairs = []

for i in range(len(county_names)):
    for j in range(i + 1, len(county_names)):
        pairs.append((county_names[i], county_names[j]))

col_names = ['county_1', 'county_2']
                
county_pairs = pd.DataFrame(pairs, columns = col_names)

In [14]:
county_pairs

Unnamed: 0,county_1,county_2
0,Leelanau,Clinton
1,Leelanau,Wexford
2,Leelanau,Branch
3,Leelanau,Ionia
4,Leelanau,Mecosta
...,...,...
3398,Arenac,Alcona
3399,Arenac,Livingston
3400,Charlevoix,Alcona
3401,Charlevoix,Livingston


In [15]:
# read in shapefile
file_path = '/Users/Jai/Documents/Git_remote/Decision_analytics/Module6/michigan_counties.geojson'
shapefile_michigan = gpd.read_file(file_path)
map_population_by_county_data = shapefile_michigan.merge(michigan_counties, left_on = 'name', right_on = 'county_names', suffixes = ('_left', '_right'))

# drop unwanted columns
drop_cols = ['statefp', 'countyfp', 'countyns', 'namelsad', 'lsad', 'csafp', 'classfp', 'metdivfp', 'mtfcc', 'cbsafp', 'state_name', 'countyfp_nozero', 'count_id', 'county_names', 'aland', 'awater', 'funcstat']
map_population_by_county_data = map_population_by_county_data.drop(columns = drop_cols)

# check population df; believe that 'geometry' is what's used to create the shape of the state in gpd
map_population_by_county_data.head()


Unnamed: 0,geo_point_2d,geoid,name,stusab,intptlat,intptlon,geometry,latitude,longitude,pop2020
0,"{'lon': -86.0384960523, 'lat': 45.151770859}",26089,Leelanau,MI,45.1461816,-86.051574,"POLYGON ((-85.56175 44.95226, -85.56209 44.950...",45.151771,-86.038496,22870
1,"{'lon': -84.6015165533, 'lat': 42.9436523662}",26037,Clinton,MI,42.950455,-84.5916949,"POLYGON ((-84.83762 43.03264, -84.83754 43.032...",42.943652,-84.601517,79748
2,"{'lon': -85.5784138137, 'lat': 44.3383668115}",26165,Wexford,MI,44.3313751,-85.5700462,"POLYGON ((-85.81909 44.42450, -85.81910 44.425...",44.338367,-85.578414,34196
3,"{'lon': -85.0590443604, 'lat': 41.9161186535}",26023,Branch,MI,41.9184551,-85.0668852,"POLYGON ((-85.29293 41.98482, -85.29293 41.984...",41.916119,-85.059044,44531
4,"{'lon': -85.0746031181, 'lat': 42.9450938315}",26067,Ionia,MI,42.9446503,-85.073766,"POLYGON ((-85.07503 43.12021, -85.06470 43.120...",42.945094,-85.074603,66809


In [17]:

# model variables
n_counties = 83
n_districts = 14

#n_counties = michigan_counties['county_names'].nunique()

#n_districts = districts['district_name'].nunique()


In [26]:
model = LpProblem('Compacted-Redistricting', LpMinimize) 



In [None]:
# Decision variable: x[i][j] is 1 if county i is assigned to district j, 0 otherwise
x = [[LpVariable(f"x_{i}_{j}", cat="Binary") for j in range(n_districts)] for i in range(n_counties)]
x

In [35]:
# Objective: Minimize the number of counties that are assigned to multiple districts
model += lpSum(x[i][j] for i in range(n_counties) for j in range(n_districts)) - n_counties


In [36]:
# Constraint: Each county is assigned to exactly one district
for i in range(n_counties):
    model += lpSum(x[i][j] for j in range(n_districts)) == 1


In [37]:
# Solve the problem
model.solve()


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/Jai/anaconda3/lib/python3.7/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/mg/gn4s4j_94_j8j560qd6pnnzm0000gp/T/632cf53d00c046f49a4f97f3d3fa8142-pulp.mps timeMode elapsed branch printingOptions all solution /var/folders/mg/gn4s4j_94_j8j560qd6pnnzm0000gp/T/632cf53d00c046f49a4f97f3d3fa8142-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 88 COLUMNS
At line 4737 RHS
At line 4821 BOUNDS
At line 5984 ENDATA
Problem MODEL has 83 rows, 1162 columns and 1162 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 83 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 83 to -1.79769e+308
Probing was tried 0 times and created 0 cuts of which 0 

1

In [38]:

# Print the results
for j in range(n_districts):
    district_counties = [i for i in range(n_counties) if x[i][j].varValue == 1]
    print(f"District {j+1} contains counties: {district_counties}")

District 1 contains counties: [1, 7, 11, 15, 16, 23, 28, 33, 34, 43, 45, 46, 55, 58, 59, 70, 77]
District 2 contains counties: [0, 5, 6, 8, 13, 21, 22, 26, 27, 29, 30, 31, 37, 52, 53, 57, 65, 76, 79, 82]
District 3 contains counties: []
District 4 contains counties: [19, 47, 67]
District 5 contains counties: [41, 42]
District 6 contains counties: [24]
District 7 contains counties: [49]
District 8 contains counties: [9, 36, 73, 81]
District 9 contains counties: [3, 14, 32, 38, 48, 56, 69, 75]
District 10 contains counties: [2, 4, 10, 12, 17, 18, 20, 25, 35, 39, 40, 44, 50, 51, 54, 61, 62, 63, 64, 66, 68, 71, 72, 74, 78]
District 11 contains counties: [60, 80]
District 12 contains counties: []
District 13 contains counties: []
District 14 contains counties: []
