## Imports

In [2]:
import pandas as pd

import math
from math import pi, pow, sin, cos, asin, sqrt, floor
from scipy import stats
import numpy as np


## Data Preprocessing

In [10]:
df = pd.read_csv('/Users/gracefujinaga/Documents/Northwestern/MSDS_460/redistricting/Assignment_3_Redo/Data/pop_data.csv')

df= df[['pop2020', 'name']]

df.to_csv('/Users/gracefujinaga/Documents/Northwestern/MSDS_460/redistricting/Assignment_3_Redo/Data/pop_data_2020.csv', index=False)

In [45]:
def parse_illinois_counties_to_adjacency_list(file_path):
    adjacency_list = {}
    current_county = None
    
    with open(file_path, 'r') as file:
        for line in file:
            # Remove leading/trailing whitespace and split by tab
            parts = line.strip().split("\t")
            
            if len(parts) >= 2 and "IL" in line and line.startswith('"'):
                # Main county is the first part
                county_name = parts[0].strip('"').replace(", IL", "")
                current_county = county_name
                
                # Initialize the adjacency list for the current county
                if county_name not in adjacency_list:
                    adjacency_list[county_name] = []
                
                # Loop through the rest of the parts (neighbor counties)
                
                neighbor_county = parts[2].strip('"').replace(", IL", "")
                if neighbor_county != current_county:
                    adjacency_list[county_name].append(neighbor_county)

            elif current_county and "IL" in line:
                # Only add neighboring counties in Illinois and remove ", IL"
                neighbor_county = line.strip().split("\t")[0].strip('"').replace(", IL", "")
                # Add the neighbor only if it's not the same as the current county
                if neighbor_county != current_county:
                    adjacency_list[current_county].append(neighbor_county)
    
    return adjacency_list

# Parse the adjacency data file
adj_list = parse_illinois_counties_to_adjacency_list('/Users/gracefujinaga/Documents/Northwestern/MSDS_460/redistricting/Assignment_3_Redo/Data/adjacency_data.txt')

# Optionally print out the adjacency list for debugging
for county, neighbors in adj_list.items():
    print(f"{county}: {neighbors}")


Adams County: ['Brown County', 'Hancock County', 'Pike County', 'Schuyler County']
Alexander County: ['Pulaski County', 'Union County']
Bond County: ['Clinton County', 'Fayette County', 'Madison County', 'Montgomery County']
Boone County: ['DeKalb County', 'McHenry County', 'Ogle County', 'Winnebago County']
Brown County: ['Adams County', 'Cass County', 'Morgan County', 'Pike County', 'Schuyler County']
Bureau County: ['Henry County', 'LaSalle County', 'Lee County', 'Marshall County', 'Putnam County', 'Stark County', 'Whiteside County']
Calhoun County: ['Greene County', 'Jersey County', 'Pike County']
Carroll County: ['Jo Daviess County', 'Ogle County', 'Stephenson County', 'Whiteside County']
Cass County: ['Brown County', 'Mason County', 'Menard County', 'Morgan County', 'Sangamon County', 'Schuyler County']
Champaign County: ['Douglas County', 'Edgar County', 'Ford County', 'McLean County', 'Piatt County', 'Vermilion County']
Christian County: ['Macon County', 'Montgomery County', 'S

In [46]:
print(len(adj_list))

102


## Post Processing

In [47]:

def filter_adjacency_list(adj_list, filter_keys):
    # new dict for filtered list
    filtered_adj_list = {}
    
    # Loop through the adjacency list
    for county, neighbors in adj_list.items():
        if county in filter_keys:
            filtered_neighbors = [neighbor for neighbor in neighbors if neighbor in filter_keys]
            filtered_adj_list[county] = filtered_neighbors
    
    return filtered_adj_list

# finds all connected components of the solution - we want 1 cc
def find_connected_components(adjacency_list):
    visited = set()  # visited set of counties
    connected_components = []  # list for connected components

    def dfs(county, component):
        visited.add(county)  # Mark the current county as visited
        component.append(county)  # Add the county to the current component
        
        # check all unvisited neighbors
        for neighbor in adjacency_list.get(county, []):
            if neighbor not in visited:
                dfs(neighbor, component)

    # Iterate over all counties in the adjacency list
    for county in adjacency_list:
        if county not in visited:  
            component = []  
            dfs(county, component)  
            connected_components.append(component)  
    
    return connected_components



In [61]:
# put in results from running the model
counties_str = "Mercer County, Henry County, Bureau County, LaSalle County, Henderson County, Warren County, Knox County, Stark County, Putnam County, Marshall County, Woodford County, Tazewell County, Mason County, Peoria County, Fulton County, McDonough County"
counties_str =" Madison County, Bond County, St. Clair County, Monroe County"
# Counties assigned: McHenry County, Kane County
# District 4: Population = 794434
# Counties assigned: Jo Daviess County, Stephenson County, Winnebago County, Boone County, Carroll County, Ogle County, DeKalb County, Whiteside County, Lee County, Kendall County
# District 5: Population = 856606
# Counties assigned: Grundy County, Will County, Kankakee County
counties = counties_str.split(", ")

# filter adj list
filter_keys = counties  
filtered_adj_list = filter_adjacency_list(adj_list, filter_keys)

# check output of the filtered adj list
print(filtered_adj_list)

# Find connected components
connected_components = find_connected_components(filtered_adj_list)

# Print the connected components
for i, component in enumerate(connected_components, 1):
    print(f"Connected Component {i}: {', '.join(component)}")

{'Bond County': [], 'Monroe County': ['St. Clair County'], 'St. Clair County': ['Monroe County']}
Connected Component 1: Bond County
Connected Component 2: Monroe County, St. Clair County


In [22]:
list(range(1,10))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [10]:
# Original string
original_string = """
JoDaviess
Stephenson
Winnebago
Boone
McHenry
Lake
Carroll
Ogle
DeKalb
Kane
DuPage
Whiteside
Lee
Kendall
Grundy
Will
Kankakee
"""

# Convert to a comma-separated string with "County" appended
region_2 = ", ".join([f"{item} County" for item in original_string.strip().split("\n")])

print(region_2)


JoDaviess County, Stephenson County, Winnebago County, Boone County, McHenry County, Lake County, Carroll County, Ogle County, DeKalb County, Kane County, DuPage County, Whiteside County, Lee County, Kendall County, Grundy County, Will County, Kankakee County


In [11]:
# Original string
original_string = """
Rock Island
Mercer
Henry
Bureau
LaSalle
Henderson
Warren
Knox
Stark
Putnam
Marshall
Livingston
Ford
Iroquois
Vermillion
Champaign
McLean
Woodford
Tazewell
Mason
Peoria
Fulton
McDonough
"""

# Convert to a comma-separated string with "County" appended
region_3 = ", ".join([f"{item} County" for item in original_string.strip().split("\n")])

print(region_3)


Rock Island County, Mercer County, Henry County, Bureau County, LaSalle County, Henderson County, Warren County, Knox County, Stark County, Putnam County, Marshall County, Livingston County, Ford County, Iroquois County, Vermillion County, Champaign County, McLean County, Woodford County, Tazewell County, Mason County, Peoria County, Fulton County, McDonough County


In [12]:
# Original string
original_string = """
Hancock
Adams
Schuyler
Brown
Cass
Menard
Logan
Dewitt
Piatt
Douglas
Edgar
Clark
Coles
Cumberland
Effingham
Shelby
Moultrie
Macon
Christian
Montgomery
Sangamon
Morgan
Macoupin
Green
Jersey
Calhoun
Scott
Pike
"""

# Convert to a comma-separated string with "County" appended
region_4 = ", ".join([f"{item} County" for item in original_string.strip().split("\n")])

print(region_4)


Hancock County, Adams County, Schuyler County, Brown County, Cass County, Menard County, Logan County, Dewitt County, Piatt County, Douglas County, Edgar County, Clark County, Coles County, Cumberland County, Effingham County, Shelby County, Moultrie County, Macon County, Christian County, Montgomery County, Sangamon County, Morgan County, Macoupin County, Green County, Jersey County, Calhoun County, Scott County, Pike County


In [13]:
# Original string
original_string = """
Madison
Bond
Fayette
Clay
Jasper
Crawford
Lawerence
Richland
Edwards
Wabash
Wayne
Marion
Clinton
St. Clair
Monroe
Randolph
Washington
Jefferson
Perry
Jackson
Franklin
Hamilton
White
Williamson
Saline
Union
Johnson
Pope
Hardin
Alexander
Pulaski
Massac
Gallatin
"""

# Convert to a comma-separated string with "County" appended
region_5 = ", ".join([f"{item} County" for item in original_string.strip().split("\n")])

print(region_5)


Madison County, Bond County, Fayette County, Clay County, Jasper County, Crawford County, Lawerence County, Richland County, Edwards County, Wabash County, Wayne County, Marion County, Clinton County, St. Clair County, Monroe County, Randolph County, Washington County, Jefferson County, Perry County, Jackson County, Franklin County, Hamilton County, White County, Williamson County, Saline County, Union County, Johnson County, Pope County, Hardin County, Alexander County, Pulaski County, Massac County, Gallatin County


In [14]:
print(region_2)
print(region_3)
print(region_4)
print(region_5)

JoDaviess County, Stephenson County, Winnebago County, Boone County, McHenry County, Lake County, Carroll County, Ogle County, DeKalb County, Kane County, DuPage County, Whiteside County, Lee County, Kendall County, Grundy County, Will County, Kankakee County
Rock Island County, Mercer County, Henry County, Bureau County, LaSalle County, Henderson County, Warren County, Knox County, Stark County, Putnam County, Marshall County, Livingston County, Ford County, Iroquois County, Vermillion County, Champaign County, McLean County, Woodford County, Tazewell County, Mason County, Peoria County, Fulton County, McDonough County
Hancock County, Adams County, Schuyler County, Brown County, Cass County, Menard County, Logan County, Dewitt County, Piatt County, Douglas County, Edgar County, Clark County, Coles County, Cumberland County, Effingham County, Shelby County, Moultrie County, Macon County, Christian County, Montgomery County, Sangamon County, Morgan County, Macoupin County, Green County,