# Connecticut Outlier Analysis 
* @authors: Blayde Omura and Lisa Jurca. AI for redistricting final project.
* Outlier analysis using the GerryChain library.
* Metrics included: efficiency gap, mean-median difference, Democratic- vs. Republican-won districts, and number of cut edges.

In [5]:
import pandas as pd
import geopandas as gpd
import maup
import time

In [7]:
from gerrychain import Graph, Partition, proposals, updaters, constraints, accept, MarkovChain, Election
from gerrychain.updaters import cut_edges, Tally
from gerrychain.proposals import recom
from gerrychain.accept import always_accept
from functools import partial

Load our CT shapefile and do a quick check of its contents.

In [8]:
# Turn on maup's progress reporting.
maup.progress.enabled = True

# Location of the Connecticut shapefile, relative to the notebook.
CT_SHAPEFILE = "./CT/CT.shp"

# Read the shapefile into a GeoDataFrame and print it as a quick sanity check.
ct_df = gpd.read_file(CT_SHAPEFILE)
print(ct_df)

    STATEFP20 COUNTYFP20              NAME20  G16PREDCLI  G16PRERTRU  \
0          09        011    Montville 004-00        1175        1572   
1          09        005  Barkhamsted 001-00         905        1157   
2          09        005    Bethlehem 001-00         801        1405   
3          09        005  Bridgewater 001-00         518         571   
4          09        005       Canaan 001-00         357         203   
..        ...        ...                 ...         ...         ...   
738        09        009    Waterbury 071-02         445         419   
739        09        009    Waterbury 075-04        1102         491   
740        09        009    Waterbury 071-03        1328        1449   
741        09        009    New Haven 022-01         691          19   
742        09        009    New Haven 002-02        1258          75   

     G16PRELJOH  G16PREGSTE  G16PREOWRI  G16USSDBLU  G16USSRCAR  ...    HVAP  \
0           120          52           0        1659    

Let's make sure the columns look good

In [9]:
# List every column so the election (G16*) and population (TOTPOP, *VAP)
# fields referenced later can be checked by name.
print(ct_df.columns)

Index(['STATEFP20', 'COUNTYFP20', 'NAME20', 'G16PREDCLI', 'G16PRERTRU',
       'G16PRELJOH', 'G16PREGSTE', 'G16PREOWRI', 'G16USSDBLU', 'G16USSRCAR',
       'G16USSLLIO', 'G16USSGRUS', 'G16USSOWRI', 'TOTPOP', 'HISP', 'NH_WHITE',
       'NH_BLACK', 'NH_AMIN', 'NH_ASIAN', 'NH_NHPI', 'NH_OTHER', 'NH_2MORE',
       'VAP', 'HVAP', 'WVAP', 'BVAP', 'AMINVAP', 'ASIANVAP', 'NHPIVAP',
       'OTHERVAP', '2MOREVAP', 'CD', 'geometry'],
      dtype='object')


# Partition and updaters

In [15]:
# Build the adjacency graph from the GeoDataFrame geometries.
graph = Graph.from_geodataframe(ct_df)

# 2016 presidential election: maps each party label to the column holding
# that party's vote counts.
election = Election(
    "2016_Presidential",
    {"Democratic": "G16PREDCLI", "Republican": "G16PRERTRU"},
)

# One updaters dictionary for the partition. Previously this was assigned
# twice and the second assignment silently discarded "cut_edges"; the cut-edge
# cell only worked because Partition supplies "cut_edges" as a default updater.
# Listing it explicitly makes that dependency visible.
# NOTE: this name shadows the `updaters` module imported from gerrychain; it is
# kept as-is so that any later cell referring to this dict keeps working.
updaters = {
    "cut_edges": cut_edges,
    "population": Tally("TOTPOP", alias="population"),
    "2016_Presidential": election,
}

# Initial partition: units are assigned to districts by the "CD" column
# (presumably the enacted congressional districts -- confirm against the data).
initial_partition = Partition(graph, assignment="CD", updaters=updaters)

# Efficiency Gap Calculation

In [16]:
# Look up the 2016 presidential results on the initial partition, then ask
# them for the efficiency gap.
presidential_2016 = initial_partition["2016_Presidential"]
eff_gap = presidential_2016.efficiency_gap()

print(f"Efficiency Gap: {eff_gap}")

Efficiency Gap: 0.35716904965472723


# Mean Median Difference Calculation

In [17]:
# Look up the 2016 presidential results on the initial partition, then ask
# them for the mean-median score.
presidential_2016 = initial_partition["2016_Presidential"]
mean_median_diff = presidential_2016.mean_median()

print(f"Mean-Median Difference: {mean_median_diff}")

Mean-Median Difference: 0.0025703346431988905


# Democratic vs Republican Won Districts

In [18]:
# Election results for "2016_Presidential" on the initial partition
election_results = initial_partition["2016_Presidential"]

# wins(party) gives the number of districts that party won; query both
# parties in one pass.
dem_won_districts, rep_won_districts = (
    election_results.wins(party) for party in ("Democratic", "Republican")
)

# Report the district counts
print(f"Democratic-won Districts: {dem_won_districts}")
print(f"Republican-won Districts: {rep_won_districts}")

Democratic-won Districts: 5
Republican-won Districts: 0


# Number of Cut Edges

In [19]:
# Cut edges are read straight off the partition object; count them.
cut_edge_set = initial_partition["cut_edges"]
number_of_cut_edges = len(cut_edge_set)

print(f"Number of cut edges: {number_of_cut_edges}")

Number of cut edges: 206
