# Connecticut Outlier Analysis 
* @authors: Blayde Omura and Lisa Jurca. AI for redistricting final project.
* Outlier analysis using the built-in GerryChain libraries.
* Includes the following metrics: efficiency gap, mean-median difference, Democratic vs. Republican won districts, and number of cut edges.

In [3]:
import pandas as pd
import geopandas as gpd
import maup
import time

In [4]:
from gerrychain import Graph, Partition, proposals, updaters, constraints, accept, MarkovChain, Election
from gerrychain.updaters import cut_edges, Tally
from gerrychain.proposals import recom
from gerrychain.accept import always_accept
from functools import partial

Import our CT shapefile and do a quick check on its contents.

In [5]:
# Turn on maup's progress flag before doing any geometry work.
maup.progress.enabled = True

# Load the Connecticut shapefile into a GeoDataFrame and dump it as a
# quick sanity check of the rows and columns that were read.
ct_shapefile_path = "./CT/CT.shp"
ct_df = gpd.read_file(ct_shapefile_path)
print(ct_df)

    STATEFP20 COUNTYFP20              NAME20  G16PREDCLI  G16PRERTRU  \
0          09        011    Montville 004-00        1175        1572   
1          09        005  Barkhamsted 001-00         905        1157   
2          09        005    Bethlehem 001-00         801        1405   
3          09        005  Bridgewater 001-00         518         571   
4          09        005       Canaan 001-00         357         203   
..        ...        ...                 ...         ...         ...   
738        09        009    Waterbury 071-02         445         419   
739        09        009    Waterbury 075-04        1102         491   
740        09        009    Waterbury 071-03        1328        1449   
741        09        009    New Haven 022-01         691          19   
742        09        009    New Haven 002-02        1258          75   

     G16PRELJOH  G16PREGSTE  G16PREOWRI  G16USSDBLU  G16USSRCAR  ...    HVAP  \
0           120          52           0        1659    

Let's make sure the columns look good

In [6]:
# Show the column index so the vote-count and population fields used by the
# later cells can be checked by eye.
ct_columns = ct_df.columns
print(ct_columns)

Index(['STATEFP20', 'COUNTYFP20', 'NAME20', 'G16PREDCLI', 'G16PRERTRU',
       'G16PRELJOH', 'G16PREGSTE', 'G16PREOWRI', 'G16USSDBLU', 'G16USSRCAR',
       'G16USSLLIO', 'G16USSGRUS', 'G16USSOWRI', 'TOTPOP', 'HISP', 'NH_WHITE',
       'NH_BLACK', 'NH_AMIN', 'NH_ASIAN', 'NH_NHPI', 'NH_OTHER', 'NH_2MORE',
       'VAP', 'HVAP', 'WVAP', 'BVAP', 'AMINVAP', 'ASIANVAP', 'NHPIVAP',
       'OTHERVAP', '2MOREVAP', 'CD', 'geometry'],
      dtype='object')


# Partition and updaters

In [9]:
# Build the adjacency graph from the GeoDataFrame loaded above.
graph = Graph.from_geodataframe(ct_df)

# Elections tracked on every partition. Each one maps a party label to the
# column of ct_df holding that party's vote counts for the contest.
elections = [
    Election(
        "2016_Presidential",
        {"Democratic": "G16PREDCLI", "Republican": "G16PRERTRU"},
    ),
    Election(
        "2016_Senate",
        {"Democratic": "G16USSDBLU", "Republican": "G16USSRCAR"},
    ),
]

# One updaters dictionary, built once. Previously a first dictionary holding
# "cut_edges" was silently discarded by a second assignment; "cut_edges" is
# now registered explicitly so the cut-edge count computed later does not
# rely on whatever updaters Partition installs by default.
# NOTE(review): this name shadows the `updaters` module imported from
# gerrychain at the top of the notebook; it is kept because other cells may
# refer to this dictionary by name.
updaters = {
    "cut_edges": cut_edges,
    "population": Tally("TOTPOP", alias="population"),
}

# Register each election under its own name so results can be looked up as
# partition["2016_Presidential"] / partition["2016_Senate"].
election_updaters = {election.name: election for election in elections}
updaters.update(election_updaters)

# Initial partition: districts are taken from the "CD" column.
initial_partition = Partition(graph, assignment="CD", updaters=updaters)

# Efficiency Gap Calculation

In [10]:
# Efficiency gap of the initial plan under each tracked election, evaluated
# in the order presidential, then senate.
pres_eff_gap, sen_eff_gap = (
    initial_partition[contest].efficiency_gap()
    for contest in ("2016_Presidential", "2016_Senate")
)

print(f"Presidential Efficiency Gap: {pres_eff_gap}")
print(f"Senate Efficiency Gap: {sen_eff_gap}")

Presidential Efficiency Gap: 0.35716904965472723
Senate Efficiency Gap: 0.20788267732421292


# Mean Median Difference Calculation

In [11]:
# Mean-median difference of the initial plan under each tracked election,
# evaluated in the order presidential, then senate.
pres_mean_median_diff, sen_mean_median_diff = (
    initial_partition[contest].mean_median()
    for contest in ("2016_Presidential", "2016_Senate")
)

print(f"Presidential Mean-Median Difference: {pres_mean_median_diff}")
print(f"Senate Mean-Median Difference: {sen_mean_median_diff}")

Presidential Mean-Median Difference: 0.0025703346431988905
Senate Mean-Median Difference: -0.014978682744356098


# Democratic vs Republican Won Districts

In [13]:
# Election results of the initial plan for the two 2016 contests.
pres_election_results = initial_partition["2016_Presidential"]
sen_election_results = initial_partition["2016_Senate"]

# wins(party) gives the number of districts carried by that party; query
# Democratic first, then Republican, for each contest.
pres_dem_won_districts, pres_rep_won_districts = (
    pres_election_results.wins(party) for party in ("Democratic", "Republican")
)
sen_dem_won_districts, sen_rep_won_districts = (
    sen_election_results.wins(party) for party in ("Democratic", "Republican")
)

# Report the district counts.
print(f"Presidential Democratic-won Districts: {pres_dem_won_districts}")
print(f"Presidential Republican-won Districts: {pres_rep_won_districts}")
print(f"Senate Democratic-won Districts: {sen_dem_won_districts}")
print(f"Senate Republican-won Districts: {sen_rep_won_districts}")

Presidential Democratic-won Districts: 5
Presidential Republican-won Districts: 0
Senate Democratic-won Districts: 5
Senate Republican-won Districts: 0


# Number of Cut Edges

In [14]:
# The partition exposes its cut edges under the "cut_edges" key; the metric
# reported here is simply how many there are.
initial_cut_edges = initial_partition["cut_edges"]
number_of_cut_edges = len(initial_cut_edges)

print(f"Number of cut edges: {number_of_cut_edges}")

Number of cut edges: 206
