# GerryChain Demo!!

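First, `git clone` the Colorado (CO) shapefile repository from https://github.com/mggg-states/CO-shapefiles into the same directory as this notebook; the code below reads it from the relative path `CO-shapefiles/`.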

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt
from gerrychain import (GeographicPartition, Partition, Graph, MarkovChain,
                        proposals, updaters, constraints, accept, Election)
from gerrychain.proposals import recom
from functools import partial
import pandas

### Load the graph we want to use.

In [None]:
# Load the graph from the precinct shapefile inside the cloned CO-shapefiles directory.
graph = Graph.from_file("CO-shapefiles/co_precincts.shp")

### Inspecting the contents of the graph

In [None]:
# Look at a single node (key 20) as a sample of the data the graph carries.
graph.nodes[20]

### Configure our elections, telling GerryChain which columns of our shapefile hold each party's vote totals.

In [None]:
# Each Election pairs a name with a mapping from party to the shapefile
# column that holds that party's vote totals.
elections = [
    Election(
        "GOV18",
        {
            "Democratic": "GOV18D",
            "Republican": "GOV18R",
        },
    ),
]

### Configure our updaters (everything we want to compute for each plan in the ensemble).

In [None]:
# Start the updater table with a population tally, used to measure how close
# the district populations are to equality. "TOTPOP" is the shapefile's
# population column; the tally is exposed under the name "population".
my_updaters = dict(population=updaters.Tally("TOTPOP", alias="population"))

In [None]:
# Register every configured election as an updater, keyed by the election's
# name, so results are computed from the shapefile's vote totals.
election_updaters = {contest.name: contest for contest in elections}
my_updaters.update(election_updaters)

### Instantiate the initial state of our Markov chain, using the Congressional districting plan.

In [None]:
# Seed plan: district assignment is read from the "CD116FP" column, and the
# updaters configured above are attached to the partition.
initial_partition = GeographicPartition(graph, assignment="CD116FP", updaters=my_updaters)

In [None]:
# Plot the initial partition.
initial_partition.plot()

### Set up a proposal mechanism

In [None]:
# Ideal district population: the total population divided evenly among the
# districts of the seed plan. recom is given this target so that it can bail
# out early on unbalanced partitions, which improves speed.
ideal_population = (
    sum(initial_partition["population"].values()) / len(initial_partition)
)

# recom takes extra parameters (pop_col, pop_target, epsilon, node_repeats);
# functools.partial binds them here.
proposal = partial(
    recom,
    pop_col="TOTPOP",
    pop_target=ideal_population,
    epsilon=0.02,
    node_repeats=2,
)

### Set up a compactness constraint: a plan may have at most 2 times as many "cut edges" as the seed plan

In [None]:
# Upper bound on the number of cut edges in a plan: twice the number of cut
# edges in the seed plan.
compactness_bound = constraints.UpperBound(
    lambda plan: len(plan["cut_edges"]),
    len(initial_partition["cut_edges"]) * 2,
)

### Configure the MarkovChain

In [None]:
# Assemble the chain: start from the seed plan, propose with recom, accept
# every proposal, and stop after 20 steps.
chain = MarkovChain(
    initial_state=initial_partition,
    proposal=proposal,
    constraints=[
        # Keep district populations within 2% of equality
        constraints.within_percent_of_ideal_population(initial_partition, 0.02),
        compactness_bound,
    ],
    accept=accept.always_accept,
    total_steps=20,
)

### Plotting every 5th plan in the chain

In [None]:
plot_frequency = 5
# Number the plans from 1 so that the plans drawn are exactly the 5th, 10th,
# 15th, ... produced by iterating the chain. (The previous hand-rolled counter
# started at 1 and was incremented before the check, so it drew the 4th, 9th,
# 14th, ... plans instead.)
for i, partition in enumerate(chain, start=1):
    if i % plot_frequency == 0:
        partition.plot()

### Running the chain again, this time collecting the % Dem vote in each district

In [None]:
# One row per plan in the chain: the Democratic vote shares of the "GOV18"
# election across districts, sorted in ascending order.
data = pandas.DataFrame(
    [sorted(plan["GOV18"].percents("Democratic")) for plan in chain]
)

In [None]:
# Display the collected table: one row per plan, one column per position in
# the sorted list of district vote shares.
data

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Draw 50% line
ax.axhline(0.5, color="#cccccc")

# Draw boxplot: one box per column of `data`, placed at x = 0, 1, 2, ...
data.boxplot(ax=ax, positions=range(len(data.columns)))

# Overlay the first row of `data` (.iloc[0]) as red dots. It is the first plan
# produced by the chain, which the title refers to as the seed plan.
ax.plot(data.iloc[0], "ro")

# Annotate. The percentages come from the "GOV18" election configured earlier
# in this notebook, so the axis label names that election.
ax.set_title("Comparing the seed plan to an ensemble")
ax.set_ylabel("Democratic vote % (Governor 2018)")
ax.set_xlabel("Sorted districts")
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.25, 0.5, 0.75, 1])

plt.show()