In [None]:
import os
from functools import partial
import json

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook

from gerrychain import (
    Election,
    Graph,
    MarkovChain,
    Partition,
    accept,
    constraints,
    updaters,
)

from gerrychain.metrics import efficiency_gap, mean_median
from gerrychain.proposals import recom
from gerrychain.updaters import cut_edges
from gerrychain.tree import recursive_tree_part

In [None]:
graph_path = "../nevada/BG32.json"
graph = Graph.from_json(graph_path)
with open (graph_path, "r") as myfile:
    data=myfile.readlines()
graph_dict = json.loads(data[0])

In [None]:
num_dist = 4

# Exercise: Compute exact population from your data.
pop = 2700551

my_updaters = {
    "population": updaters.Tally("TOTPOP", alias="population"),
    "cut_edges": cut_edges,
    "hvap": updaters.Tally("HVAP", alias="hvap"),
    "vap": updaters.Tally("VAP", alias="vap"),
    "wvap": updaters.Tally("WVAP", alias="wvap"),
}

new_plan = recursive_tree_part(graph,
                               range(num_dist),
                               pop/num_dist,
                               "TOTPOP",
                               0.05,
                               1)
initial_partition = Partition(graph,
                              new_plan,
                              my_updaters)

In [None]:
proposal = partial(recom,
                   pop_col = "TOTPOP",
                   pop_target = pop/num_dist,
                   epsilon = 0.05,
                   node_repeats = 3)

compactness_bound = constraints.UpperBound(
    lambda p: len(p["cut_edges"]), 2 * len(initial_partition["cut_edges"])
)

In [None]:
chain = MarkovChain(
    proposal=proposal,
    constraints=[
        constraints.within_percent_of_ideal_population(initial_partition, 0.1),
        compactness_bound,
    ],
    accept=accept.always_accept,
    initial_state=initial_partition,
    total_steps=100,
)

In [None]:
%%time
data = pd.DataFrame(sorted(list(np.divide(list(step["hvap"].values()),list(step["vap"].values()))) for step in tqdm_notebook(chain)))

fig, ax = plt.subplots(figsize=(8, 6))

# Draw 50% line
ax.axhline(0.5, color="#cccccc")

# Draw boxplot
data.boxplot(ax=ax, positions=range(len(data.columns)))

# Annotate
ax.set_title("Hispanic Representation in NV Districts")
ax.set_ylabel("Hispanic Voting Age %")
ax.set_xlabel("District #")

plt.show()

In [7]:
data

Unnamed: 0,0,1,2,3
0,0.125683,0.345507,0.257166,0.163360
1,0.146690,0.381581,0.213647,0.162629
2,0.148306,0.298062,0.291254,0.162629
3,0.148306,0.345507,0.231307,0.163360
4,0.148306,0.365351,0.216526,0.163360
5,0.148306,0.365351,0.221958,0.162629
6,0.153962,0.351843,0.221910,0.163360
7,0.159553,0.359517,0.221910,0.163360
8,0.162041,0.365575,0.221910,0.163360
9,0.162512,0.216802,0.172738,0.351673
