# Imports

In [1]:
import os
from functools import partial
import json

import geopandas as gpd
import matplotlib.pyplot as plt; plt.style.use("ggplot")
import networkx as nx
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook
import seaborn as sns
import xml.etree.ElementTree as ET
import csv

from gerrychain import (
    Election,
    Graph,
    MarkovChain,
    Partition,
    accept,
    constraints,
    updaters,
)

from gerrychain.metrics import efficiency_gap, mean_median, partisan_gini
from gerrychain.proposals import recom
from gerrychain.updaters import cut_edges
from gerrychain.tree import recursive_tree_part
from io import BytesIO

# Loading state data

In [218]:
# Pennsylvania dual graph: loaded once as a gerrychain Graph and once as the
# raw JSON dictionary.
graph_path = "./pennsylvania.json"
graph = Graph.from_json(graph_path)
# Parse the whole file instead of only its first line, so a pretty-printed
# (multi-line) JSON file loads correctly as well.
with open(graph_path, "r") as myfile:
    graph_dict = json.load(myfile)

In [2]:
# Virginia dual graph: loaded once as a gerrychain Graph and once as the
# raw JSON dictionary.
va_graph_path = "./virginia.json"
va_graph = Graph.from_json(va_graph_path)
# Parse the whole file instead of only its first line, so a pretty-printed
# (multi-line) JSON file loads correctly as well.
with open(va_graph_path, "r") as myfile:
    va_graph_dict = json.load(myfile)

In [3]:
# Output folder for the Virginia run. The folder is created if missing and a
# small marker file (init.txt) is written into it.
newdir = "./VA_Outputs/"
marker_path = newdir + "init.txt"
os.makedirs(os.path.dirname(marker_path), exist_ok=True)
with open(marker_path, "w") as marker:
    marker.write("Created Folder")

# Markov chain setup and sampling

In [13]:
# The 2018 Senate vote columns are converted to floats in place on every node
# so they can be used as numeric election data.
for node_id in va_graph.nodes:
    node_attrs = va_graph.nodes[node_id]
    for column in ("G18DSEN", "G18RSEN"):
        node_attrs[column] = float(node_attrs[column])

In [18]:
# Inspect the attribute dict of node 1 (G18DSEN / G18RSEN should now be floats).
va_graph.nodes[1]

{'boundary_node': False,
 'area': 6734812.159403288,
 'precinct': 'Buckland Mills',
 'locality': 'Prince William County',
 'loc_prec': 'Prince William County Buckland Mills',
 'district': 'Congressional District 1',
 'G18DHOR': '938.0',
 'G18DSEN': 978.0,
 'G18OHOR': '0.0',
 'G18OSEN': '31.0',
 'G18RHOR': '766.0',
 'G18RSEN': 701.0,
 'G17DGOV': '656.0',
 'G17DLTG': '654.0',
 'G17DATG': '647.0',
 'G17DHOD': '601.0',
 'G17RGOV': '596.0',
 'G17RLTG': '603.0',
 'G17RATG': '609.0',
 'G17RHOD': '644.0',
 'G17OHOD': '0.0',
 'G17OGOV': '11.0',
 'G16DPRS': '1007.0',
 'G16RPRS': '807.0',
 'G16OPRS': '78.0',
 'G16DHOR': '920.0',
 'G16RHOR': '932.0',
 'G16OHOR': '11.0',
 'TOTPOP': 3818.000030694916,
 'NH_WHITE': 2133.0000203970503,
 'NH_BLACK': 405.00000532001286,
 'NH_AMIN': 8.000000063845132,
 'NH_ASIAN': 643.0000026063219,
 'NH_NHPI': 1.19045652e-07,
 'NH_OTHER': 9.999999864060221,
 'NH_2MORE': 150.00000134425977,
 'HISP': 469.00000098032007,
 'H_WHITE': 265.0000008445269,
 'H_BLACK': 11.999999

In [120]:
# Total Pennsylvania population: TOT_POP summed over every node of the graph.
pop_count = sum(graph.nodes[node]["TOT_POP"] for node in graph.nodes)

print(pop_count)

12684929


In [19]:
# Total Virginia population: TOTPOP summed over every node of the graph.
va_pop_count = sum(va_graph.nodes[node]["TOTPOP"] for node in va_graph.nodes)

print(va_pop_count)

8001023.999569645


In [127]:
%%time
# Pennsylvania: number of districts and the statewide population to split
# between them (computed from the graph in an earlier cell).
num_dist = 18
pop = pop_count

# Values tracked on every partition: district populations, the set of cut
# edges, and the SEN16 election built from the T16SEND / T16SENR columns.
my_updaters = dict(
    population=updaters.Tally("TOT_POP", alias="population"),
    cut_edges=cut_edges,
    SEN16=Election(
        "SEN16",
        {"democratic": "T16SEND", "republican": "T16SENR"},
    ),
)

# Seed plan with num_dist parts targeting pop / num_dist people each.
# NOTE(review): 0.01 and 3 are presumably the population tolerance and the
# node-repeat count; confirm against the recursive_tree_part signature.
new_plan = recursive_tree_part(
    graph, range(num_dist), pop / num_dist, "TOT_POP", 0.01, 3
)
initial_partition = Partition(graph, new_plan, my_updaters)

CPU times: user 15.2 s, sys: 375 ms, total: 15.6 s
Wall time: 16.2 s


In [20]:
%%time
# Virginia: number of districts and the statewide population to split
# between them (computed from the graph in an earlier cell).
va_num_dist = 11
va_pop = va_pop_count

# Values tracked on every partition: district populations, the set of cut
# edges, and the SEN18 election built from the G18DSEN / G18RSEN columns.
va_my_updaters = dict(
    population=updaters.Tally("TOTPOP", alias="population"),
    cut_edges=cut_edges,
    SEN18=Election(
        "SEN18",
        {"democratic": "G18DSEN", "republican": "G18RSEN"},
    ),
)

# Seed plan with va_num_dist parts targeting va_pop / va_num_dist people each.
# NOTE(review): 0.01 and 3 are presumably the population tolerance and the
# node-repeat count; confirm against the recursive_tree_part signature.
va_new_plan = recursive_tree_part(
    va_graph, range(va_num_dist), va_pop / va_num_dist, "TOTPOP", 0.01, 3
)
va_initial_partition = Partition(va_graph, va_new_plan, va_my_updaters)

CPU times: user 3.71 s, sys: 70.9 ms, total: 3.78 s
Wall time: 3.93 s


In [128]:
%%time
# ReCom proposal for Pennsylvania with its population arguments pre-bound.
proposal = partial(
    recom,
    pop_col="TOT_POP",
    pop_target=pop / num_dist,
    epsilon=0.05,
    node_repeats=3,
)

# Upper bound on the number of cut edges: twice the count in the initial plan.
max_cut_edges = 2 * len(initial_partition["cut_edges"])
compactness_bound = constraints.UpperBound(
    lambda p: len(p["cut_edges"]), max_cut_edges
)

CPU times: user 110 ms, sys: 3.72 ms, total: 114 ms
Wall time: 120 ms


In [21]:
%%time
# ReCom proposal for Virginia with its population arguments pre-bound.
va_proposal = partial(
    recom,
    pop_col="TOTPOP",
    pop_target=va_pop / va_num_dist,
    epsilon=0.05,
    node_repeats=3,
)

# Upper bound on the number of cut edges: twice the count in the initial plan.
va_max_cut_edges = 2 * len(va_initial_partition["cut_edges"])
va_compactness_bound = constraints.UpperBound(
    lambda p: len(p["cut_edges"]), va_max_cut_edges
)

CPU times: user 23.5 ms, sys: 681 µs, total: 24.2 ms
Wall time: 23.8 ms


In [148]:
%%time
# Constraints for the Pennsylvania chain: a population-balance constraint
# built from the initial plan (0.03) plus the cut-edge bound.
chain_constraints = [
    constraints.within_percent_of_ideal_population(initial_partition, 0.03),
    compactness_bound,
]

# 1000-step chain starting from the seed plan; every valid proposal is accepted.
chain = MarkovChain(
    proposal=proposal,
    constraints=chain_constraints,
    accept=accept.always_accept,
    initial_state=initial_partition,
    total_steps=1000,
)

CPU times: user 49 µs, sys: 1 µs, total: 50 µs
Wall time: 55.1 µs


In [22]:
%%time
# Constraints for the Virginia chain: a population-balance constraint built
# from the initial plan (0.05) plus the cut-edge bound.
va_chain_constraints = [
    constraints.within_percent_of_ideal_population(va_initial_partition, 0.05),
    va_compactness_bound,
]

# 10000-step chain starting from the seed plan; every valid proposal is accepted.
va_chain = MarkovChain(
    proposal=va_proposal,
    constraints=va_chain_constraints,
    accept=accept.always_accept,
    initial_state=va_initial_partition,
    total_steps=10000,
)

CPU times: user 7.19 ms, sys: 58 µs, total: 7.24 ms
Wall time: 7.21 ms


In [16]:
%%time

# One entry per chain step: mean-median, efficiency gap, partisan Gini,
# number of cut edges, and seats won by "republican" in SEN16.
mms, egs, pgs, ces, seats = [], [], [], [], []

for partition in tqdm_notebook(chain):
    sen16 = partition["SEN16"]
    mms.append(mean_median(sen16))
    egs.append(efficiency_gap(sen16))
    pgs.append(partisan_gini(sen16))
    ces.append(len(partition["cut_edges"]))
    seats.append(sen16.wins("republican"))

NameError: name 'chain' is not defined

In [23]:
%%time

# One entry per Virginia chain step: mean-median, efficiency gap, partisan
# Gini, number of cut edges, and seats won by "republican" in SEN18.
va_mms = []
va_egs = []
va_pgs = []
va_ces = []
va_seats = []


for step in tqdm_notebook(va_chain):
    sen18 = step["SEN18"]
    va_mms.append(mean_median(sen18))
    va_egs.append(efficiency_gap(sen18))
    va_pgs.append(partisan_gini(sen18))
    va_ces.append(len(step["cut_edges"]))
    # Record into the Virginia list. Appending to the Pennsylvania `seats`
    # list left va_seats empty (so VAseats.csv was written empty) and raised
    # NameError whenever the Pennsylvania cell had not been run.
    va_seats.append(sen18.wins("republican"))

HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))


CPU times: user 36min 23s, sys: 14.7 s, total: 36min 38s
Wall time: 40min 40s


In [29]:
# Seats won by "republican" in the SEN18 election under the initial Virginia plan.
va_initial_partition["SEN18"].wins("republican")

3

In [24]:
# Write each Virginia statistic to its own one-value-per-line file in newdir.
# (The Pennsylvania lists mms / egs / pgs / ces / seats are not exported here;
# the matching PA* writes were previously left commented out.)
va_outputs = [
    ("VAmms", va_mms),
    ("VAegs", va_egs),
    ("VApgs", va_pgs),
    ("VAces", va_ces),
    ("VAseats", va_seats),
]

for stem, values in va_outputs:
    with open(newdir + stem + ".csv", "w") as out_file:
        out_file.writelines(str(value) + "\n" for value in values)

In [271]:
def save_with_pretty_fonts(filename):
    """Save the current matplotlib figure as an SVG with restyled text.

    The current figure is rendered to an in-memory SVG. Every ``<text>``
    element that sits inside a ``<g>`` whose ``id`` starts with ``text_`` and
    that already has a ``style`` attribute gets that style replaced by a
    fixed system-font stack. The modified SVG is written to ``filename`` and
    the current figure is closed.

    Based on
    https://matplotlib.org/3.1.0/gallery/user_interfaces/svg_histogram_sgskip.html

    Parameters
    ----------
    filename : str or file object
        Destination passed to ``ElementTree.write``.
    """
    svg_ns = 'http://www.w3.org/2000/svg'
    text_style = 'text-anchor:middle;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:0.7em;color:#6c757d'

    buf = BytesIO()
    plt.savefig(buf, transparent=True, bbox_inches='tight', format='svg')

    # Register SVG as the default namespace so tags are serialized without a
    # generated "ns0:" prefix.
    ET.register_namespace('', svg_ns)
    # Only the element tree is needed; the id map returned by ET.XMLID was unused.
    root = ET.fromstring(buf.getvalue())

    for group in root.iter('{%s}g' % svg_ns):
        if not group.attrib.get('id', '').startswith('text_'):
            continue
        for text in group.iter('{%s}text' % svg_ns):
            if 'style' in text.attrib:
                text.attrib['style'] = text_style
    ET.ElementTree(root).write(filename)
    plt.close()

# Visualization

In [284]:
# Pennsylvania mean-median scores

# "none" keeps labels as SVG <text> elements, which save_with_pretty_fonts restyles.
plt.rcParams["svg.fonttype"] = "none"
fig, ax = plt.subplots()
sns.distplot(mms, bins=40, color="#4b89b9", ax=ax, kde=False)
ax.set_yticklabels([])
# NOTE(review): -0.068 is presumably the score of a reference plan; confirm the source.
ax.axvline(x=-0.068, color="red")
ax.yaxis.set_visible(False)
ax.grid(False)
# Axis labels intentionally left off:
#   plt.xlabel("Mean-Median Score")
#   plt.ylabel("Frequency")
save_with_pretty_fonts('pa_mm.svg')

In [285]:
# Pennsylvania vs. Virginia mean-median scores, overlaid on one axis

# "none" keeps labels as SVG <text> elements, which save_with_pretty_fonts restyles.
plt.rcParams["svg.fonttype"] = "none"
fig, ax = plt.subplots()
for scores, colour, series_label in (
    (mms, "#4b89b9", "PA — SEN16"),
    (va_mms, "#d2553e", "VA — SEN18"),
):
    sns.distplot(scores, bins=40, color=colour, ax=ax, kde=False, label=series_label)
ax.set_yticklabels([])
ax.yaxis.set_visible(False)
ax.grid(False)
# Legend and axis labels intentionally left off:
#   ax.legend(loc="upper left")
#   plt.xlabel("Mean-Median Score")
#   plt.ylabel("Frequency")
save_with_pretty_fonts('pa_vs_va_mm.svg')

In [286]:
# Pennsylvania efficiency gap scores

# "none" keeps labels as SVG <text> elements, which save_with_pretty_fonts restyles.
plt.rcParams["svg.fonttype"] = "none"
fig, ax = plt.subplots()
sns.distplot(egs, bins=40, color="#4b89b9", ax=ax, kde=False)
ax.set_yticklabels([])
# NOTE(review): -0.04 is presumably the score of a reference plan; confirm the source.
ax.axvline(x=-0.04, color="red")
ax.yaxis.set_visible(False)
ax.grid(False)
# Axis labels intentionally left off:
#   plt.xlabel("Efficiency Gap Score")
#   plt.ylabel("Frequency")
save_with_pretty_fonts("pa_eg.svg")

In [287]:
# Pennsylvania cut-edge counts

# "none" keeps labels as SVG <text> elements, which save_with_pretty_fonts restyles.
plt.rcParams["svg.fonttype"] = "none"
fig, ax = plt.subplots()
sns.distplot(ces, bins=40, color="#4b89b9", ax=ax, kde=False)
ax.set_yticklabels([])
# NOTE(review): 1500 is presumably the cut-edge count of a reference plan; confirm the source.
ax.axvline(x=1500, color="red")
ax.yaxis.set_visible(False)
ax.grid(False)
# Axis labels intentionally left off:
#   plt.xlabel("Cut Edges")
#   plt.ylabel("Frequency")
save_with_pretty_fonts("pa_ce.svg")

In [288]:
# People's heights histogram: general population vs. NBA players
# (synthetic, normally distributed samples; inches divided by 12 to get feet)

# Seeded, dedicated generator: the synthetic sample and the saved figure are
# identical on every run, and NumPy's global random state is left untouched.
heights_rng = np.random.RandomState(0)

n = 1000000
male_heights = (70 + 4 * heights_rng.randn(n)) / 12
female_heights = (65 + 3.5 * heights_rng.randn(n)) / 12
all_heights = np.concatenate((male_heights, female_heights), axis=0)
# 2*n samples so the NBA series has as many points as the combined population.
nba_heights = (79 + 3.5 * heights_rng.randn(2*n)) / 12

# "none" keeps labels as SVG <text> elements, which save_with_pretty_fonts restyles.
plt.rcParams['svg.fonttype'] = 'none'
fig, ax = plt.subplots(1)
# sns.distplot(male_heights, bins=100, color='#4b89b9', ax=ax, kde=False)
# sns.distplot(female_heights, bins=100, color='#d2553e', ax=ax, kde=False)
sns.distplot(all_heights, bins=100, color='#4b89b9', ax=ax, kde=False, label="all people")
sns.distplot(nba_heights, bins=100, color="red", ax=ax, kde=False, label="NBA players")
ax.set_yticklabels([])
ax.yaxis.set_visible(False)
ax.grid(False)
plt.xlim(3, 8)
ax.axvline(x=7, color="red", label="Your Friend's Height")
# ax.legend(loc="upper left")
# plt.xlabel('Height (feet)')
save_with_pretty_fonts("people_vs_nba_heights.svg")