<a href="https://colab.research.google.com/github/drscook/MathVGerrmandering_CMAT_2022/blob/main/make_animations_sim.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment bootstrap cell: run once per fresh Colab runtime, before any other cell.
# Installs the condacolab helper with pip, then calls its installer.
! pip install -q condacolab  
import condacolab
condacolab.install()  
# Install the geospatial / MCMC stack from conda-forge, and pandas-bokeh from the
# patrikhlobil channel.
! mamba install -q -y -c conda-forge gerrychain geopandas
! mamba install -q -y -c patrikhlobil pandas-bokeh
# Ask the kernel to shut down with restart=True - presumably so the packages
# installed above are importable by the cells that follow.
from IPython import get_ipython
get_ipython().kernel.do_shutdown(True)

In [None]:
# create graph object for MCMC
# Imports for the whole notebook; the short aliases (np, pd, gpd, nx, gc) are used by every cell below.
import pathlib, gdown, numpy as np, pandas as pd, geopandas as gpd, networkx as nx, gerrychain as gc, pandas_bokeh
from functools import partial
from gerrychain.proposals import recom
from gerrychain.accept import always_accept
# Presumably directs pandas_bokeh figures to render inline in the notebook.
pandas_bokeh.output_notebook()
# Mount Google Drive at /content/drive; the data paths defined below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# adjust this path to point where you want to store data within your google drive
path = pathlib.Path('/content/drive/MyDrive/gerrymandering/2021_data')
# make sure the folder exists, otherwise both the download below and the later
# graph-file write fail on a first run
path.mkdir(parents=True, exist_ok=True)
data_file = path / 'TX_2020_vtd.parquet'
graph_file = path / 'TX_2020_vtd_graph.json'

# if you don't have the data file already, this should fetch it
if not data_file.is_file():
    # gdown is already imported at the top of this cell, so call its Python API
    # directly instead of shelling out (and pip-installing it a second time)
    url = 'https://drive.google.com/uc?id=1ikMlLlKSCPNemsrh-l3XFamFL8GoK3eM'
    print(f'trying to fetch data file from google drive {url}')
    gdown.download(url, str(data_file), quiet=False)
    # fail here with a clear message rather than later inside read_parquet
    if not data_file.is_file():
        raise FileNotFoundError(f'could not download {data_file} from {url}')

# load only the columns that the chain and the plots use
cols = ['geometry', 'fips', 'county', 'total', 'density', 'polsby_popper', 'aland', 'perim', 'plans2168']
gdf = gpd.read_parquet(data_file, columns=cols)

meters_per_mile = 1609.34
# coordinate reference systems, keyed by what each one is used for
crs = dict(
    census='EPSG:4269',    # degrees - used by Census
    area='ESRI:102003',    # meters
    length='ESRI:102005',  # meters
)

try:
    # reuse the cached adjacency graph when it is already on disk
    G = gc.Graph.from_json(graph_file)
except FileNotFoundError:
    # otherwise build it from the geometries (slow) and cache it for next time;
    # the geometries are projected to the length CRS first, hence reproject=False
    length_geometry = gdf[['geometry']].to_crs(crs['length'])
    G = gc.Graph.from_geodataframe(length_geometry, reproject=False)
    G.to_json(graph_file)

# each row's share of the total population, scaled to 31 seats
# (presumably the number of districts in the plan - confirm)
population_share = gdf['total'] / gdf['total'].sum()
gdf['seats'] = population_share * 31

# copy every non-geometry column onto the graph nodes
node_attributes = gdf.drop(columns='geometry')
G.add_data(node_attributes)


### gerrychain ###
def no_defect_cap(steps=100):
    """Run a basic ReCom Markov chain seeded with the ``plans2168`` assignment.

    The only constraint is a population-balance check; every proposal that
    passes it is accepted.

    Args:
        steps: value passed to the chain as ``total_steps``.

    Returns:
        A list holding every state the chain yields, in order.
    """
    tracked = {
        "cut_edges": gc.updaters.cut_edges,
        "population": gc.updaters.Tally("total", alias="population"),
    }
    seed_plan = gc.GeographicPartition(G, assignment="plans2168", updaters=tracked)

    # mean district population of the seed plan is the ReCom target
    district_pops = seed_plan["population"]
    target_pop = sum(district_pops.values()) / len(seed_plan)

    balance_check = gc.constraints.within_percent_of_ideal_population(seed_plan, 0.1)
    recom_step = partial(
        recom,
        pop_col="total",
        pop_target=target_pop,
        epsilon=0.02,
        node_repeats=2,
    )

    walk = gc.MarkovChain(
        proposal=recom_step,
        constraints=[balance_check],
        accept=always_accept,
        initial_state=seed_plan,
        total_steps=steps,
    )
    return [state for state in walk]




def with_defect_cap(steps=100):
    """Run a ReCom chain whose "defect" may never exceed that of the seed plan.

    "Defect" is our measure of how far a plan is from the TX county-line rule.
    For each county it compares
      * the number of distinct districts touching the county against
        ceil(county seat share), and
      * the number of districts lying wholly inside the county against
        floor(county seat share),
    and sums the absolute differences over all counties.

    Args:
        steps: value passed to the chain as ``total_steps`` (defaults to 100,
            matching ``no_defect_cap``).

    Returns:
        A list holding every state the chain yields, in order.
    """
    # Per-county seat share, accumulated from the node attribute 'seats'.
    # Built BEFORE the partition so the updaters below never run against
    # undefined targets, whatever the evaluation order of the library is.
    seats = {}
    for _, attrs in G.nodes(data=True):
        seats[attrs['county']] = seats.get(attrs['county'], 0) + attrs['seats']
    whole_target = {c: int(np.floor(s)) for c, s in seats.items()}
    inter_target = {c: int(np.ceil(s)) for c, s in seats.items()}

    def get_defect(p):
        """Return the total defect (a non-negative int) of partition ``p``."""
        districts_in_county = {}
        counties_in_district = {}
        for node, district in p.assignment.items():
            county = G.nodes[node]['county']
            districts_in_county.setdefault(county, set()).add(district)
            counties_in_district.setdefault(district, set()).add(county)

        # target minus actual number of districts intersecting each county
        inter = inter_target.copy()
        for county, districts in districts_in_county.items():
            inter[county] -= len(districts)

        # target minus actual number of districts contained in a single county
        whole = whole_target.copy()
        for counties in counties_in_district.values():
            if len(counties) == 1:
                (county,) = counties
                whole[county] -= 1

        return sum(abs(inter[county]) + abs(whole[county]) for county in inter)

    def get_polsby_popper(p):
        """Return ``{district: 4*pi*A / P**2}`` for every district of ``p``.

        A is the sum of the nodes' 'aland'. P is the sum of the nodes' 'perim'
        minus twice the 'shared_perim' of every edge inside the district,
        because a shared boundary is counted once by each of its two nodes.
        """
        scores = {}
        for district in p.parts:
            H = p.subgraphs[district]
            area = sum(a for _, a in H.nodes(data='aland'))
            perim_extern = sum(d for _, d in H.nodes(data='perim'))
            perim_intern = sum(d for _, _, d in H.edges(data='shared_perim'))
            perimeter = perim_extern - 2 * perim_intern
            scores[district] = 4 * np.pi * area / perimeter**2
        return scores

    initial_partition = gc.GeographicPartition(
        G,
        assignment="plans2168",
        updaters={
            "cut_edges": gc.updaters.cut_edges,
            "population": gc.updaters.Tally("total", alias="population"),
            "defect": get_defect,
            "polsby_popper": get_polsby_popper,
        },
    )

    ideal_defect = get_defect(initial_partition)
    ideal_population = sum(initial_partition["population"].values()) / len(initial_partition)

    pop_constraint = gc.constraints.within_percent_of_ideal_population(initial_partition, 0.1)
    # Cap the defect at the seed plan's value using the bound computed above.
    # NOTE(review): this replaces SelfConfiguringUpperBound, which derives the
    # same bound from the first partition it sees; the explicit form also
    # behaves predictably if the seed plan's defect is 0 - confirm against the
    # installed gerrychain version.
    defect_constraint = gc.constraints.UpperBound(get_defect, ideal_defect)

    proposal = partial(
        recom,
        pop_col="total",
        pop_target=ideal_population,
        epsilon=0.02,
        node_repeats=2,
    )

    chain = gc.MarkovChain(
        proposal=proposal,
        constraints=[
            pop_constraint,
            defect_constraint,
        ],
        accept=always_accept,
        initial_state=initial_partition,
        total_steps=steps,
    )
    return list(chain)


### PLOT ###
def plot(partitions, file=None):
    """Render a sequence of partitions as one interactive map with a step slider.

    Each partition's assignment becomes one column joined onto ``gdf``; the
    slider switches between those columns. The figure is shown in the notebook
    and, when ``file`` is given, its HTML is also written there.

    Args:
        partitions: iterable of partitions (e.g. the list returned by
            ``no_defect_cap`` / ``with_defect_cap``).
        file: optional path for the HTML output; ``None`` skips writing.

    Raises:
        ValueError: if ``partitions`` is empty.
    """
    partitions = list(partitions)
    if not partitions:
        raise ValueError('plot() needs at least one partition')

    # image generation settings
    height = 600
    colormap = "Paired"

    xlim = [-106.2, -94.0]
    ylim = [25.4, 36.6]
    # keep the plotted aspect ratio equal to the lon/lat window's
    width = round((xlim[1] - xlim[0]) / (ylim[1] - ylim[0]) * height)

    # one column per step, rows indexed by graph node
    B = pd.concat([p.assignment.to_series() for p in partitions], axis=1)
    steps = [str(i) for i in range(B.shape[1])]
    B.columns = steps

    # Spread the district labels evenly over 0..256 so neighbouring label values
    # get distinct colours. The map is keyed by the labels actually present, so
    # it works whether districts are numbered from 0, from 1, or otherwise.
    labels = sorted(pd.unique(B.to_numpy().ravel()).tolist())
    shades = np.linspace(0, 256, len(labels)).round().astype(int).tolist()
    clr = dict(zip(labels, shades))
    B = B.apply(lambda col: col.map(clr))

    X = gdf.join(B).reset_index()
    fig = X.plot_bokeh(
        simplify_shapes=100,
        hovertool_string='@county<br>@vtd<br>',
        slider=steps,
        slider_name="step",
        fill_alpha=0.8,
        line_alpha=0.00,
        show_colorbar=False,
        xlim=xlim,
        ylim=ylim,
        figsize=(width, height),
        colormap=colormap,
        return_html=True,
        show_figure=True,
    )

    # Write only when a target was supplied; any real I/O or type error now
    # surfaces instead of being silently swallowed.
    if file is not None:
        with open(file, 'w', encoding='utf-8') as f:
            f.write(fig)

In [None]:
# Run the unconstrained chain for 200 steps and save the animation as HTML.
no_cap_html = path / 'no_defect_cap.html'
partitions = no_defect_cap(steps=200)
plot(partitions, file=no_cap_html)

# To animate the defect-capped chain instead, uncomment:
# partitions = with_defect_cap(200)
# plot(partitions, path / 'with_defect_cap.html')
