# Metaheuristics for Optimizing Voter Distributions

## Installing and importing
We first install the necessary packages. If you are using Google Colab (or most other web notebooks), you may install the necessary packages here: 

In [None]:
!pip install bitarray
!pip install gerrychain

We then import all necessary packages: 

In [None]:
import pickle
import numpy as np
import pandas as pd
from numpy import random
import math
import time
import statistics
from bitarray import bitarray
from tqdm import tqdm, trange, tnrange, tqdm_notebook

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('paper')
sns.set(style="whitegrid")
from gerrychain import (GeographicPartition, Partition, Graph, MarkovChain,
                        proposals, updaters, constraints, accept, Election)
from gerrychain.proposals import recom
from functools import partial
import pandas

## Testing with known cases
The following tests the algorithms described above against known cases of the grid. Specifically, it only works for the $5\times 5$ grid, for which we have exhaustive data. It requires importing a large data file, which is generated using `grid_search` and `analysis`. We replace the $\mathsf{Eval}$ function with a deterministic lookup (from the dataframe collected using `grid_search`) and demonstrate how well the algorithms work. 

In [None]:
# Load the precomputed exhaustive-search dataframe. The context manager
# closes the file handle once loading finishes.
# NOTE: pickle can execute arbitrary code while loading; only open a
# "df.p" that you generated yourself or otherwise trust.
with open("df.p", "rb") as df_file:
    df = pickle.load(df_file)

In [None]:
def evaluate(delta, times):
    """
    Replacement for the evaluation function: instead of sampling,
    looks up the 'Avg' value stored in df for the given delta. 

    delta: rectangular array, converted to its row index with to_N. 
    times: not used by this lookup (callers pass their mcmc_steps value). 
    """
    index = to_N(delta)
    return df.loc[index]['Avg']

## Defining Constants
We define the constants that fix the size of the problem. The grid is $clen \times rlen$: there are $clen$ districts with $rlen$ blocks each. 

In [None]:
# Grid dimensions: deltas are indexed as delta[r][c] with
# 0 <= r < rlen and 0 <= c < clen.
clen = 5
rlen = 5
# Total number of blocks, i.e. the length of a flat bitarray delta.
num_blocks = clen * rlen

## Working with BitArrays
Voter distributions ($\Delta$) are stored as BitArrays (or arrays of BitArrays). They are the least computationally intensive and memory taxing way to test these, and allow us to use XOR (^) later instead of int equality (==), which also saves a lot of time. 

In [None]:
def n_bitarray(n):
    """
    Builds a bitarray made of n 'true' bits followed by
    (num_blocks - n) 'false' bits. 
    """
    bits = bitarray()
    bits.extend([True] * n)
    bits.extend([False] * (num_blocks - n))
    return bits

def sq_array(ar):
    """
    Cuts a flat bitarray into a rectangular array:
    rlen consecutive slices of clen entries each
    """
    return [ar[clen * row:clen * (row + 1)] for row in range(rlen)]

def get_bitarray(delta):
    """
    Flattens a sq/rectangular array row by row
    into a single bitarray
    """
    flat = bitarray()
    for bit in (entry for row in delta for entry in row):
        flat.append(bit)
    return flat

def print_ar(ar):
    """
    Writes a rectangular array to stdout, one line per row,
    showing 'X' for set entries and 'O' for unset ones
    """
    print("Grid: ")
    for cells in ar:
        # every symbol is followed by a space, including the last one
        print("".join("X " if cell else "O " for cell in cells))
    print()

def to_N(delta):
    """
    Encodes a rectangular array as an integer index. 
    The entries are folded in from the last row/column to the
    first, so delta[0][0] ends up as the least significant bit. 
    """
    index = 0
    for r in range(rlen - 1, -1, -1):
        for c in range(clen - 1, -1, -1):
            index <<= 1
            index |= delta[r][c]
    return index

def bitarray_to_N(bar):
    """
    Encodes a flat bitarray as an integer index, reading the
    first entry as the most significant bit
    """
    value = 0
    for bit in bar:
        value <<= 1
        value |= bit
    return value

## Metaheuristics/Optimizers
We first set the number of MCMC steps passed to each evaluation, and whether the optimizers should show their progress bars:

In [None]:
# Passed as `disable=` to the optimizers' tqdm progress bars.
disable_tqdm = False
# Value the optimizers pass as the second argument of evaluate().
mcmc_steps = 1

### Random Sampling
The following generates random Deltas and evaluates them. 

In [None]:
def random_step(n):
    """
    Produces a randomly shuffled delta with NumH = n,
    returned as a rectangular array
    """
    flat = n_bitarray(n)
    random.shuffle(flat)
    grid = sq_array(flat)
    return grid

def random_sample(times, n):
    """
    Draws `times` random deltas with NumH = n, evaluates
    each of them and returns the list of scores
    """
    progress = tnrange(times, desc = 'Random Sample', leave = False, disable = disable_tqdm)
    return [evaluate(random_step(n), mcmc_steps) for _ in progress]

### Shotgun Greedy (Random-Restart Iterated Local Search) Algorithm
The following performs a RRILS optimization, which relies on a cellular automata evolutionary algorithm. 

In [None]:
# Threshold for Cellular Automata: a tile is unhappy when the fraction
# of its neighbours sharing its value is below this number.
threshold = 0.6

def unhappy(delta):
    """
    Finds the unhappy tiles of delta and measures its clustering. 

    A tile is unhappy when the fraction of its in-grid orthogonal
    neighbours holding the same value is below `threshold`. A tile
    without any neighbour is treated as happy. 

    Returns a dict with the keys
    'coords': list of (row, col) coordinates of the unhappy tiles, 
    'vals': bitarray with the values of those tiles, in the same order, 
    'Clus': matching tile/neighbour pairs divided by all such pairs, 
    'ClusH': the same ratio counted over the 'true' tiles only. 
    A ratio whose denominator is zero (for instance when delta has no
    'true' tile) is reported as 0.0 instead of raising ZeroDivisionError. 
    """
    unhappy_tiles = []
    vals = bitarray()
    # Both lists are indexed by the tile value (0 or 1).
    total_con = [0, 0]
    con = [0, 0]
    for row in range(rlen):
        for col in range(clen):
            box = delta[row][col]
            total_box = 0
            same_box = 0
            for d_row, d_col in [(1, 0), (-1, 0), (0, -1), (0, 1)]:
                nr = row + d_row
                nc = col + d_col
                if 0 <= nr < rlen and 0 <= nc < clen:
                    total_box += 1
                    if box == delta[nr][nc]:
                        same_box += 1
            total_con[box] += total_box
            con[box] += same_box
            # Guard on total_box: an isolated tile has no ratio to compare.
            if total_box and same_box / total_box < threshold:
                unhappy_tiles.append((row, col))
                vals.append(box)
    all_pairs = total_con[0] + total_con[1]
    return {'coords': unhappy_tiles, 
            'vals': vals, 
            'Clus': (con[0] + con[1]) / all_pairs if all_pairs else 0.0, 
            'ClusH': con[1] / total_con[1] if total_con[1] else 0.0}

def step(unhappy_coords_shuffled, unhappy_list, delta):
    """
    Builds the delta obtained by writing the i-th value of unhappy_list
    at the i-th coordinate of unhappy_coords_shuffled. 

    unhappy_coords_shuffled: sequence of (row, col) coordinates. 
    unhappy_list: values to place, indexed like the coordinates. 
    delta: rectangular array; it is left unmodified. 
    Returns a new rectangular array. 
    """
    # Copy every row first. Assigning through a plain alias of delta would
    # overwrite the caller's state, so a rejected proposal could never be
    # discarded.
    ndelta = [row.copy() for row in delta]
    for idx, (r, c) in enumerate(unhappy_coords_shuffled):
        ndelta[r][c] = unhappy_list[idx]
    return ndelta

def greedy_step(delta):
    """
    Performs one evolutionary step on delta: the values of the
    unhappy tiles are reassigned to the unhappy coordinates
    after those coordinates have been randomly shuffled
    """
    report = unhappy(delta)
    coords, values = report['coords'], report['vals']
    random.shuffle(coords)
    return step(coords, values, delta)

def greedy_seq(n, mcmc_steps):
    """
    Starts from a random delta with NumH = n and applies greedy
    steps until a step leaves the delta unchanged. 
    Returns (list of scores, number of greedy steps taken); the
    score of the final, unchanged delta is not added to the list. 
    """
    delta = random_step(n)
    scores = [evaluate(delta, mcmc_steps)]
    previous_idx = to_N(delta)
    steps_taken = 0
    while True:
        delta = greedy_step(delta)
        steps_taken += 1
        current_idx = to_N(delta)
        score = evaluate(delta, mcmc_steps)
        if current_idx == previous_idx:
            return (scores, steps_taken)
        scores.append(score)
        previous_idx = current_idx

def shotgun_greedy(k_max, n, mcmc_steps):
    """
    Restarts greedy_seq from fresh random deltas until the
    accumulated number of greedy steps reaches k_max. 
    Returns the list of all scores collected over the restarts. 
    """
    scores = []
    steps_done = 0
    pbar = tqdm_notebook(total=k_max, desc = 'Shotgun Greedy', leave = False, disable = disable_tqdm)
    while steps_done < k_max:
        seq_scores, seq_steps = greedy_seq(n, mcmc_steps)
        scores.extend(seq_scores)
        steps_done += seq_steps
        pbar.update(seq_steps)
    pbar.close()
    return scores

### Simulated Annealing
This is the simulated annealing algorithm, which combines a bit of all the previous algorithms we've used. 

We first define a probability of acceptance function: 

In [None]:
def prob_accept(change_e, temp):
    """
    Logistic acceptance probability 1 / (1 + exp(-change_e / temp)). 
    Returns 0 when the exponential overflows. 
    """
    try:
        weight = math.exp(-change_e / temp)
    except OverflowError:
        return 0
    return 1 / (1 + weight)

`random_accept_thresh` gives the probability that a random state is accepted. 

`mcmc_steps` gives the number of mcmc steps to perform.

`temp_initial` gives the initial temperature.

`temp_ratio` is the cooling schedule.

`simulated_anneal` runs the annealing steps as documented. 

`simulated_anneal_random` does simulated annealing but with random progressions (instead of cellular automata progressions). 

In [None]:
# NOTE(review): random_accept_thresh is not consulted by any acceptance
# test in this file — confirm whether it is still needed.
random_accept_thresh = 0.4
# Starting temperature of simulated_anneal.
temp_initial = 8
# Temperature multiplier applied by simulated_anneal after an accepted move.
temp_ratio = 0.96
# Temperature multiplier applied by simulated_anneal when both the greedy
# and the random proposal of an iteration are rejected.
heating_ratio = 1.1

# Simulated Annealing algorithm with greedy chance
def simulated_anneal(k_max, n):
    """
    Runs simulated annealing with greedy (cellular automata) proposals
    and a random fallback, as described in the paper/documentation. 

    Each iteration first proposes greedy_step of the current delta. If
    that proposal is rejected, a fresh random delta is proposed instead. 
    A proposal is accepted when it improves the score, or otherwise when
    prob_accept of the change (divided by clen) beats a uniform draw. 
    Accepted moves multiply the temperature by temp_ratio; an iteration
    in which both proposals are rejected multiplies it by heating_ratio. 

    k_max: evaluation budget; the loop ends once at least k_max
           proposals have been evaluated (two may be evaluated in the
           same iteration). 
    n: NumH of the deltas explored. 
    Returns the list of every evaluated score, starting with the score
    of the initial delta. 
    """
    delta = random_step(n)
    eval_now = evaluate(delta, mcmc_steps)
    samp = [eval_now]
    temp = temp_initial
    k = 0
    pbar = tqdm_notebook(total=k_max, desc = 'Simulated Anneal', leave = False, disable = disable_tqdm)
    while k < k_max:
        # Propose from a copy of the rows so the current state survives
        # unchanged when the proposal is rejected.
        delta_n = greedy_step([row.copy() for row in delta])
        eval_new = evaluate(delta_n, mcmc_steps)
        samp.append(eval_new)
        k += 1
        pbar.update(1)
        change_e = eval_new - eval_now
        # we can also give the change as a proportion, which makes hyperparameter optimization
        # better for generalization: 
        prop_change_e = change_e / clen
        if change_e > 0 or prob_accept(prop_change_e, temp) > random.uniform(0, 1):
            delta = delta_n
            eval_now = eval_new
            temp = temp * temp_ratio
        else:
            delta_rand_n = random_step(n)
            eval_rand_new = evaluate(delta_rand_n, mcmc_steps)
            samp.append(eval_rand_new)
            k += 1
            pbar.update(1)
            change_rand_e = eval_rand_new - eval_now
            prop_random_change_e = change_rand_e / clen
            # Test the random proposal's own change: change_e is never
            # positive in this branch, so using it here would mean an
            # improving random state is not always accepted.
            if change_rand_e > 0 or prob_accept(prop_random_change_e, temp) > random.uniform(0, 1):
                delta = delta_rand_n
                eval_now = eval_rand_new
                temp = temp * temp_ratio
            else:
                temp = temp * heating_ratio
    pbar.close()
    return samp

def random_swap_step(delta, n):
    """
    Produces a neighbour of delta: n randomly chosen positions are
    selected and the values found there are shuffled among those
    same positions. Returns the result as a rectangular array. 
    """
    flat = get_bitarray(delta)
    mask = n_bitarray(n)
    random.shuffle(mask)
    # collect the values sitting at the selected positions, in order
    picked_vals = bitarray()
    for pos, picked in enumerate(mask):
        if picked:
            picked_vals.append(flat[pos])
    random.shuffle(picked_vals)
    # write the shuffled values back to the same positions
    slot = 0
    for pos, picked in enumerate(mask):
        if picked:
            flat[pos] = picked_vals[slot]
            slot += 1
    return sq_array(flat)

# Second argument that simulated_anneal_random passes to random_swap_step,
# i.e. how many positions take part in one random swap proposal.
step_swap_size = 4

# After testing for various hyperparameters, this is what we found to be the best experimentally: 
# starting temperature of simulated_anneal_random, and the factor its
# temperature is multiplied by after every accepted move.
temp_initial_random = 1
temp_ratio_random = 0.8

# Simulated Annealing algorithm with random neighbour step
def simulated_anneal_random(k_max, n):
    """
    Runs simulated annealing with random swap proposals
    (instead of cellular automata proposals). 

    Each iteration proposes random_swap_step(delta, step_swap_size). The
    proposal is accepted when it improves the score, or otherwise when
    prob_accept of the change (divided by clen) beats a uniform draw. 
    Every accepted move multiplies the temperature by temp_ratio_random. 

    k_max: number of proposals to evaluate. 
    n: NumH of the deltas explored. 
    Returns a list with one score per iteration: the score of the
    current delta as it stood at the start of that iteration. 
    """
    delta = random_step(n)
    samp = []
    temp = temp_initial_random
    eval_now = evaluate(delta, mcmc_steps)
    k = 0
    pbar = tqdm_notebook(total=k_max, desc = 'Simulated Anneal', leave = False, disable = disable_tqdm)
    while k < k_max:
        samp.append(eval_now)
        delta_n = random_swap_step(delta, step_swap_size)
        eval_new = evaluate(delta_n, mcmc_steps)
        k += 1
        pbar.update(1)
        change_e = eval_new - eval_now
        # we can also give the change as a proportion, which makes hyperparameter optimization
        # better for generalization (grid size does not affect what hyperparams do): 
        prop_change_e = change_e / clen
        if change_e > 0 or prob_accept(prop_change_e, temp) > random.uniform(0, 1):
            delta = delta_n
            eval_now = eval_new
            temp = temp * temp_ratio_random
    pbar.close()
    return samp

`mult_simulated_anneal` runs simulated anneal multiple times. 

### $k_\mathrm{max}$ versus outcome

We can compare our algorithms by seeing how fast they converge to the maximum given a specific $k_\mathrm{max}$. 

The following runs every algorithm, records the best outcome found so far after each evaluation up to $k_\mathrm{max}$, and logs the results to data tables. 

In [None]:
# From here on, the optimizers' own progress bars are disabled.
disable_tqdm = True

In [None]:
def _running_max_rows(scores, k_max, method):
    """
    Turns one run's scores into [k, best score so far, method] rows
    for k = 1 .. k_max - 1 (or fewer if the run is shorter). 
    """
    rows = []
    best = 0
    for k_current, score in enumerate(scores, start=1):
        if k_current == k_max:
            break
        if score > best:
            best = score
        rows.append([k_current, best, method])
    return rows


def run_comparison(numH, samp_size, k_max):
    """
    Compares the optimizers for a fixed NumH. 

    Runs each optimizer samp_size times with budget k_max, records the
    running maximum of every run, adds the true maximum taken from df
    as a reference line, writes all rows to a CSV file, and plots the
    result (saved as a PDF and shown). 

    numH: number of 'true' blocks in the deltas explored. 
    samp_size: number of independent runs per optimizer. 
    k_max: budget handed to every optimizer. 
    """
    # Label plus a zero-argument callable producing one run's scores,
    # in the order in which the methods are executed.
    samplers = [
        ("Random", lambda: random_sample(k_max, numH)),
        ("RRILS", lambda: shotgun_greedy(k_max, numH, 1)),
        ("S.A.", lambda: simulated_anneal(k_max, numH)),
        ("S.A. (Random)", lambda: simulated_anneal_random(k_max, numH)),
    ]

    full_dt_singleruns = []
    for method, sample_run in samplers:
        for _ in tnrange(samp_size, desc = method):
            full_dt_singleruns.extend(_running_max_rows(sample_run(), k_max, method))

    actual_max = df.loc[df["NumH"] == numH].nlargest(1, "Avg")["Avg"].item()

    full_dt_singleruns.append([0, actual_max, "Maximum"])
    full_dt_singleruns.append([k_max, actual_max, "Maximum"])

    # Both output files share the same parameter suffix.
    suffix = "-samp_size" + str(samp_size) + "-numH" + str(numH) + "-k_max" + str(k_max)

    full_df = pd.DataFrame(full_dt_singleruns, columns = ("k_max", "runmax", "Method"))
    full_df.to_csv("k_max_vs_outcome" + suffix + ".csv")

    fig = plt.figure(figsize=(8.5, 6.5), dpi=300)
    ax = sns.lineplot(x="k_max", y="runmax", style="Method", hue="Method", ci=100, dashes = {'Maximum': (1, 2), 'Random': '', 'S.A.': '', 'S.A. (Random)': '', 'RRILS': ''}, data = full_df)
    # Raw strings: "\m" is not a valid escape sequence in a normal literal.
    ax.set(xlabel=r'$k_\mathrm{max}$', ylabel='Outcome')
    ax.set_title(r'$k_\mathrm{max}$ versus Outcome for Varying Algorithms, $\mathsf{NumH} = ' + str(numH) + '$')

    plt.savefig("algorithm-comparison" + suffix + ".pdf")
    plt.show()

In [None]:
# Run the comparison for each NumH of interest, with 10000 runs per
# method and a budget of 1000.
for numH in (9, 10, 11, 12, 5, 4, 3):
    run_comparison(numH, 10000, 1000)