In [221]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import seaborn as sns
from gerrychain import Graph, GeographicPartition, Partition, Election, accept
from gerrychain.updaters import Tally, cut_edges
import glob
import functools
import operator
import os

In [222]:
def foldl(func, acc, xs):
  return functools.reduce(func, xs, acc)

foldr = lambda func, acc, xs: functools.reduce(lambda x, y: func(y, x), xs[::-1], acc)

In [223]:
def load_npy_files(directory):
    if not os.path.isdir(directory):
        print(f"Error: Directory '{directory}' does not exist.")
        return []

    data = []
    for filename in os.listdir(directory):
        if filename.endswith(".npy"):
            file_path = os.path.join(directory, filename)
            try:
                loaded_array = np.load(file_path)
                data.append(loaded_array)
            except Exception as e:
                print(f"Error loading '{file_path}': {e}")

    return data

In [226]:
dir_path = "./output/short-burst/sb-runs/"
data = load_npy_files(dir_path)
print(data)
print("-"*50)
print("Number of arrays: ", len(data))

[array([[5., 5., 5., ..., 4., 4., 4.],
       [5., 5., 4., ..., 5., 5., 5.],
       [5., 5., 5., ..., 3., 3., 3.],
       ...,
       [5., 4., 4., ..., 3., 3., 3.],
       [5., 5., 5., ..., 4., 3., 3.],
       [5., 4., 4., ..., 3., 3., 3.]]), array([[5., 5., 5., ..., 4., 4., 4.],
       [5., 5., 5., ..., 4., 3., 3.],
       [5., 5., 5., ..., 4., 4., 4.],
       ...,
       [5., 5., 5., ..., 2., 1., 1.],
       [5., 5., 4., ..., 5., 5., 5.],
       [5., 5., 5., ..., 3., 4., 4.]]), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 2., 2., 2.],
       [2., 2., 2., ..., 1., 1., 1.],
       ...,
       [3., 3., 3., ..., 3., 3., 3.],
       [3., 3., 3., ..., 2., 2., 2.],
       [3., 3., 3., ..., 3., 3., 3.]]), array([[5., 5., 5., ..., 3., 3., 3.],
       [5., 5., 5., ..., 5., 5., 4.],
       [5., 5., 5., ..., 5., 5., 5.],
       ...,
       [7., 7., 7., ..., 6., 6., 6.],
       [7., 7., 7., ..., 6., 6., 6.],
       [7., 7., 7., ..., 6., 6., 6.]]), array([[5., 5., 5., ..., 5., 4.

In [255]:
def get_state_runs(state, iters=10000, thresholds=[0.5, 0.45, 0.4], min_col="BVAP", ls=[10]): #TODO: Later run with ls=[10, 25, 50, 100, 200]
    results = {}

    for threshold in thresholds:
        for l in ls:
            file_pattern = "./output/short-burst/sb-runs/{}_dists14_{}_opt_{:.1%}_{}_sbl{}_score_num_opportunity_dists_*.npy".format(state, min_col, threshold, iters, l)
            # print("Searching for files with pattern:", file_pattern)

            sb_runs = glob.glob(file_pattern)

            if not sb_runs:
                print(f"No files found for l = {l} and threshold = {threshold}")
                continue

            try:
                data = [np.load(run) for run in sb_runs]
                results[f"len{l}_threshold{threshold}"] = np.array(data)
            except Exception as e:
                print(f"Error loading data for l = {l} and threshold = {threshold}: {e}")

    return results

In [256]:
ga_runs_bvap = get_state_runs("GA", iters=10000, thresholds=[0.5, 0.45, 0.4], min_col="BVAP", ls=[10]) #TODO: Later run with ls=[10, 25, 50, 100, 200]
print(ga_runs_bvap)

NameError: name 'pop_bal_val' is not defined

In [254]:
def create_state_df(runs, pop_bals, iters=10000):
    df_st = []
    for pop_bal in pop_bals:
        for l in runs.keys():
            for i in range(runs[l].shape[0]):
                df = pd.DataFrame()
                df["Step"] = np.arange(iters)

                max_accumulated = np.maximum.accumulate(np.concatenate(runs[l][i]))

                if len(max_accumulated) != iters:
                    print("Error: Length mismatch!")

                df["Maximum"] = max_accumulated
                df["param"] = f"burst_len_{l}"
                df["threshold"] = pop_bal
                df_st.append(df)
    return pd.concat(df_st, ignore_index=True)

df_GA_bvap = create_state_df(ga_runs_bvap, pop_bals=[0.5, 0.45, 0.4])
df_GA_bvap.head()

Unnamed: 0,Step,Maximum,param,threshold
0,0,0.0,burst_len_l_10_pop_bal_0.5,0.5
1,1,0.0,burst_len_l_10_pop_bal_0.5,0.5
2,2,0.0,burst_len_l_10_pop_bal_0.5,0.5
3,3,0.0,burst_len_l_10_pop_bal_0.5,0.5
4,4,0.0,burst_len_l_10_pop_bal_0.5,0.5


In [234]:
ubs = glob.glob("./output/short-burst/sb-runs/GA_dists14_BVAP_opt_*%_10000_sbl*_score_num_opportunity_dists_*_max_part.p")
ub_runs = {}
for i, run in enumerate(ubs):
     with open(run, "rb") as f:
        ub_runs[i] = pickle.load(f)

In [235]:
runs = list(ub_runs.values())
bvap = foldl(lambda x, y: np.concatenate((x, list(y['BVAP'].values()))), list(runs[0]['BVAP'].values()), runs[1:])

In [236]:
pop_bals = [0.5, 0.45, 0.4]
df_GA_bvap = create_state_df(ga_runs_bvap, pop_bals)
df_GA_bvap

Unnamed: 0,Step,Maximum,param,pop_bal
0,0,0.0,b = l_10_pop_bal_0.5,0.5
1,1,0.0,b = l_10_pop_bal_0.5,0.5
2,2,0.0,b = l_10_pop_bal_0.5,0.5
3,3,0.0,b = l_10_pop_bal_0.5,0.5
4,4,0.0,b = l_10_pop_bal_0.5,0.5
...,...,...,...,...
899995,9995,7.0,b = l_10_pop_bal_0.4,0.4
899996,9996,7.0,b = l_10_pop_bal_0.4,0.4
899997,9997,7.0,b = l_10_pop_bal_0.4,0.4
899998,9998,7.0,b = l_10_pop_bal_0.4,0.4


In [243]:
unique_pop_bals = df_GA_bvap['pop_bal'].unique()
unique_pop_bals

array([0.5 , 0.45, 0.4 ])

In [245]:
n_unique_pop_bals = len(unique_pop_bals)

cmap_no_light = sns.color_palette(['#e6194b', '#3cb44b', '#ffe119', '#4363d8', 
                                   '#f58231', '#911eb4', '#46f0f0', '#f032e6', 
                                   '#808000', '#008080', '#9a6324', '#800000', 
                                   '#aaffc3', '#000075'], n_colors=n_unique_pop_bals)

In [246]:
def separate_df_by_unique_column_values(df, column):
    unique_values = df[column].unique()
    separated_dfs = {}
    for value in unique_values:
        df_name = f"df_{column}_{value}"
        separated_dfs[df_name] = df[df[column] == value]
    return separated_dfs

separated_dfs = separate_df_by_unique_column_values(df_GA_bvap, 'pop_bal')

In [247]:
df_pop_bal_0_5 = separated_dfs['df_pop_bal_0.5']
df_pop_bal_0_45 = separated_dfs['df_pop_bal_0.45']
df_pop_bal_0_4 = separated_dfs['df_pop_bal_0.4']

In [248]:
df_pop_bal_0_5.head()

Unnamed: 0,Step,Maximum,param,pop_bal
0,0,0.0,b = l_10_pop_bal_0.5,0.5
1,1,0.0,b = l_10_pop_bal_0.5,0.5
2,2,0.0,b = l_10_pop_bal_0.5,0.5
3,3,0.0,b = l_10_pop_bal_0.5,0.5
4,4,0.0,b = l_10_pop_bal_0.5,0.5


In [249]:
df_pop_bal_0_45.head()

Unnamed: 0,Step,Maximum,param,pop_bal
300000,0,0.0,b = l_10_pop_bal_0.5,0.45
300001,1,0.0,b = l_10_pop_bal_0.5,0.45
300002,2,0.0,b = l_10_pop_bal_0.5,0.45
300003,3,0.0,b = l_10_pop_bal_0.5,0.45
300004,4,0.0,b = l_10_pop_bal_0.5,0.45
