In [1]:
import importlib
import analysis
import ip_models
importlib.reload(ip_models)
importlib.reload(analysis)
from analysis import get_centroids
from analysis import check_continuous_solution
import Clustering_Functions
importlib.reload(Clustering_Functions)
from Clustering_Functions import HH_proxy, Borda_vector, csv_parse
import pandas as pd
import ast
import numpy as np

In [2]:
# Input locations for the continuous HH check.
ELECTION_DIR = "scot-elex-main"                   # directory the election CSV files are read from
CONT_HH_PROB_FILE = "continuous_HH_problems.pkl"  # pickled table of problems to re-check

# NOTE: unpickling can execute arbitrary code -- only load pickle files from a trusted source.
continuous_HH_problems = pd.read_pickle(CONT_HH_PROB_FILE)

In [None]:
# Check continuous HH solutions.
#
# For every stored problem: rebuild and solve the continuous HH model, read the
# centroids back out of the solution, re-assign the ballots to those centroids
# by hand, and compare three scores: the IP objective, the stored PAM score and
# the manually recomputed cost. Disagreeing problems are collected in `mismatches`.
import math
from tqdm import tqdm

NUM_CLUSTERS = 2   # number of centroids requested from the model
SCORE_TOL = 1e-6   # solver objectives are floats, so compare with a tolerance

MISMATCH_COLUMNS = ['election', 'num_cands', 'IP_score', 'PAM_score', 'centroids', 'variables', 'election_data']
mismatch_records = []

# iterrows() is a generator, so pass `total` explicitly to get a sized progress bar.
for _, row in tqdm(continuous_HH_problems.iterrows(), total=len(continuous_HH_problems)):
    election = row['filename']
    num_cands = row['num_cands']
    election_data = csv_parse(f'{ELECTION_DIR}/{election}')[1]

    model = ip_models.continuous_hh(election_data, NUM_CLUSTERS)
    model.Params.LogToConsole = 0  # keep the solver log out of the notebook output
    model.optimize()
    model.update()

    # One coordinate per unordered pair of candidates.
    dim = math.comb(num_cands, 2)
    z = analysis.extract_z_variables(model, range(dim), range(NUM_CLUSTERS), [-1, 0, 1])
    centroids = analysis.extract_centroids(model, z, range(NUM_CLUSTERS), range(dim), [-1, 0, 1])
    centroids_dict = {k: centroids[k] for k in range(NUM_CLUSTERS)}
    proxy_conversion = {ballot: 2*HH_proxy(ballot, num_cands=num_cands) for ballot in election_data.keys()}
    clusters, cost = analysis.assign_ballots(centroids_dict, election_data, proxy_conversion)

    ip_score = model.ObjVal
    pam_score = row['PAM score']
    scores_agree = (
        math.isclose(ip_score, pam_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
        and math.isclose(cost, ip_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
    )

    if not scores_agree:
        print(f"Score mismatch: {model.ObjVal} (IP Objective) != {row['PAM score']} (True Objective) != {cost} (IP Manual Cost) for {election}")
        print(f"Number of candidates: {row['num_cands']}")
        print(f"Original IP Objective: {row['IP score']}")
        print(f"Centroids: {centroids}")
        variable_dict = {var.VarName: var.X for var in model.getVars()}
        mismatch_records.append({
            'election': election,
            'num_cands': row['num_cands'],
            'IP_score': ip_score,
            'PAM_score': pam_score,
            # NOTE(review): this column holds the stored PAM proxies, not the IP
            # centroids printed above -- confirm that this is the intended content.
            'centroids': row['PAM proxies'],
            'variables': variable_dict,
            'election_data': election_data,
        })
    else:
        print(f"Solution for {election} matches.")

# Build the frame once instead of growing it row by row inside the loop.
mismatches = pd.DataFrame(mismatch_records, columns=MISMATCH_COLUMNS)
mismatch = len(mismatch_records)

print(f"All continuous HH solutions checked. {mismatch} mismatches found.")
    

In [2]:
# Locations of the "continuous rest" problem table and of the election CSV files.
ELECTION_DIR = "scot-elex-main"
CONT_REST_PROB_FILE = "continuous_rest_problems_2.pkl"

continuous_rest_problems = pd.read_pickle(CONT_REST_PROB_FILE)

# Keep only the rows whose proxy type is HH.
hh_problems = continuous_rest_problems.loc[continuous_rest_problems['proxy_type'].eq('HH')]

In [4]:
# Display the rows of the problems table whose proxy type is HH.
continuous_rest_problems.loc[continuous_rest_problems['proxy_type'].eq('HH')]

Unnamed: 0,filename,num_cands,proxy_type,IP centers,IP score,PAM centers,PAM score
9436,aberdeen_2022_ward11.csv,5,HH,"{0: (1, 4, 5, 3, 2), 1: (5, 2, 3, 4, 1)}",14853.5,"{0: (5, 2, 3), 1: (1, 4, 5)}",13745.5
9450,aberdeenshire_2012_ward10.csv,5,HH,"{0: (4, 3, 1, 5, 2), 1: (2, 5, 3, 1, 4)}",9521.0,"{0: (3, 1), 1: (2,)}",7820.5
9443,aberdeenshire_2012_ward1.csv,5,HH,"{0: (1, 2, 3, 4, 5), 1: (5, 3, 4, 2, 1)}",8709.0,"{0: (1, 2), 1: (5, 3)}",6616.5
9457,aberdeenshire_2012_ward13.csv,5,HH,"{0: (3, 4, 5, 2, 1), 1: (1, 2, 4, 5, 3)}",10311.5,"{0: (2, 1), 1: (3, 4)}",8605.5
9464,aberdeenshire_2012_ward15.csv,5,HH,"{0: (2, 3, 1, 4, 5), 1: (5, 1, 3, 4, 2)}",11228.0,"{0: (2,), 1: (5, 1, 3)}",9286.0
...,...,...,...,...,...,...,...
15029,west_lothian_2017_ward4.csv,8,HH,"{0: (1, 5, 3, 6, 2, 4, 7, 8), 1: (4, 7, 8, 2, ...",62512.5,"{0: (1, 5, 3), 1: (4, 7, 8)}",39686.5
15036,west_lothian_2017_ward5.csv,8,HH,"{0: (5, 6, 8, 2, 3, 4, 1, 7), 1: (1, 4, 7, 3, ...",57248.5,"{0: (5, 6, 8), 1: (1, 4, 7)}",39412.5
15043,west_lothian_2022_ward3.csv,8,HH,"{0: (5, 7, 1, 4, 2, 8, 3, 6), 1: (3, 6, 8, 5, ...",56837.5,"{0: (3, 6), 1: (5, 7, 1)}",37922.5
15050,west_lothian_2022_ward6.csv,8,HH,"{0: (7, 5, 3, 8, 1, 2, 4, 6), 1: (2, 4, 1, 7, ...",39976.5,"{0: (7, 5), 1: (2, 4)}",23475.5


In [4]:
# Check continuous_rest HH solutions.
#
# Rebuild and solve the continuous_rest HH model for the selected problems, read
# the centroids out of the solution, re-assign the ballots by hand, and compare
# the IP objective with the (doubled) stored PAM score and the recomputed cost.
import math

import analysis
import ip_models

# Pick up edits made to the local modules since they were first imported.
importlib.reload(analysis)
importlib.reload(ip_models)

CONT_REST_PROB_FILE = "continuous_rest_problems_2.pkl"
ELECTION_DIR = 'scot-elex-main'
NUM_CLUSTERS = 2   # number of centroids requested from the model
SCORE_TOL = 1e-6   # solver objectives are floats, so compare with a tolerance
# Restrict the check to a single election file; set to None to check every HH problem.
ONLY_ELECTION = 'falkirk_2012_ward5.csv'

continuous_rest_problems = pd.read_pickle(CONT_REST_PROB_FILE)
hh_problems = continuous_rest_problems[continuous_rest_problems['proxy_type'] == 'HH']

if ONLY_ELECTION is not None:
    problems_to_check = hh_problems[hh_problems['filename'] == ONLY_ELECTION]
else:
    problems_to_check = hh_problems

mismatch = 0

for _, row in problems_to_check.iterrows():
    election = row['filename']
    num_cands = row['num_cands']
    election_data = csv_parse(f'{ELECTION_DIR}/{election}')[1]

    model = ip_models.continuous_rest_hh(election_data, NUM_CLUSTERS)
    model.Params.LogToConsole = 0  # keep the solver log out of the notebook output
    model.optimize()
    model.update()

    # One coordinate per unordered pair of candidates.
    dim = math.comb(num_cands, 2)
    z = analysis.extract_z_variables(model, range(dim), range(NUM_CLUSTERS), [-1, 0, 1])
    centroids = analysis.extract_centroids(model, z, range(NUM_CLUSTERS), range(dim), [-1, 0, 1])
    centroids_dict = {k: centroids[k] for k in range(NUM_CLUSTERS)}
    proxy_conversion = {ballot: 2*HH_proxy(ballot, num_cands=num_cands) for ballot in election_data.keys()}
    clusters, cost = analysis.assign_ballots(centroids_dict, election_data, proxy_conversion)

    # 2x since it looks like the PAM scores in this file are halved
    expected_score = 2*row['PAM score']
    ip_score = model.ObjVal
    scores_agree = (
        math.isclose(ip_score, expected_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
        and math.isclose(cost, ip_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
    )

    if not scores_agree:
        print(f"Score mismatch: {model.ObjVal} (IP Objective) != {2*row['PAM score']} (True Objective) != {cost} (IP Manual Cost) for {election}")
        print(f"Number of candidates: {row['num_cands']}")
        print(f"Original IP Objective: {row['IP score']}")
        mismatch += 1
    else:
        print(f"Model feasible for {election}")

print(f"All continuous_rest HH solutions checked. {mismatch} mismatches found.")
    

{'z[0,0,-1]': (0, 0, -1), 'z[0,0,0]': (0, 0, 0), 'z[0,0,1]': (0, 0, 1), 'z[0,1,-1]': (0, 1, -1), 'z[0,1,0]': (0, 1, 0), 'z[0,1,1]': (0, 1, 1), 'z[1,0,-1]': (1, 0, -1), 'z[1,0,0]': (1, 0, 0), 'z[1,0,1]': (1, 0, 1), 'z[1,1,-1]': (1, 1, -1), 'z[1,1,0]': (1, 1, 0), 'z[1,1,1]': (1, 1, 1), 'z[2,0,-1]': (2, 0, -1), 'z[2,0,0]': (2, 0, 0), 'z[2,0,1]': (2, 0, 1), 'z[2,1,-1]': (2, 1, -1), 'z[2,1,0]': (2, 1, 0), 'z[2,1,1]': (2, 1, 1), 'z[3,0,-1]': (3, 0, -1), 'z[3,0,0]': (3, 0, 0), 'z[3,0,1]': (3, 0, 1), 'z[3,1,-1]': (3, 1, -1), 'z[3,1,0]': (3, 1, 0), 'z[3,1,1]': (3, 1, 1), 'z[4,0,-1]': (4, 0, -1), 'z[4,0,0]': (4, 0, 0), 'z[4,0,1]': (4, 0, 1), 'z[4,1,-1]': (4, 1, -1), 'z[4,1,0]': (4, 1, 0), 'z[4,1,1]': (4, 1, 1), 'z[5,0,-1]': (5, 0, -1), 'z[5,0,0]': (5, 0, 0), 'z[5,0,1]': (5, 0, 1), 'z[5,1,-1]': (5, 1, -1), 'z[5,1,0]': (5, 1, 0), 'z[5,1,1]': (5, 1, 1), 'z[6,0,-1]': (6, 0, -1), 'z[6,0,0]': (6, 0, 0), 'z[6,0,1]': (6, 0, 1), 'z[6,1,-1]': (6, 1, -1), 'z[6,1,0]': (6, 1, 0), 'z[6,1,1]': (6, 1, 1), 'z[7,

In [40]:
# Check continuous_rest Borda ('BA') solutions.
#
# Rebuild and solve the continuous_rest bordaA model for the selected problems,
# read the centroids out of the solution, re-assign the ballots by hand, and
# compare the IP objective with the stored PAM score and the recomputed cost.
import math

import analysis
import ip_models

# Pick up edits made to the local modules since they were first imported.
importlib.reload(analysis)
importlib.reload(ip_models)

CONT_REST_PROB_FILE = "continuous_rest_problems_2.pkl"
ELECTION_DIR = 'scot-elex-main'
NUM_CLUSTERS = 2   # number of centroids requested from the model
SCORE_TOL = 1e-6   # solver objectives are floats, so compare with a tolerance
VERBOSE = True     # set to False to silence the per-election debug dumps
# NOTE(review): the table printed up front is the 4-candidate subset, while the
# loop checks the 3-candidate subset -- confirm that the difference is intended.
DISPLAYED_NUM_CANDS = 4
CHECKED_NUM_CANDS = 3

continuous_rest_problems = pd.read_pickle(CONT_REST_PROB_FILE)
bordaA_problems = continuous_rest_problems[continuous_rest_problems['proxy_type'] == 'BA']

mismatch = 0
# Never incremented while the feasibility check below is commented out, so the
# final summary always reports 0 infeasible models.
infeasibilities = 0

print(bordaA_problems[bordaA_problems['num_cands'] == DISPLAYED_NUM_CANDS])
for _, row in bordaA_problems[bordaA_problems['num_cands'] == CHECKED_NUM_CANDS].iterrows():
    election = row['filename']
    num_cands = row['num_cands']
    if VERBOSE:
        print(row)
    election_data = csv_parse(f'{ELECTION_DIR}/{election}')[1]
    model = ip_models.continuous_rest_bordaA(election_data, NUM_CLUSTERS)
    PAM_proxies = {k: Borda_vector(c, num_cands=num_cands, borda_style='avg', start=1) for k, c in row['PAM centers'].items()}
    if VERBOSE:
        print(PAM_proxies)
    # feasible = analysis.check_continuous_solution(model, PAM_proxies, 'bordaa', election_name=election.split('.')[0], verbose=False)
    # if feasible:
    #     print(f"Model feasible for {election}")
    # else:
    #     print(f"Model infeasible for {election}")
    #     infeasibilities += 1
    model.Params.LogToConsole = 0  # keep the solver log out of the notebook output
    model.optimize()
    model.update()

    # One coordinate per candidate; candidate values are the half-steps 0.5, 1.0, ..., num_cands.
    dim = num_cands
    values = np.arange(1, 2*num_cands + 1) / 2
    if VERBOSE:
        print(values)
    z = analysis.extract_z_variables(model, range(dim), range(NUM_CLUSTERS), values)
    centroids = analysis.extract_centroids(model, z, range(NUM_CLUSTERS), range(dim), values)
    # Shift the extracted centroids down by one before comparing them with the
    # ballot proxies, which are built without start=1.
    centroids_dict = {k: (centroids[k] - 1) for k in range(NUM_CLUSTERS)}
    if VERBOSE:
        print(centroids_dict)
    proxy_conversion = {ballot: Borda_vector(ballot, num_cands=num_cands, borda_style='avg') for ballot in election_data.keys()}
    if VERBOSE:
        print(proxy_conversion)
    clusters, cost = analysis.assign_ballots(centroids_dict, election_data, proxy_conversion)

    # Unlike the HH check, the stored PAM score is compared as-is here (no 2x factor).
    ip_score = model.ObjVal
    pam_score = row['PAM score']
    scores_agree = (
        math.isclose(ip_score, pam_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
        and math.isclose(cost, ip_score, rel_tol=SCORE_TOL, abs_tol=SCORE_TOL)
    )

    if not scores_agree:
        print(f"Score mismatch: {model.ObjVal} (IP Objective) != {row['PAM score']} (True Objective) != {cost} (IP Manual Cost) for {election}")
        print(f"Number of candidates: {row['num_cands']}")
        print(f"Original IP Objective: {row['IP score']}")
        mismatch += 1
    else:
        print(f"Model feasible for {election}")

print(f"All continuous_rest bordaA solutions checked. {mismatch} mismatches found. {infeasibilities} infeasible models found.")
    

                                  filename  num_cands proxy_type  \
9183              aberdeen_2017_ward11.csv          4         BA   
9190          aberdeenshire_2017_ward1.csv          4         BA   
9197         aberdeenshire_2017_ward16.csv          4         BA   
9204          aberdeenshire_2022_ward6.csv          4         BA   
9211                  angus_2012_ward1.csv          4         BA   
9218           argyll_bute_2012_ward11.csv          4         BA   
9225            argyll_bute_2022_ward1.csv          4         BA   
9232       clackmannanshire_2012_ward5.csv          4         BA   
9253          east_ayrshire_2012_ward2.csv          4         BA   
9274            eilean_siar_2012_ward3.csv          4         BA   
9295                falkirk_2012_ward8.csv          4         BA   
9302                falkirk_2017_ward7.csv          4         BA   
9309  highland_2022_inverness_millburn.csv          4         BA   
9316             midlothian_2022_ward2.csv      

In [38]:
# Display the full problems table (all proxy types); the rendered output is abbreviated.
continuous_rest_problems

Unnamed: 0,filename,num_cands,proxy_type,IP centers,IP score,PAM centers,PAM score
9435,aberdeen_2022_ward11.csv,5,BA,"{0: (5,), 1: (1,)}",23620.0,"{0: (1,), 1: (5, 2, 3)}",23148.0
9449,aberdeenshire_2012_ward10.csv,5,BA,"{0: (2,), 1: (4,)}",13046.0,"{0: (1, 3), 1: (2,)}",12289.0
9442,aberdeenshire_2012_ward1.csv,5,BA,"{0: (5,), 1: (1,)}",14268.0,"{0: (1, 2), 1: (5,)}",11052.0
9456,aberdeenshire_2012_ward13.csv,5,BA,"{0: (2,), 1: (3,)}",18290.0,"{0: (1, 2), 1: (3, 4)}",14436.0
9463,aberdeenshire_2012_ward15.csv,5,BA,"{0: (2,), 1: (5,)}",16191.0,"{0: (1, 3), 1: (5,)}",15225.0
...,...,...,...,...,...,...,...
15029,west_lothian_2017_ward4.csv,8,HH,"{0: (1, 5, 3, 6, 2, 4, 7, 8), 1: (4, 7, 8, 2, ...",62512.5,"{0: (1, 5, 3), 1: (4, 7, 8)}",39686.5
15036,west_lothian_2017_ward5.csv,8,HH,"{0: (5, 6, 8, 2, 3, 4, 1, 7), 1: (1, 4, 7, 3, ...",57248.5,"{0: (5, 6, 8), 1: (1, 4, 7)}",39412.5
15043,west_lothian_2022_ward3.csv,8,HH,"{0: (5, 7, 1, 4, 2, 8, 3, 6), 1: (3, 6, 8, 5, ...",56837.5,"{0: (3, 6), 1: (5, 7, 1)}",37922.5
15050,west_lothian_2022_ward6.csv,8,HH,"{0: (7, 5, 3, 8, 1, 2, 4, 6), 1: (2, 4, 1, 7, ...",39976.5,"{0: (7, 5), 1: (2, 4)}",23475.5


In [3]:
# Parse a single election file; the ballot data is the element at index 1 of
# what csv_parse returns.
ELECTION_DIR = "scot-elex-main"
parsed_election = csv_parse(f'{ELECTION_DIR}/edinburgh_2017_ward2.csv')
election_data = parsed_election[1]

In [9]:
# Inspect the parsed ballots: the output maps each ballot (a tuple of candidate numbers) to a count.
election_data

{(1,): 131,
 (1, 2): 23,
 (1, 2, 3): 10,
 (1, 2, 3, 4, 5, 6): 1,
 (1, 2, 3, 4, 5, 6, 7): 2,
 (1, 2, 3, 6, 4, 7, 5): 1,
 (1, 2, 3, 7, 6, 4, 5): 1,
 (1, 2, 4): 17,
 (1, 2, 4, 6): 14,
 (1, 2, 4, 6, 3, 5): 1,
 (1, 2, 4, 6, 5): 1,
 (1, 2, 4, 6, 5, 3): 1,
 (1, 2, 4, 6, 7): 2,
 (1, 2, 4, 6, 7, 3, 5): 2,
 (1, 2, 4, 6, 7, 5, 3): 1,
 (1, 2, 4, 7, 6, 3, 5): 1,
 (1, 2, 5): 1,
 (1, 2, 6): 35,
 (1, 2, 6, 4): 8,
 (1, 2, 6, 4, 5, 3, 7): 1,
 (1, 2, 6, 4, 7): 2,
 (1, 2, 6, 4, 7, 3, 5): 1,
 (1, 2, 6, 4, 7, 5, 3): 1,
 (1, 2, 6, 7): 7,
 (1, 2, 6, 7, 3, 4, 5): 1,
 (1, 2, 6, 7, 4): 1,
 (1, 2, 6, 7, 4, 5, 3): 1,
 (1, 2, 7): 2,
 (1, 2, 7, 5): 1,
 (1, 2, 7, 6): 1,
 (1, 2, 7, 6, 4, 3, 5): 1,
 (1, 3): 4,
 (1, 3, 2): 1,
 (1, 3, 2, 5, 4, 6, 7): 1,
 (1, 3, 4): 3,
 (1, 3, 4, 2, 6, 5, 7): 1,
 (1, 3, 4, 5, 6): 1,
 (1, 3, 4, 7): 2,
 (1, 3, 4, 7, 5, 6, 2): 1,
 (1, 3, 5): 4,
 (1, 3, 5, 4): 1,
 (1, 3, 5, 4, 7, 2, 6): 1,
 (1, 3, 5, 6, 4, 7, 2): 1,
 (1, 3, 5, 7, 4, 2, 6): 1,
 (1, 3, 5, 7, 6, 2, 4): 1,
 (1, 3, 6): 6,
 (1, 3, 

In [4]:
# Build the continuous HH model for the ballots loaded above.
# NOTE(review): the second argument (2) is presumably the number of clusters; it
# matches the range(2) used when the centroids are extracted later -- confirm.
model = ip_models.continuous_hh(election_data, 2)

Set parameter Username
Academic license - for non-commercial use only - expires 2025-06-18


In [5]:
# Solve the model; console logging is not disabled in this cell, so the solver log is printed below.
model.optimize()

Gurobi Optimizer version 11.0.2 build v11.0.2rc0 (mac64[arm] - Darwin 24.1.0 24B5035e)

CPU model: Apple M2 Max
Thread count: 12 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 1911 rows, 2770 columns and 59090 nonzeros
Model fingerprint: 0x67093b78
Variable types: 0 continuous, 2770 integer (2602 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+04]
Presolve removed 1385 rows and 1305 columns
Presolve time: 0.05s
Presolved: 526 rows, 1465 columns, 28891 nonzeros
Variable types: 0 continuous, 1465 integer (1356 binary)

Root relaxation: objective 0.000000e+00, 238 iterations, 0.01 seconds (0.01 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00000    0  105          -    0.00000      -     -    0s
H    0  

In [11]:
import math

# The ballots shown for this election use candidate numbers 1 through 7.
num_cands = 7
# One coordinate per unordered pair of candidates.
dim = math.comb(num_cands, 2)

cluster_ids = range(2)
coordinate_ids = range(dim)

# Read the z variables out of the solved model, then turn them into centroids.
z = analysis.extract_z_variables(model, coordinate_ids, cluster_ids, [-1, 0, 1])
centroids = analysis.extract_centroids(model, z, cluster_ids, coordinate_ids, [-1, 0, 1])

{'z[0,0,-1]': (0, 0, -1), 'z[0,0,0]': (0, 0, 0), 'z[0,0,1]': (0, 0, 1), 'z[0,1,-1]': (0, 1, -1), 'z[0,1,0]': (0, 1, 0), 'z[0,1,1]': (0, 1, 1), 'z[1,0,-1]': (1, 0, -1), 'z[1,0,0]': (1, 0, 0), 'z[1,0,1]': (1, 0, 1), 'z[1,1,-1]': (1, 1, -1), 'z[1,1,0]': (1, 1, 0), 'z[1,1,1]': (1, 1, 1), 'z[2,0,-1]': (2, 0, -1), 'z[2,0,0]': (2, 0, 0), 'z[2,0,1]': (2, 0, 1), 'z[2,1,-1]': (2, 1, -1), 'z[2,1,0]': (2, 1, 0), 'z[2,1,1]': (2, 1, 1), 'z[3,0,-1]': (3, 0, -1), 'z[3,0,0]': (3, 0, 0), 'z[3,0,1]': (3, 0, 1), 'z[3,1,-1]': (3, 1, -1), 'z[3,1,0]': (3, 1, 0), 'z[3,1,1]': (3, 1, 1), 'z[4,0,-1]': (4, 0, -1), 'z[4,0,0]': (4, 0, 0), 'z[4,0,1]': (4, 0, 1), 'z[4,1,-1]': (4, 1, -1), 'z[4,1,0]': (4, 1, 0), 'z[4,1,1]': (4, 1, 1), 'z[5,0,-1]': (5, 0, -1), 'z[5,0,0]': (5, 0, 0), 'z[5,0,1]': (5, 0, 1), 'z[5,1,-1]': (5, 1, -1), 'z[5,1,0]': (5, 1, 0), 'z[5,1,1]': (5, 1, 1), 'z[6,0,-1]': (6, 0, -1), 'z[6,0,0]': (6, 0, 0), 'z[6,0,1]': (6, 0, 1), 'z[6,1,-1]': (6, 1, -1), 'z[6,1,0]': (6, 1, 0), 'z[6,1,1]': (6, 1, 1), 'z[7,

In [12]:
# Show the extracted centroids: the output is a 2-row array whose entries are -1, 0 or 1.
centroids

array([[ 1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0., -1.,  0.,  0.,  0.,
        -1.,  0.,  0., -1.,  0., -1.,  0.,  1.],
       [ 0., -1., -1., -1.,  0., -1., -1.,  0., -1.,  0., -1.,  1.,  1.,
         1.,  1., -1.,  1.,  0.,  1.,  1., -1.]])