In [83]:
import choix

import numpy as np

from tqdm import tqdm

import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
import utils

In [84]:
# choix assumes that the items are integers in [0, num_items - 1].

# Path (relative to this notebook) of the Burlington PrefLib ballot file.
burlington_filename = "../data/preflib/elections-all/burlington/ED-00005-00000002.toi"
# Project helper; unpacked below as ballots, their multiplicities, the candidate
# names, and skipped votes. The exact types are defined in utils.read_preflib.
ballots, ballot_counts, cand_names, skipped_votes = utils.read_preflib(burlington_filename)

In [87]:
# Lookups between candidate names and contiguous integer ids (0 .. num_items - 1).
cand_name_to_idx = {cand: idx for idx, cand in enumerate(cand_names)}
cand_idx_to_name = {idx: cand for idx, cand in enumerate(cand_names)}

# "END" is a pseudo-candidate appended after the last candidate a voter listed,
# so that the point where a ballot stops is itself part of the ranking.
END_IDX = len(cand_names)
cand_name_to_idx["END"] = END_IDX
cand_idx_to_name[END_IDX] = "END"

num_items = len(cand_name_to_idx)

# Fixed seed so the random completion below is identical on every
# Restart & Run All; change SEED to draw a different completion.
SEED = 0
rng = np.random.default_rng(SEED)

# Expand every distinct ballot into `ballot_counts[ballot_idx]` full rankings:
# listed candidates, then END, then the unlisted candidates in random order.
ranking_data_for_choix = []
# Wrapping `ballots` directly (instead of the enumerate object) lets tqdm pick up
# the total from len(ballots) when it is available and draw a real progress bar.
for ballot_idx, ballot_with_names in enumerate(tqdm(ballots)):
    ballot_with_idxs = [
        cand_name_to_idx[cand_name]
        for cand_name in ballot_with_names
    ]
    ballot_with_idxs.append(END_IDX)

    # Set membership avoids rescanning the ballot list for every candidate.
    listed_idxs = set(ballot_with_idxs)
    unlisted_cands = [
        cand_idx for cand_idx in range(num_items) if cand_idx not in listed_idxs
    ]

    for _repeat in range(ballot_counts[ballot_idx]):
        full_ballot = ballot_with_idxs.copy()
        if unlisted_cands:
            # Re-shuffled for every copy so each voter gets an independent
            # random ordering of the candidates they did not rank.
            rng.shuffle(unlisted_cands)
            full_ballot.extend(unlisted_cands)

        ranking_data_for_choix.append(full_ballot)

378it [00:00, 8966.34it/s]


In [89]:
# Preview the first expanded rankings: listed candidate ids, the END id, then the
# randomly ordered unlisted candidate ids.
ranking_data_for_choix[:10]

[[4, 6, 2, 3, 5, 1, 0],
 [4, 6, 0, 5, 1, 3, 2],
 [4, 6, 3, 5, 0, 2, 1],
 [4, 6, 3, 0, 2, 5, 1],
 [4, 6, 2, 5, 1, 0, 3],
 [4, 6, 1, 0, 3, 5, 2],
 [4, 6, 3, 5, 0, 1, 2],
 [4, 6, 0, 5, 3, 2, 1],
 [4, 6, 0, 3, 1, 5, 2],
 [4, 6, 2, 3, 5, 0, 1]]

In [90]:
# Fit one strength parameter per item (every candidate plus END) from the
# completed rankings using choix's LSR estimator for ranking data.
PL_params = choix.lsr.lsr_rankings(n_items=num_items, data=ranking_data_for_choix)
PL_params

array([ 0.41546024,  0.64017917, -0.4582302 ,  0.37813096,  0.28949842,
       -1.15847786, -0.10656073])

In [97]:
# Number of synthetic ballots to draw.
# NOTE(review): 8980 is presumably the number of ballots in the source election;
# confirm, or derive it from len(ranking_data_for_choix) instead of hard-coding.
N_SIMULATED_BALLOTS = 8980

# Sample full rankings over all items (candidates plus END) from the fitted
# parameters. NOTE(review): `rng` is not passed to this call, so the draw is not
# controlled by it; seed whichever global RNG choix uses if this cell must be
# reproducible (TODO confirm which one).
simulated_ballots = choix.utils.generate_rankings(
    PL_params,
    n_rankings=N_SIMULATED_BALLOTS,
    size=num_items,
)
simulated_ballots[:3]

(array([1, 3, 4, 2, 0, 6, 5]),
 array([0, 4, 6, 2, 1, 3, 5]),
 array([1, 2, 0, 3, 4, 6, 5]))

In [98]:
# Cut every simulated ranking off at the END marker and tally the distinct
# truncated ballots, keyed by their space-separated candidate ids.
end_marker = num_items - 1  # END was given the last item id
ballots_and_counts = {}
for simulated in simulated_ballots:
    marker_positions = np.flatnonzero(simulated == end_marker)
    assert len(marker_positions) == 1

    cutoff = marker_positions[0]
    if cutoff > 0:  # END in first place is an empty ballot and is dropped
        ballot_key = " ".join(str(cand) for cand in simulated[:cutoff])
        ballots_and_counts[ballot_key] = ballots_and_counts.get(ballot_key, 0) + 1

# Most frequent ballots first; the sort is stable, so ties keep first-seen order.
sorted_ballots_and_counts = sorted(
    ballots_and_counts.items(), key=lambda pair: pair[1], reverse=True
)

# Translate the id strings back to candidate names, with counts in a parallel list.
# NOTE: this rebinds `ballot_counts`, the name the data-loading cell assigns and
# the ranking-expansion cell reads. Re-run the loading cell before re-running
# the expansion cell.
output_ballots = [
    [cand_idx_to_name[int(token)] for token in ballot_key.split(" ")]
    for ballot_key, tally in sorted_ballots_and_counts
]
ballot_counts = [tally for ballot_key, tally in sorted_ballots_and_counts]

output_ballots[:10], ballot_counts[:10]

([[2], [1], [4], [5], [2, 1], [1, 2], [2, 4], [4, 2], [2, 5], [1, 4]],
 [281, 246, 229, 216, 115, 110, 109, 98, 92, 83])