# Violations of Preferential Equality 

In [5]:
from pref_voting.generate_profiles import * 
from pref_voting.voting_methods import *
from pref_voting.rankings import *
from pref_voting.profiles_with_ties import ProfileWithTies
from pref_voting.iterative_methods import top_n_instant_runoff_for_truncated_linear_orders

import glob
from zipfile import ZipFile
import os
import io
from tqdm.notebook import tqdm


In [9]:
def same_ranking_extended_strict_pref(ranking1, ranking2, candidates): 
    # check if ranking1 and ranking2 have the same ranking of candidates
    for c1 in candidates:
        for c2 in candidates:
            if not ranking1.extended_strict_pref(c1, c2) and ranking2.extended_strict_pref(c1, c2):
                return False
    return True

def get_winner_runner_up_loser(profile): 

    pl_scores = profile.plurality_scores()
    # find the 2nd largest plurality score
    second_pl_score = sorted(set(pl_scores.values()))[1]
    second_place_cand = [c for c in pl_scores.keys() if pl_scores[c] == second_pl_score][0]
    first_pl_score = sorted(set(pl_scores.values()))[0]
    first_place_cand = [c for c in pl_scores.keys() if pl_scores[c] == first_pl_score][0]

    winner = instant_runoff_for_truncated_linear_orders(profile)[0]

    runner_up = second_place_cand if winner == first_place_cand else first_place_cand

    loser = [c for c in profile.candidates if c not in [winner, runner_up]][0]

    return winner, runner_up, loser

def has_irv_preferential_equality_violation(profile, winner, runner_up, loser): 

    bca = Ranking({
        runner_up:1, 
        loser:2, 
        winner:3})
    
    bac = Ranking({
        runner_up:1, 
        winner:2, 
        loser:3})
    
    acb = Ranking({
        winner:1, 
        loser:2, 
        runner_up:3})
    
    cab = Ranking({
        loser:1, 
        winner:2, 
        runner_up:3})

    rankings, rcounts = profile.rankings_counts

    num_bca = 0
    num_bac = 0
    for r, c in zip(rankings, rcounts): 
        if same_ranking_extended_strict_pref(r, bca, profile.candidates):
            num_bca += c
        if same_ranking_extended_strict_pref(r, bac, profile.candidates):
            num_bac += c
    num = max(num_bca, num_bac)

    bac_violation = num == num_bac
    # print('num is ', num)
    new_rankings = []
    new_counts = []
    # print("acb is ", acb)
    acb_rankings = [r for r in profile.rankings if same_ranking_extended_strict_pref(r, acb, profile.candidates)]

    # print(len(acb_rankings))
    new_rankings.append(cab)
    new_counts.append(len(acb_rankings[:num]))
    new_rankings.append(acb)
    new_counts.append(len(acb_rankings[num:]))
    for r,c in zip(rankings, rcounts):
        if not same_ranking_extended_strict_pref(r, acb, profile.candidates):
            new_rankings.append(r)
            new_counts.append(c)

    new_prof = ProfileWithTies(new_rankings, new_counts)
    # new_prof.anonymize().display()
    # print(loser)
    # print(instant_runoff_for_truncated_linear_orders(ProfileWithTies(new_rankings, new_counts)))

    return loser in instant_runoff_for_truncated_linear_orders(new_prof), bac_violation


In [12]:
def find_violations(profiles): 

    num_profs = 0
    num_profs_no_absolute_maj_winner = 0

    num_orig_prof_violations = 0  
    num_modified_prof_violations = 0  

    for prof in tqdm(profiles): 

        if not prof.is_truncated_linear: 
            continue
        prof.remove_empty_rankings() 

        top_three = top_n_instant_runoff_for_truncated_linear_orders(prof, 3)

        restricted_prof = prof.remove_candidates([c for c in prof.candidates if c not in top_three])

        restricted_prof.remove_empty_rankings() 
        
        if len(restricted_prof.candidates) < 3:
            continue

        irv_ws = instant_runoff_for_truncated_linear_orders(restricted_prof)

        if len(irv_ws) != 1: 
            continue
        
        num_profs += 1

        absolute_majority_winner = absolute_majority(restricted_prof)
        if len(absolute_majority_winner) == 1: 
            continue

        num_profs_no_absolute_maj_winner += 1

        winner, runner_up, loser = get_winner_runner_up_loser(restricted_prof)

        has_violation, orig_prof_violation =  has_irv_preferential_equality_violation(restricted_prof, winner, runner_up, loser)

        if has_violation and orig_prof_violation:
            num_orig_prof_violations += 1

        if has_violation and not orig_prof_violation: 
            num_modified_prof_violations += 1

    print(f"{num_profs_no_absolute_maj_winner} out of {num_profs} profiles have no absolute majority winner: {num_profs_no_absolute_maj_winner/num_profs}\n")

    print(f"Type 1: Original Profile Violations\n{num_orig_prof_violations} violations out of {num_profs_no_absolute_maj_winner} profiles: {num_orig_prof_violations/num_profs_no_absolute_maj_winner}\n")

    print(f"Type 2: Modified Profile Violations\n{num_modified_prof_violations} violations out of {num_profs_no_absolute_maj_winner} profiles: {num_modified_prof_violations/num_profs_no_absolute_maj_winner}\n")

    print(f"Total Violations\n{(num_orig_prof_violations + num_modified_prof_violations)} violations out of {num_profs_no_absolute_maj_winner} profiles: {(num_orig_prof_violations + num_modified_prof_violations)/num_profs_no_absolute_maj_winner}")


In [None]:
num_trials = 100_000
num_cands = 3
num_voters = 1001

profiles = [generate_profile(num_cands, num_voters).to_profile_with_ties() for _ in range(1000)]

find_violations(profiles)

  0%|          | 0/1000 [00:00<?, ?it/s]

1000 out of 1000 profiles have no absolute majority winner: 1.0

Type 1: Original Profile Violations
605 violations out of 1000 profiles: 0.605

Type 2: Modified Profile Violations
186 violations out of 1000 profiles: 0.186

Total Violations
791 violations out of 1000 profiles: 0.791


## Stable Voting Website

In [14]:
profiles = [ProfileWithTies.read(fname) for fname in glob.glob('real_elections/stable_voting_dataset/*')]

find_violations(profiles)

  0%|          | 0/657 [00:00<?, ?it/s]

60 out of 197 profiles have no absolute majority winner: 0.30456852791878175

Type 1: Original Profile Violations
37 violations out of 60 profiles: 0.6166666666666667

Type 2: Modified Profile Violations
11 violations out of 60 profiles: 0.18333333333333332

Total Violations
48 violations out of 60 profiles: 0.8


## Preflib Dataset

In [15]:
profiles = []
elections = []
file_names = []
for fname in tqdm(glob.glob("real_elections/preflib_dataset/*.soi")):

    election_name = fname.split("/")[-1].split(".")[0]

    if election_name in elections: 
        continue

    elections.append(election_name)
    file_names.append(fname)
    profiles.append(ProfileWithTies.read(fname))
    
for fname in tqdm(glob.glob("real_elections/preflib_dataset/*.toi")):

    election_name = fname.split("/")[-1].split(".")[0]

    if election_name in elections: 
        continue

    elections.append(election_name)
    file_names.append(fname)
    profiles.append(ProfileWithTies.read(fname))

for fname in tqdm(glob.glob("real_elections/preflib_dataset/*.toc")):

    election_name = fname.split("/")[-1].split(".")[0]

    if election_name in elections: 
        continue

    elections.append(election_name)
    file_names.append(fname)
    profiles.append(ProfileWithTies.read(fname))

find_violations(profiles)

  0%|          | 0/308 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/364 [00:00<?, ?it/s]

104 out of 308 profiles have no absolute majority winner: 0.33766233766233766

Type 1: Original Profile Violations
28 violations out of 104 profiles: 0.2692307692307692

Type 2: Modified Profile Violations
30 violations out of 104 profiles: 0.28846153846153844

Total Violations
58 violations out of 104 profiles: 0.5576923076923077


## CIVS Dataset

In [16]:
# read a json file
import json
profiles = []
_civs_elections = json.load(open("real_elections/civs_dataset/2024-12-15.json"))

civs_elections = _civs_elections['elections']
profiles = []
for election in tqdm(civs_elections):
    if election["test"] == "yes":
        continue
    ballots = []
    num_candidates = election['num_choices']
    for b in election['ballots']:
        ballots.append({cand: rank for cand, rank in enumerate(b) if rank != "?"})
    profiles.append(ProfileWithTies(ballots, candidates=list(range(num_candidates))))
    
find_violations(profiles)

  0%|          | 0/22477 [00:00<?, ?it/s]

  0%|          | 0/22477 [00:00<?, ?it/s]

590 out of 1883 profiles have no absolute majority winner: 0.3133297928836962

Type 1: Original Profile Violations
339 violations out of 590 profiles: 0.5745762711864407

Type 2: Modified Profile Violations
114 violations out of 590 profiles: 0.19322033898305085

Total Violations
453 violations out of 590 profiles: 0.7677966101694915


## Otis 2022 Dataset

In [15]:
# This will take about 17 minutes to run

items_to_skip = [
    '"skipped', 
    'overvote', 
    'undervote']

profiles = []
enames = []
for file in tqdm(glob.glob("real_elections/otis_2022_dataset/*.zip")):

    if not file.endswith(".csv") and not file.endswith(".zip"):
        continue
    # if file ends with .zip unzip the file and process it 
    if file.endswith(".zip"):
        with ZipFile(file, 'r') as zip_ref:
            # Iterate through each file inside the zip
            for name in zip_ref.namelist():
                # Only process .csv files
                if name.endswith(".csv"):
                    with zip_ref.open(name) as f:
                        # Read the CSV data into memory
                        csv_bytes = f.read()
                        # Decode bytes to string
                        csv_text = csv_bytes.decode('utf-8')
                        # Create a file-like StringIO object
                        csv_buffer = io.StringIO(csv_text)
                        
                        # Now pass this StringIO to ProfileWithTies.read
                        prof = ProfileWithTies.read(
                            csv_buffer,
                            file_format='csv',
                            csv_format='rank_columns',
                            items_to_skip=items_to_skip
                        )
                        enames.append(name)
                        profiles.append(prof)
                        
find_violations(profiles)

  0%|          | 0/458 [00:00<?, ?it/s]

  0%|          | 0/458 [00:00<?, ?it/s]