# Majority Graph Statistics

In [1]:
import itertools
from itertools import combinations
import networkx as nx
from tqdm.notebook import tqdm  
import numpy as np  
from pref_voting.profiles_with_ties import ProfileWithTies
from pref_voting.io.readers import preflib_to_profile
from preflibtools.instances import OrdinalInstance
import glob
import io
from zipfile import ZipFile


In [2]:

def majority_graph_statistics_four_candidates(
        profiles,
        use_extended_strict_preference=True,
        max_num_candidates=42):
    """Print how often each 4-candidate majority-graph shape occurs in ``profiles``.

    For every profile whose number of candidates is between 4 and
    ``max_num_candidates`` (inclusive), every 4-element subset of its
    candidates is examined.  Subsets in which some pairwise margin is zero
    are ignored.  For the remaining subsets, the margin-graph edges between
    the four candidates are classified, up to isomorphism, as one of four
    reference graphs:

    * linear order  -- 0 > 1 > 2 > 3, all edges pointing "down";
    * bottom cycle  -- 0 beats everyone, and 1 -> 2 -> 3 -> 1 form a cycle;
    * top cycle     -- 0 -> 1 -> 2 -> 0 form a cycle and all three beat 3;
    * four cycle    -- 0 -> 1 -> 2 -> 3 -> 0, plus 2 -> 0 and 1 -> 3.

    The summary is printed; nothing is returned.

    Args:
        profiles: Iterable of profile objects.  Each must provide
            ``candidates``, ``num_voters``, ``margin_graph()`` and (when
            ``use_extended_strict_preference`` is true)
            ``use_extended_strict_preference()``.
        use_extended_strict_preference: If true, call
            ``prof.use_extended_strict_preference()`` on every profile
            before anything else.  Note that this is applied to *all*
            profiles, including ones that are subsequently skipped.
        max_num_candidates: Profiles with more candidates than this are
            skipped.
    """
    # Reference graphs, keyed by the label used in the printed summary.
    # Insertion order fixes both the order of the isomorphism checks and the
    # order of the summary lines.
    reference_graphs = {
        "Linear orders": nx.DiGraph(
            [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]),
        "Bottom cycles": nx.DiGraph(
            [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3), (3, 1)]),
        "Top cycles": nx.DiGraph(
            [(0, 1), (1, 2), (2, 0), (0, 3), (1, 3), (2, 3)]),
        "Four cycles": nx.DiGraph(
            [(0, 1), (1, 2), (2, 3), (3, 0), (2, 0), (1, 3)]),
    }
    type_counts = dict.fromkeys(reference_graphs, 0)

    count = 0
    profile_count = 0
    num_voters_list = []

    for prof in tqdm(profiles):

        if use_extended_strict_preference:
            prof.use_extended_strict_preference()

        num_candidates = len(prof.candidates)
        if num_candidates > max_num_candidates or num_candidates < 4:
            continue

        profile_count += 1
        num_voters_list.append(prof.num_voters)

        # Build the margin graph only for profiles that pass the size
        # filters, and read its edge list once per profile rather than once
        # per 4-candidate subset.
        mg = prof.margin_graph()
        mg_edges = list(mg.edges)

        for subset in itertools.combinations(prof.candidates, 4):
            # Skip subsets containing a tied pair (zero margin).
            if any(mg.margin(a, b) == 0
                   for a, b in itertools.permutations(subset, 2)):
                continue

            count += 1

            # Restrict the margin graph's directed edges to this subset.
            members = set(subset)
            directed_edges = [
                (a, b) for (a, b, _) in mg_edges
                if a in members and b in members
            ]
            g = nx.DiGraph()
            g.add_nodes_from(subset)
            g.add_edges_from(directed_edges)

            # Find the appropriate isomorphism type.
            for label, reference in reference_graphs.items():
                if nx.is_isomorphic(g, reference):
                    type_counts[label] += 1
                    break
            else:  # this should never be reached!
                print("Unknown isomorphism type")
                print(subset)
                print(directed_edges)
                print(g.edges)

    print(f"There are {profile_count} relevant profiles.")
    if num_voters_list:
        print(f"The average number of voters is {np.mean(num_voters_list)}")
    else:
        # np.mean([]) would warn and yield nan; say so explicitly instead.
        print("The average number of voters is undefined (no relevant profiles).")
    print("Total number of 4-candidate subprofiles with no zero margins:", count)
    for label, type_count in type_counts.items():
        if count:
            print(f"{label}: {(type_count / count)} ({type_count} out of {count})")
        else:
            # Avoid ZeroDivisionError when no subset qualified.
            print(f"{label}: n/a ({type_count} out of {count})")


## Stable Voting Dataset

In [3]:
# Read every file in the Stable Voting dataset directory as a profile with
# ties, then report the 4-candidate majority-graph statistics.
stable_voting_paths = glob.glob('real_elections/stable_voting_dataset/*')
profiles = [ProfileWithTies.read(path) for path in stable_voting_paths]

majority_graph_statistics_four_candidates(profiles)


  0%|          | 0/657 [00:00<?, ?it/s]

There are 304 relevant profiles.
The average number of voters is 11.197368421052632
Total number of 4-candidate subprofiles with no zero margins: 47326
Linear orders: 0.9615433377002071 (45506 out of 47326)
Bottom cycles: 0.014135992900308498 (669 out of 47326)
Top cycles: 0.017559058445674684 (831 out of 47326)
Four cycles: 0.006761610953809745 (320 out of 47326)


## Preflib Elections

In [4]:
# Load at most one profile per election name from the PrefLib dataset
# directory, then report the 4-candidate majority-graph statistics.
profiles = []
elections = []  # election names already loaded, in load order

# Extensions are scanned in this order, so when the same election name is
# available with several extensions the .soi file is used in preference to
# .toi, and .toi in preference to .toc.
for extension in ("soi", "toi", "toc"):
    for fname in glob.glob(f"real_elections/preflib_dataset/*.{extension}"):

        # Election name = last path component up to its first dot.
        election_name = fname.split("/")[-1].split(".")[0]

        if election_name in elections:
            continue

        elections.append(election_name)
        profiles.append(ProfileWithTies.read(fname))

majority_graph_statistics_four_candidates(profiles)


  0%|          | 0/364 [00:00<?, ?it/s]

There are 354 relevant profiles.
The average number of voters is 45332.0988700565
Total number of 4-candidate subprofiles with no zero margins: 44323
Linear orders: 0.9993682738081808 (44295 out of 44323)
Bottom cycles: 0.0003158630959095729 (14 out of 44323)
Top cycles: 0.0003158630959095729 (14 out of 44323)
Four cycles: 0.0 (0 out of 44323)


## Otis 2022 Dataset

In [5]:
# This will take about 17 minutes to run

# Values handed to ProfileWithTies.read as `items_to_skip`
# (presumably non-candidate ballot markers -- confirm against the reader).
items_to_skip = [
    'skipped',
    'overvote',
    'undervote']

profiles = []

# The glob pattern already restricts the listing to .zip archives, so no
# further extension check on `file` is needed.
for file in tqdm(glob.glob("real_elections/otis_2022_dataset/*.zip")):
    with ZipFile(file, 'r') as zip_ref:
        # Iterate through each member of the archive
        for name in zip_ref.namelist():
            # Only process .csv files
            if not name.endswith(".csv"):
                continue

            # Read the CSV member fully into memory and decode it to text
            with zip_ref.open(name) as f:
                csv_text = f.read().decode('utf-8')

            # Wrap the text in a file-like object for ProfileWithTies.read
            prof = ProfileWithTies.read(
                io.StringIO(csv_text),
                file_format='csv',
                csv_format='rank_columns',
                items_to_skip=items_to_skip
            )
            profiles.append(prof)

majority_graph_statistics_four_candidates(profiles)


  0%|          | 0/458 [00:00<?, ?it/s]

  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read_csv(filename)
  df = pd.read

  0%|          | 0/458 [00:00<?, ?it/s]

There are 289 relevant profiles.
The average number of voters is 88917.24221453287
Total number of 4-candidate subprofiles with no zero margins: 115315
Linear orders: 0.9998265620257556 (115295 out of 115315)
Bottom cycles: 6.070329098556129e-05 (7 out of 115315)
Top cycles: 0.00011273468325889954 (13 out of 115315)
Four cycles: 0.0 (0 out of 115315)
