# Expected Social Utility Performance

In [15]:
from pref_voting.voting_methods import *
from pref_voting.probabilistic_methods import *
from pref_voting.generate_spatial_profiles import *
from pref_voting.generate_profiles import *
from pref_voting.generate_utility_profiles import *
from pref_voting.utility_methods import *
from pref_voting.utility_functions import *
from tqdm.notebook import tqdm
from multiprocess import Pool, cpu_count, current_process

import numpy as np
import pandas as pd
from functools import partial
import seaborn as sns
import matplotlib.pyplot as plt
from functools import partial 
import datetime
import time
import pref_voting
from pref_voting.grade_methods import *
from numba import jit
# needed to ensure that random numbers are different in each process (for multiprocessing)
import os
import time
#from memory_profiler import profile


In [None]:
# Leave two cores free for the notebook/OS, but never ask for fewer than one
# worker: on machines with two or fewer cores `cpu_count() - 2` would be <= 0,
# and Pool() refuses a process count below 1.
num_cpus = max(1, cpu_count() - 2)
print(f"Number of CPUs: {num_cpus}")

In [None]:
# Show the installed pref_voting version. The bare expression on the last line
# has no effect when run as a script; in a notebook cell it is presumably what
# gets displayed as the cell output.
import pref_voting
pref_voting.__version__

## Helper functions

In [18]:
def expected_utility(prob, util_func):
    """Return the probability-weighted sum of utilities.

    Args:
        prob: Mapping from each outcome to its probability (weight).
        util_func: Callable giving the utility of a single outcome.

    Returns:
        The sum over all outcomes ``c`` in ``prob`` of
        ``prob[c] * util_func(c)``; ``0`` when ``prob`` is empty.
    """
    return sum(weight * util_func(outcome) for outcome, weight in prob.items())

def to_linear_prof(uprof): 
    """Build a ranking ``Profile`` from a utility profile.

    For every utility function in ``uprof.utilities`` the elements of
    ``uprof.domain`` are sorted from highest to lowest utility. The sort is
    stable, so elements with equal utility keep their order from
    ``uprof.domain``.
    """
    rankings = []
    for utility in uprof.utilities:
        rankings.append(sorted(uprof.domain, key=utility, reverse=True))
    return Profile(rankings)

def find_winning_probs(vms, prob_vms, prof): 
    """Collect the winning-probability output of every method on ``prof``.

    Args:
        vms: Methods queried through their ``prob(prof)`` method.
        prob_vms: Methods that are called directly as ``vm(prof)``.
        prof: The profile handed to every method.

    Returns:
        A dict mapping each method's ``name`` to whatever the method returned.
        Entries from ``prob_vms`` are written last, so they win on a name clash.
    """
    winning_probs = {}
    for method in vms:
        winning_probs[method.name] = method.prob(prof)
    winning_probs.update((method.name, method(prof)) for method in prob_vms)
    return winning_probs

@jit(nopython=True, fastmath=True)
def soc_util_performance(
        avg_util_of_util_ws, 
        avg_util_of_cand, 
        avg_util_of_vm_ws):
    """Rescale a method's average utility between two reference levels.

    Computes ``(vm - cand) / (util - cand)`` where ``vm`` is
    ``avg_util_of_vm_ws``, ``cand`` is ``avg_util_of_cand`` and ``util`` is
    ``avg_util_of_util_ws``. The result is 1 when the method matches
    ``avg_util_of_util_ws`` and 0 when it matches ``avg_util_of_cand``.
    The denominator is not guarded against zero.
    """
    gain_over_baseline = avg_util_of_vm_ws - avg_util_of_cand
    best_gain_over_baseline = avg_util_of_util_ws - avg_util_of_cand
    return gain_over_baseline / best_gain_over_baseline



In [19]:
def estimated_variance_of_sampling_dist(mean_for_each_vm, values_for_each_vm):
    """Estimate the variance of the sample mean, row by row.

    Args:
        mean_for_each_vm: 1d numpy array holding one mean per row of
            ``values_for_each_vm``.
        values_for_each_vm: 2d numpy array; each row holds the ``m`` sampled
            values of one voting method.

    Returns:
        1d numpy array with, for each row,
        ``sum((value - mean) ** 2) / (m * (m - 1))``.
    """
    num_samples = values_for_each_vm.shape[1]
    scale = 1 / (num_samples * (num_samples - 1))

    # Turn the means into a column so they broadcast across each row.
    deviations = values_for_each_vm - mean_for_each_vm[:, np.newaxis]
    return scale * np.square(deviations).sum(axis=1)

def estimated_std_error(mean_for_each_vm, values_for_each_vm):
    """Return the estimated standard error of the mean for each row.

    ``values_for_each_vm`` is a 2d numpy array and ``mean_for_each_vm`` the
    matching array of row means. The result is the element-wise square root
    of ``estimated_variance_of_sampling_dist`` for the same arguments.
    """
    sampling_variance = estimated_variance_of_sampling_dist(
        mean_for_each_vm, values_for_each_vm
    )
    return np.sqrt(sampling_variance)

## Main Simulation Functions

In [20]:
def generate_samples_randomly_polarized_voters(
        num_polarized_cands, 
        num_centrist_cands,
        num_voters,
        prob_centrist_voter, 
        vms, 
        prob_vms, 
        grade_vms,
        num_dims, 
        cand_cov,
        voter_cov,
        normalization,
        voter_utility,
        num_dims_polarized,
        polarization_distance,
        num_profiles): 
    """Sample spatial profiles and score every voting method on each of them.

    Candidates and voters are each described by three clusters: one centred at
    ``+polarization_distance`` on the first ``num_dims_polarized`` coordinates
    (0 elsewhere), one at ``-polarization_distance`` on those coordinates, and
    one at the origin.

    Args:
        num_polarized_cands: Candidates split between the two off-centre
            clusters (floor half to the ``+`` cluster, the rest to ``-``).
        num_centrist_cands: Candidates in the origin cluster.
        num_voters: Passed through to the spatial profile generator.
        prob_centrist_voter: Third entry of the origin voter cluster; the two
            off-centre voter clusters each get ``(1 - prob_centrist_voter) / 2``.
        vms: Methods queried via ``vm.prob(profile)`` on the ranking profile.
        prob_vms: Methods called directly on the ranking profile.
        grade_vms: Methods queried via ``vm.prob(...)`` on the approval profile.
        num_dims: Length of every cluster centre.
        cand_cov: Candidate covariance; ``None`` means the identity matrix.
        voter_cov: Voter covariance; ``None`` means the identity matrix.
        normalization: ``'range'`` or ``'score'`` selects the corresponding
            utility-profile normalization; any other value leaves utilities as is.
        voter_utility: Utility function used to turn spatial profiles into
            utility profiles.
        num_dims_polarized: Number of leading coordinates that are shifted.
        polarization_distance: Size of the shift on those coordinates.
        num_profiles: Number of profiles to generate (and rows returned).

    Returns:
        A numpy array with one row per profile. Column 0 is the mean average
        utility of the ``sum_utilitarian`` winners, column 1 the mean average
        utility over all candidates, and the remaining columns the expected
        average utility under each method, ordered ``vms + prob_vms + grade_vms``.
    """
    if cand_cov is None:
        cand_cov = np.eye(num_dims)
    if voter_cov is None:
        voter_cov = np.eye(num_dims)

    def shifted_center(offset):
        # A fresh array on every call: `offset` on the polarized coordinates, 0 elsewhere.
        return np.array([offset] * num_dims_polarized + [0] * (num_dims - num_dims_polarized))

    left_cand_count = num_polarized_cands // 2
    right_cand_count = num_polarized_cands - left_cand_count
    partisan_voter_prob = (1 - prob_centrist_voter) / 2

    cand_clusters = [
        (shifted_center(polarization_distance), cand_cov, left_cand_count),
        (shifted_center(-1 * polarization_distance), cand_cov, right_cand_count),
        (np.array([0] * num_dims), cand_cov, num_centrist_cands),
    ]
    voter_clusters = [
        (shifted_center(polarization_distance), voter_cov, partisan_voter_prob),
        (shifted_center(-1 * polarization_distance), voter_cov, partisan_voter_prob),
        (np.array([0] * num_dims), voter_cov, prob_centrist_voter),
    ]

    spatial_profs = generate_spatial_profile_polarized_cands_randomly_polarized_voters(
        cand_clusters,
        num_voters,
        voter_clusters,
        num_profiles = num_profiles)

    raw_uprofs = [sp.to_utility_profile(utility_function=voter_utility) for sp in spatial_profs]

    del spatial_profs  # free up memory

    if normalization == 'range':
        uprofs = [raw.normalize_by_range() for raw in raw_uprofs]
    elif normalization == 'score':
        uprofs = [raw.normalize_by_standard_score() for raw in raw_uprofs]
    else: 
        uprofs = raw_uprofs

    ranking_profs = [to_linear_prof(up) for up in uprofs]

    # The candidate set is read from the first profile only.
    candidates = ranking_profs[0].candidates

    utilitarian_winners = [sum_utilitarian(up) for up in uprofs]
    avg_utility_fns = [up.avg_utility_function() for up in uprofs]

    prob_tables = [find_winning_probs(vms, prob_vms, rp) for rp in ranking_profs]

    if len(grade_vms) > 0:
        # Grade-based methods are evaluated on approval profiles and merged
        # into the same per-profile table.
        approval_profs = [up.to_approval_profile() for up in uprofs]
        for table, approval_prof in zip(prob_tables, approval_profs):
            table.update(find_winning_probs(grade_vms, [], approval_prof))

        del approval_profs

    del ranking_profs  # free up memory

    best_avg_utils = [
        np.average([avg_fn(winner) for winner in winners])
        for avg_fn, winners in zip(avg_utility_fns, utilitarian_winners)
    ]
    cand_avg_utils = [
        np.average([avg_fn(cand) for cand in candidates])
        for avg_fn in avg_utility_fns
    ]

    all_methods = vms + prob_vms + grade_vms
    rows = []
    for pidx in range(num_profiles):
        method_utils = [
            expected_utility(prob_tables[pidx][method.name], avg_utility_fns[pidx])
            for method in all_methods
        ]
        rows.append(np.array([best_avg_utils[pidx], cand_avg_utils[pidx]] + method_utils))

    return np.array(rows)




In [21]:
def _summarize_soc_util_performance(avg_util_of_util_ws, avg_util_of_cand, avg_util_of_vm_ws):
    # Per-method mean, estimated standard error and variance of the
    # social-utility-performance values over all trials collected so far.
    values = np.array([soc_util_performance(avg_util_of_util_ws, avg_util_of_cand, avg_u)
                       for avg_u in avg_util_of_vm_ws])
    mean_s = np.mean(values, axis=1)
    return mean_s, estimated_std_error(mean_s, values), np.var(values, axis=1)


def run_sim_with_estimated_standard_error(
        generate_samples, 
        max_std_error, 
        initial_trials=1000, 
        step_trials=1000,
        min_num_trials = 10000
        ):
    """Sample until every method's estimated standard error is small enough.

    Args:
        generate_samples: Callable accepting ``num_profiles=<int>`` and
            returning a 2d numpy array with one row per profile: column 0 and
            column 1 are the two reference utilities passed to
            ``soc_util_performance``, the remaining columns hold one value
            per voting method.
        max_std_error: Sampling continues while any method's estimated
            standard error exceeds this value.
        initial_trials: Number of profiles in the first batch.
        step_trials: Number of profiles added by each further batch.
        min_num_trials: Sampling also continues while fewer than this many
            profiles have been collected.

    Returns:
        ``(mean_s, est_std_errors, variance_s, num_trials)``: three 1d arrays
        with one entry per method (mean performance, estimated standard error,
        and ``np.var`` of the performance values) plus the total number of
        profiles sampled.

    Raises:
        ValueError: If ``generate_samples`` returns no per-method columns.
    """
    results = generate_samples(num_profiles = initial_trials)
    avg_util_of_util_ws, avg_util_of_cand, *avg_util_of_vm_ws = results.T

    del results  # drop the local reference to the full result matrix

    if len(avg_util_of_vm_ws) == 0:
        raise ValueError(
            "generate_samples returned no voting-method columns; "
            "enable at least one voting method")

    # Compute all three statistics up front so they are defined even when the
    # loop below never runs (previously variance_s was only set inside it).
    mean_s, est_std_errors, variance_s = _summarize_soc_util_performance(
        avg_util_of_util_ws, avg_util_of_cand, avg_util_of_vm_ws)

    num_trials = initial_trials

    while np.any(est_std_errors > max_std_error) or (num_trials < min_num_trials):

        new_results = generate_samples(num_profiles = step_trials)

        new_avg_util_of_util_ws, new_avg_util_of_cand, *new_avg_util_of_vm_ws = new_results.T

        num_trials += step_trials

        # Append the new batch to the running per-profile series.
        avg_util_of_util_ws = np.concatenate((avg_util_of_util_ws, new_avg_util_of_util_ws), axis=0)
        avg_util_of_cand = np.concatenate((avg_util_of_cand, new_avg_util_of_cand), axis=0)
        avg_util_of_vm_ws = np.array([np.concatenate((a, b)) for a, b in zip(avg_util_of_vm_ws, new_avg_util_of_vm_ws)])

        mean_s, est_std_errors, variance_s = _summarize_soc_util_performance(
            avg_util_of_util_ws, avg_util_of_cand, avg_util_of_vm_ws)

    return mean_s, est_std_errors, variance_s, num_trials

## Setting Simulation Parameters

In [22]:

# Library version stored in every output row (column "pref_voting_version").
pref_voting_version = pref_voting.__version__

# Parameter grid. The key-building loops iterate these lists in the order
# written here, so the order determines the order of the keys and therefore
# the numbering of the chunks / output files.
all_num_cands =   [3, 4, 5, 6, 10, 7, 8, 9] 
all_num_voters = [11, 101, 1001]
all_num_dims =    [1, 2, 4, 8]
# "range" selects normalize_by_range(); any value other than "range"/"score"
# (here "none") leaves the utility profiles unnormalized.
all_normalizations = ["none", "range"]
# The unpolarized case (False) is listed first; the key-building loop relies on
# handling it before the polarized branch overwrites the covariance matrices.
all_is_polarized = [False, True]
# Symbolic settings, translated to concrete counts per number of candidates /
# dimensions by num_cands_to_num_centrists and dim_to_num_dims_polarized.
all_num_centrist_cands = ["none", "half", "all"]
all_prob_centrist_voters = [0.0, 0.5, 1.0]
all_num_dims_polarized = ["one", "half", "all"]
all_polarization_distances = [1]
# Second argument to generate_covariance in the polarized case
# (presumably a standard deviation -- confirm against pref_voting).
all_subpopulation_stds = [1, 0.5]
# 1: candidates reuse the voters' covariance matrix.
# 0.5: candidates get their own matrix, generated with half the voters' value.
all_dispersion =  [1, 0.5]
# Third argument to generate_covariance.
all_correlation = [0, 0.5]
# Display name -> utility function. "Matthews" is skipped for num_dims == 1
# when the keys are built.
all_voter_utilities = {
    "Linear": linear_utility, 
    "Quadratic": quadratic_utility, 
    "Shepsle": shepsle_utility,
    "Matthews": matthews_utility,
    "Mixed Proximity-RM": mixed_rm_utility,
    "RM": rm_utility    
    }

# Voting methods to evaluate. Uncomment an entry to include it in the run.
# NOTE(review): every entry in all three lists is currently commented out, so
# the lists are empty; presumably at least one method must be enabled before
# the simulation produces any output -- confirm before running.
#
# vms: queried through `vm.prob(profile)` on the ranking profile.
# NOTE(review): `river_zt` is listed twice below; enable only one of them.
vms = [
    # plurality_veto,
    # random_consensus_builder_st,
    # MLRCB,
    # MLRaDiUS,
    # bracket_voting,
    # condorcet_plurality,
    # knockout,
    # loss_trimmer,
    # river_zt,
    # smith_set,
    # superior_voting,
    # condorcet,
    # copeland,
    # copeland_local_borda,
    # copeland_global_borda,
    # plurality,
    # anti_plurality,
    # borda,
    # instant_runoff,
    # plurality_with_runoff_put,
    # benham, 
    # bottom_two_runoff_instant_runoff,
    # coombs,
    # baldwin,
    # weak_nanson,
    # raynaud,
    # minimax,
    # stable_voting,
    # beat_path_Floyd_Warshall,
    # ranked_pairs_zt,
    # split_cycle,
    # daunou,
    # blacks,
    # condorcet_irv,
    # smith_irv, 
    # bucklin,
    # woodall, 
    # river_zt,
    # smith_minimax, 
    # tideman_alternative_smith
]
# prob_vms: called directly as `vm(profile)` on the ranking profile.
prob_vms = [
    # random_dictator,
    # pr_borda,
    # maximal_lottery, 
    # RaDiUS    
]
# grade_vms: queried through `vm.prob(...)` on the approval profile derived
# from each utility profile.
grade_vms = [
    # approval
]

# Stopping rule for run_sim_with_estimated_standard_error (also written to
# every output row):
# keep sampling while any method's estimated standard error exceeds this value
max_std_error = 0.005
# number of profiles in the first batch
initial_trials = 1000 
# number of profiles added by each additional batch
step_trials = 1000
# keep sampling until at least this many profiles have been generated
min_num_trials = 10000

# For each number of candidates: the distinct centrist-candidate counts, in
# ascending order, obtained from the symbolic settings in all_num_centrist_cands.
num_cands_to_num_centrists = {
    n_cands: sorted({
        {"none": 0, "one": 1, "half": n_cands // 2, "all": n_cands}[setting]
        for setting in all_num_centrist_cands
    })
    for n_cands in all_num_cands
}

# For each number of dimensions: the distinct, strictly positive counts of
# polarized dimensions, in ascending order ("half" of one dimension is 0 and
# is therefore dropped).
dim_to_num_dims_polarized = {
    n_dims: sorted(
        count
        for count in {
            {"one": 1, "half": n_dims // 2, "all": n_dims}[setting]
            for setting in all_num_dims_polarized
        }
        if count > 0
    )
    for n_dims in all_num_dims
}



In [23]:
# Enumerate every parameter combination and record it twice, in lockstep:
#   *_keys         -- descriptive tuple; unpacked into the CSV columns when the
#                     results are saved
#   *_keys_for_fn  -- positional arguments for
#                     generate_samples_randomly_polarized_voters (everything
#                     except num_profiles, which is supplied per batch)
# Entry i of a *_keys list describes entry i of the matching *_keys_for_fn list.
polarized_keys_for_fn = []
unpolarized_keys_for_fn = []

polarized_keys = []
unpolarized_keys = []

for normalization in all_normalizations: 
    for num_voters in all_num_voters: 
        for num_cands in all_num_cands: 
            for num_dims in all_num_dims: 
                for dispersion in all_dispersion: 
                    for correlation in all_correlation: 
                        for voter_utility_name, voter_utility in all_voter_utilities.items():
                            # The "Matthews" utility is not simulated in one dimension.
                            if num_dims == 1 and voter_utility_name == "Matthews":
                                continue
                            # Covariances for the unpolarized case. With dispersion 1 the
                            # candidates share the voters' matrix; with 0.5 they get their
                            # own matrix generated with 0.5 as the second argument.
                            # Any other dispersion value would leave both names unchanged
                            # from the previous iteration.
                            if dispersion == 1: 
                                voter_cov = generate_covariance(num_dims, 1, correlation)
                                cand_cov = voter_cov
                            elif dispersion == 0.5:
                                voter_cov = generate_covariance(num_dims, 1, correlation)
                                cand_cov = generate_covariance(num_dims, 0.5, correlation)
                   
                            # NOTE: the unpolarized branch uses the covariances computed just
                            # above, so it must run before the polarized branch reassigns
                            # them (all_is_polarized lists False first).
                            for is_polarized in all_is_polarized:
                                if not is_polarized: 
                                    # Zero distance and zero polarized dimensions: every
                                    # cluster centre passed to the generator is the origin.
                                    num_centrist_cands = 0
                                    prob_centrist_voters = 0.0
                                    num_dims_polarized = 0
                                    polarization_distance = 0
                                    unpolarized_keys.append((
                                        num_cands,
                                        num_voters,
                                        num_dims,
                                        correlation,
                                        dispersion,
                                        num_dims_polarized,
                                        1, # subpopulation_std
                                        "None", # polarization_distance (recorded as the string "None")
                                        "None", # num_centrist_cands (recorded as the string "None")
                                        "None", # prob_centrist_voters (recorded as the string "None")
                                        voter_utility_name,
                                        normalization))

                                    unpolarized_keys_for_fn.append((
                                        num_cands - num_centrist_cands,
                                        num_centrist_cands, 
                                        num_voters,
                                        prob_centrist_voters, 
                                        vms,
                                        prob_vms,
                                        grade_vms,
                                        num_dims,
                                        cand_cov,
                                        voter_cov, 
                                        normalization,
                                        voter_utility,
                                        num_dims_polarized,
                                        polarization_distance,))

                                else: # is_polarized is True
                                    for num_dims_polarized in dim_to_num_dims_polarized[num_dims]:
                                        for num_centrist_cands in num_cands_to_num_centrists[num_cands]: 
                                            for prob_centrist_voters in all_prob_centrist_voters:
                                                # Skip the combination in which all candidates
                                                # and all voters are centrist.
                                                if num_centrist_cands == num_cands and prob_centrist_voters == 1.0:
                                                    continue
                                                for polarization_distance in all_polarization_distances:
                                                    for subpop_std in all_subpopulation_stds:
                                                        # Same dispersion rule as above, but scaled
                                                        # by the subpopulation setting. This
                                                        # overwrites voter_cov / cand_cov.
                                                        if dispersion == 1: 
                                                            voter_cov = generate_covariance(num_dims, subpop_std, correlation)
                                                            cand_cov = voter_cov
                                                        elif dispersion == 0.5:
                                                            voter_cov = generate_covariance(num_dims, subpop_std, correlation)
                                                            cand_cov = generate_covariance(num_dims, 0.5 * subpop_std, correlation)
                                                        polarized_keys.append((  
                                                            num_cands,
                                                            num_voters,
                                                            num_dims,
                                                            correlation,
                                                            dispersion,
                                                            num_dims_polarized,
                                                            subpop_std,
                                                            polarization_distance,
                                                            num_centrist_cands,
                                                            prob_centrist_voters,
                                                            voter_utility_name,
                                                            normalization))

                                                        # First argument is the number of
                                                        # non-centrist ("polarized") candidates.
                                                        polarized_keys_for_fn.append((
                                                            num_cands - num_centrist_cands,
                                                            num_centrist_cands, 
                                                            num_voters, 
                                                            prob_centrist_voters,
                                                            vms,
                                                            prob_vms,
                                                            grade_vms,
                                                            num_dims,
                                                            cand_cov,
                                                            voter_cov, 
                                                            normalization,
                                                            voter_utility,
                                                            num_dims_polarized,
                                                            polarization_distance,))
                                                    



In [None]:

# Unpolarized settings come first, followed by the polarized ones; the two
# combined lists stay index-aligned.
all_keys = [*unpolarized_keys, *polarized_keys]
all_keys_for_fn = [*unpolarized_keys_for_fn, *polarized_keys_for_fn]
print("Total number of keys: ", len(all_keys))

# Cut both lists into consecutive chunks of (at most) 100 entries at the same
# offsets, so chunk i of one list lines up with chunk i of the other.
all_keys_chunks = [
    all_keys[start:start + 100]
    for start in range(0, len(all_keys), 100)
]
all_keys_for_fn_chunks = [
    all_keys_for_fn[start:start + 100]
    for start in range(0, len(all_keys_for_fn), 100)
]

print("Total number of chunks: ", len(all_keys_chunks))

In [25]:
def run_sim_with_params(key): 
    """Run one adaptive simulation for a single parameter tuple.

    ``key`` holds the positional arguments of
    ``generate_samples_randomly_polarized_voters`` except ``num_profiles``.
    The stopping rule comes from the module-level ``max_std_error``,
    ``initial_trials``, ``step_trials`` and ``min_num_trials``.

    Returns:
        Whatever ``run_sim_with_estimated_standard_error`` returns:
        ``(mean_s, est_std_errors, variance_s, num_trials)``.
    """
    sample_generator = partial(generate_samples_randomly_polarized_voters, *key)
    stopping_rule = {
        "initial_trials": initial_trials,
        "step_trials": step_trials,
        "min_num_trials": min_num_trials,
    }
    return run_sim_with_estimated_standard_error(
        sample_generator,
        max_std_error,
        **stopping_rule)


### Main Simulation Loop

In [None]:
# Index of the first chunk to process; raise it to resume an interrupted run
# without recomputing chunks whose CSV files already exist.
starting_chunk_idx = 0 
data_dir  = 'data'

# Create the output directory up front so a missing folder cannot make
# to_csv fail only after a whole chunk has been simulated.
os.makedirs(data_dir, exist_ok=True)

# Output columns, in the order they appear in each CSV file.
result_columns = [
    "num_cands",
    "num_voters",
    "num_dims",
    "correlation",
    "rel_dispersion",
    "num_dims_polarized",
    "subpopulation_std",
    "polarization_distance",
    "num_centrist_cands",
    "prob_centrist_voters",
    "voter_utility",
    "normalization",
    "initial_trials",
    "step_trials",
    "min_num_trials",
    "max_std_error",
    "num_trials",
    "vm",
    "exp_soc_util_performance",
    "est_std_error",
    "variance_soc_util_performance_values",
    "dt",
    "pref_voting_version",
]

# Same method order as the per-method columns produced by the sample generator.
all_vms = vms + prob_vms + grade_vms

# NOTE(review): no per-process reseeding is visible in this file; confirm that
# the worker processes do not start from identical random-number state.
for idx, (chunk, chunk_for_fn) in enumerate(zip(all_keys_chunks[starting_chunk_idx:], all_keys_for_fn_chunks[starting_chunk_idx:])):
    chunk_num = idx + starting_chunk_idx
    print("starting chunk", chunk_num)
    print(chunk)
    # One simulation per parameter tuple, spread over the worker processes;
    # pool.map returns the results in the same order as chunk_for_fn.
    with Pool(num_cpus) as pool:
        results = pool.map(run_sim_with_params, chunk_for_fn)
    print("RESULTS ", results)

    data_for_df = {column: [] for column in result_columns}

    for result_idx, (key, result) in enumerate(zip(chunk, results)):
        mean_s, est_std_errors, variance_soc_util_performance_values, num_trials = result
        print("result_idx", result_idx)
        print(mean_s)

        # The descriptive key is the same for every method of this result,
        # so it is unpacked once per result rather than once per row.
        (num_cands, num_voters, num_dims, correlation, rel_dispersion,
         num_dims_polarized, subpopulation_std, polarization_distance,
         num_centrist_cands, prob_centrist_voters, voter_utility_name,
         normalization) = key

        # One output row per voting method.
        for vmidx, vm in enumerate(all_vms):
            row = {
                "num_cands": num_cands,
                "num_voters": num_voters,
                "num_dims": num_dims,
                "correlation": correlation,
                "rel_dispersion": rel_dispersion,
                "num_dims_polarized": num_dims_polarized,
                "subpopulation_std": subpopulation_std,
                "polarization_distance": polarization_distance,
                "num_centrist_cands": num_centrist_cands,
                "prob_centrist_voters": prob_centrist_voters,
                "voter_utility": voter_utility_name,
                "normalization": normalization,
                "initial_trials": initial_trials,
                "step_trials": step_trials,
                "min_num_trials": min_num_trials,
                "max_std_error": max_std_error,
                "num_trials": num_trials,
                "vm": vm.name,
                "exp_soc_util_performance": mean_s[vmidx],
                "est_std_error": est_std_errors[vmidx],
                "variance_soc_util_performance_values": variance_soc_util_performance_values[vmidx],
                "dt": datetime.datetime.now(),
                "pref_voting_version": pref_voting_version,
            }
            for column in result_columns:
                data_for_df[column].append(row[column])

    # One CSV file per chunk, numbered by the chunk's position in all_keys_chunks.
    df = pd.DataFrame(data_for_df)
    df.to_csv(f"{data_dir}/exp_soc_util_performance_simulation_{chunk_num}.csv", index=False)
