In [2]:
import parser 
import pandas as pd
import itertools
import numpy as np
import os 
import matplotlib.pyplot as plt
import seaborn as sns
from voting_rules import voting_rules as vr
import rcv_dimensionality
import statistics
import rcv_distribution

In [3]:
def is_consistent(ballot):
    if len(ballot) == 0 or len(ballot) == 1:
        return True
    x = 0 
    v = 0.25
    i = 1
    while (i < len(ballot)):
        if ballot[i] < ballot[i - 1]:
            x -= (v * min(abs(ballot[i] - ballot[0]), abs(ballot[i] - ballot[i - 1])))
        else:
            x += (v * min(abs(ballot[i] - ballot[0]), abs(ballot[i] - ballot[i - 1])))
        v *= 0.5
        i += 1
    l = []
    if abs(x) >= 0.5 or x == 0:
        return False 
    x += ballot[0]
    for c in ballot:
        l.append(abs(c - x))
    if(all(l[i] <= l[i + 1] for i in range(len(l) -  1))):
        return True 
    return False

In [4]:
def dim(ballots, candidates):
    perms = list(itertools.permutations(candidates))
    #most consistant permutation
    mcp = None
    c_mcp = 0
    x = len(candidates)
    l = [i for i in range(1, x + 1)]
    
    for perm in perms:
        temp = {}
        i = 1
        for candidate in perm:
            temp[candidate] = i
            i += 1
        c = 0
        total = 0
        for b in ballots:
            if len(b) > 0:
                total += ballots[b]
                b_num = []
                for candidate in b:
                    b_num.append(temp[candidate])
                if (is_consistent(b_num)):
                    c += ballots[b]
            if mcp is None or c > c_mcp:
                c_mcp = c
                mcp = perm
    return (c_mcp/total), mcp

In [5]:
def get_gamma(mds, ballots, candidates):
    mcp = []
    mcp_num = []
    for k in mds:
        mcp.append(k)
        mcp_num.append(mds[k])
    
    temp = {}
    for i in range(len(mcp)):
        temp[mcp[i]] = mcp_num[i]
    c = 0
    total = 0
    for b in ballots:
        if len(b) > 0:
            total += ballots[b] 
            b_num = []
            for candidate in b:
                if candidate in mcp:
                    b_num.append(temp[candidate])
            if (is_consistent(b_num)):
                c += ballots[b]
    
    return (c/total), mcp

In [6]:
def freq(ballots, candidates):
    result_freq = {}
    result_first = {}

    frequency = {}
    first = {}
    empty = 0
    for c in candidates:
        frequency[c] = 0
        first[c] = 0
    
    for b in ballots:
        if len(b) > 0:
            first[b[0]] += ballots[b]
        else:
            empty += ballots[b]
        for c in b:
            frequency[c] += ballots[b]
    
    total = sum(ballots.values())
    total -= empty 
    for c in sorted(frequency):
        result_freq[c] = (frequency[c]/total) * 100
    for c in sorted(first):
        result_first[c] = (first[c]/total) * 100
    
   
    return result_freq, result_first




In [43]:
directory = "Proportional"
election_analysis = pd.read_csv("proportional.csv")
print(election_analysis)
print(election_analysis["type"])

for filename in os.listdir(directory):
    print(filename)
    ballots, candidates = rcv_distribution.parse_election_data(os.path.join(directory, filename))
    row_num = election_analysis[election_analysis["filename"] == filename].index[0]

    #mds
    try:
        mds = rcv_dimensionality.perform_rcv_and_normalize(os.path.join(directory, filename))
        gamma_mds, mcp_mds = get_gamma(mds, ballots, candidates)
        election_analysis.loc[row_num, "gamma mds"] = gamma_mds
    except:
        print(filename)

    #brute force 
    if len(candidates) <= 7:
        gamma_bf, mcp_bf = dim(ballots, candidates)
        election_analysis.loc[row_num, "gamma bf"] = gamma_bf

    election_analysis.loc[row_num, "candidates"] = len(candidates)

    data = pd.read_csv(os.path.join(directory, filename), low_memory=False)
    choices = int(data.columns[-1][-1])
    election_analysis.loc[row_num, "choices"] = choices
    
    frequency, first = freq(ballots, candidates)
    most_frequent = []
    for c in sorted(frequency, key=frequency.get, reverse=True):
        most_frequent.append(frequency[c])

    for rank in range(1, min(6, len(candidates) + 1)):
        column = '#' + str(rank) + " candidate"
        election_analysis.loc[row_num, column] = most_frequent[rank - 1]

    
   



KeyError: 'filename\tpartisan\tlevel\ttype\tcandidates\tchoices\tgamma bf\tgamma mds\t#1 candidate\t#2 candidate\t#3 candidate\t#4 candidate\t#5 candidate\tbimodality'

In [16]:
election_analysis.to_csv("election_analysis.csv")