In [8]:
import parser 
import pandas as pd
import itertools
import numpy as np
import os 
import matplotlib.pyplot as plt
import seaborn as sns
from voting_rules import voting_rules as vr
import rcv_dimensionality
import statistics
import rcv_distribution

In [2]:
def is_consistent(ballot):
    """Check whether a ballot of numeric candidate positions fits one ideal point.

    ``ballot`` is a sequence of numbers (candidate positions) in the order they
    appear on the ballot. Ballots with zero or one entry are always accepted.

    For longer ballots an offset from the first entry is accumulated: each
    later entry contributes ``weight * min(|entry - first|, |entry - previous|)``,
    subtracted when the entry is lower than the previous one and added
    otherwise. ``weight`` starts at 0.25 and is halved after every entry.

    The ballot is rejected when the offset is exactly 0 or its magnitude is
    0.5 or more. Otherwise the point ``first + offset`` is used as the ideal
    point, and the ballot is accepted only if the distance from that point to
    each entry never decreases along the ballot.

    Returns
    -------
    bool
    """
    if len(ballot) <= 1:
        return True

    first = ballot[0]
    offset = 0
    weight = 0.25
    for previous, current in zip(ballot, ballot[1:]):
        step = weight * min(abs(current - first), abs(current - previous))
        if current < previous:
            offset -= step
        else:
            offset += step
        weight *= 0.5

    # An offset of exactly zero, or one of magnitude 0.5 or more, is rejected outright.
    if abs(offset) >= 0.5 or offset == 0:
        return False

    ideal_point = offset + first
    distances = [abs(position - ideal_point) for position in ballot]
    # Entries must sit at non-decreasing distance from the ideal point.
    return all(near <= far for near, far in zip(distances, distances[1:]))

In [3]:
def dim(ballots, candidates):
    """Brute-force the candidate ordering under which the most ballots are consistent.

    Every permutation of ``candidates`` is tried. Within a permutation the
    candidates are numbered 1..n by position, each non-empty ballot is
    translated to those numbers and handed to ``is_consistent``, and the
    permutation's score is the summed count of the ballots it accepts. The
    first permutation that reaches the highest score is kept.

    Note that n candidates mean n! permutations, so the running time grows
    factorially with the number of candidates.

    Parameters
    ----------
    ballots : dict
        Maps a ballot (a tuple of candidates) to the number of times it occurs.
        Empty ballots are ignored.
    candidates : iterable
        The candidates to permute.

    Returns
    -------
    (float, tuple)
        The share of non-empty ballots that are consistent under the best
        permutation, and that permutation. When there are no non-empty ballots
        the share is reported as 0.0 instead of dividing by zero.

    Raises
    ------
    KeyError
        If a ballot contains a candidate that is not in ``candidates``.
    """
    # The set of counted ballots and their total do not depend on the
    # permutation, so work them out once.
    counted = [(b, n) for b, n in ballots.items() if len(b) > 0]
    total = sum(n for _, n in counted)

    # most consistent permutation and its score
    mcp = None
    c_mcp = 0

    # Iterate lazily: a list of all n! permutations is never needed at once.
    for perm in itertools.permutations(candidates):
        position = {candidate: i for i, candidate in enumerate(perm, start=1)}
        c = 0
        for b, n in counted:
            if is_consistent([position[candidate] for candidate in b]):
                c += n
        # Compare once per permutation; strict '>' keeps the earliest best one.
        if mcp is None or c > c_mcp:
            c_mcp = c
            mcp = perm

    if total == 0:
        return 0.0, mcp
    return (c_mcp / total), mcp

In [4]:
def get_gamma(mds, ballots, candidates):
    """Share of non-empty ballots that are consistent with a given candidate layout.

    Each non-empty ballot is translated to the positions ``mds`` assigns to its
    candidates (candidates missing from ``mds`` are dropped from the ballot)
    and passed to ``is_consistent``.

    Parameters
    ----------
    mds : mapping
        Maps candidate -> numeric position.
    ballots : dict
        Maps a ballot (a tuple of candidates) to the number of times it occurs.
        Empty ballots are ignored.
    candidates : list
        Not used by the computation; kept so existing callers keep working.

    Returns
    -------
    (float, list)
        The consistent share of the non-empty ballots, and the candidates of
        ``mds`` in its iteration order. When there are no non-empty ballots the
        share is reported as 0.0 instead of dividing by zero.
    """
    # Snapshot of the layout; a dict gives constant-time membership tests
    # where the original scanned a list for every ballot entry.
    positions = {k: mds[k] for k in mds}
    mcp = list(positions)

    c = 0
    total = 0
    for b, count in ballots.items():
        if len(b) == 0:
            continue
        total += count
        b_num = [positions[candidate] for candidate in b if candidate in positions]
        if is_consistent(b_num):
            c += count

    if total == 0:
        return 0.0, mcp
    return (c / total), mcp

In [None]:
def freq(ballots, candidates):
    """Per-candidate appearance and first-place percentages over non-empty ballots.

    Parameters
    ----------
    ballots : dict
        Maps a ballot (a tuple of candidates) to the number of times it occurs.
    candidates : iterable
        Every candidate that may appear on a ballot.

    Returns
    -------
    (dict, dict)
        ``result_freq`` maps candidate -> percentage of non-empty ballots on
        which the candidate appears (a candidate listed more than once on a
        ballot is counted once per listing). ``result_first`` maps candidate ->
        percentage of non-empty ballots that list the candidate first. Both
        dicts are keyed in sorted candidate order. When there are no non-empty
        ballots every percentage is 0.0 instead of dividing by zero.

    Raises
    ------
    KeyError
        If a ballot contains a candidate that is not in ``candidates``.
    """
    frequency = {c: 0 for c in candidates}
    first = {c: 0 for c in candidates}
    empty = 0

    for b, count in ballots.items():
        if len(b) > 0:
            first[b[0]] += count
        else:
            empty += count
        for c in b:
            frequency[c] += count

    # Percentages are relative to the ballots that rank at least one candidate.
    total = sum(ballots.values()) - empty

    def as_percent(n):
        return (n / total) * 100 if total else 0.0

    result_freq = {c: as_percent(frequency[c]) for c in sorted(frequency)}
    result_first = {c: as_percent(first[c]) for c in sorted(first)}
    return result_freq, result_first




In [None]:
# Fill one row of the election_analysis table per election file: gamma from the
# MDS layout, gamma from the brute-force search, candidate count, number of rank
# columns, and per-candidate appearance percentages.
directory = "dataverse_files"
elections = pd.read_csv("matched.csv")  # not used in this cell
election_analysis = pd.read_csv("election_analysis.csv")

for filename in os.listdir(directory):
    path = os.path.join(directory, filename)

    # A file without a row in the table used to abort the whole run with an
    # IndexError; report it and move on instead.
    matching_rows = election_analysis.index[election_analysis["filename"] == filename]
    if len(matching_rows) == 0:
        print("no row in election_analysis.csv for", filename)
        continue
    row_num = matching_rows[0]

    ballots, candidates = rcv_distribution.parse_election_data(path)

    #mds
    # Best effort: a failure here is reported and the rest of the row is still
    # filled in. Catch Exception rather than everything so Ctrl-C still stops
    # the loop.
    try:
        mds = rcv_dimensionality.perform_rcv_and_normalize(path)
        gamma_mds, mcp_mds = get_gamma(mds, ballots, candidates)
        election_analysis.loc[row_num, "gamma mds"] = gamma_mds
    except Exception as exc:
        print(filename, repr(exc))

    #brute force
    gamma_bf, mcp_bf = dim(ballots, candidates)
    election_analysis.loc[row_num, "gamma bf"] = gamma_bf

    election_analysis.loc[row_num, "candidates"] = len(candidates)

    # The number of choices is read from the digits that end the last column
    # name. Take all trailing digits, not just the final character, so a name
    # ending in "10" yields 10 rather than 0.
    data = pd.read_csv(path, low_memory=False)
    last_column = str(data.columns[-1])
    choices = int(last_column[len(last_column.rstrip("0123456789")):])
    election_analysis.loc[row_num, "choices"] = choices

    # Do not unpack into a variable called `freq`: that rebinds the function
    # name and makes the call fail on the next iteration.
    freq_pct, first_pct = freq(ballots, candidates)
    # NOTE(review): values are taken in sorted candidate-name order, not sorted
    # by percentage, so "#1 candidate" is the alphabetically first candidate.
    # Confirm whether a descending sort by frequency was intended.
    most_frequent = [freq_pct[c] for c in sorted(freq_pct)]

    # At most five "#k candidate" columns are written.
    for rank in range(1, min(6, len(candidates) + 1)):
        column = '#' + str(rank) + " candidate"
        election_analysis.loc[row_num, column] = most_frequent[rank - 1]

    
   



In [10]:
# Scratch check on a single election file. `file`, `mds`, `ballots` and
# `candidates` stay in the kernel and are read again by a later cell.
file = "dataverse_files/Maine_11062018_CongressionalDistrict2.csv"


# Per the output recorded below, `mds` prints as a dict of candidate name -> float.
mds = rcv_dimensionality.perform_rcv_and_normalize(file)
print("mds: ", mds)

# Per the output recorded below, `candidates` prints as a list of names and
# `ballots` as a dict of (tuple of names) -> count, including an empty tuple.
ballots, candidates = rcv_distribution.parse_election_data(file)
print(candidates)
print(ballots)

  distance = 1 / np.sqrt(freq_upper_triangle)


mds:  {'DEM Golden, Jared F.': 0.0, 'Bond, Tiffany L.': 0.9459271114896626, 'Hoar, William R.S.': 1.733120651865034, 'REP Poliquin, Bruce': 3.0}
['REP Poliquin, Bruce', 'DEM Golden, Jared F.', 'Bond, Tiffany L.', 'Hoar, William R.S.']
{('REP Poliquin, Bruce',): 89669, ('DEM Golden, Jared F.', 'Bond, Tiffany L.'): 10505, ('REP Poliquin, Bruce', 'DEM Golden, Jared F.', 'Bond, Tiffany L.', 'Hoar, William R.S.'): 3467, ('Hoar, William R.S.', 'REP Poliquin, Bruce', 'Bond, Tiffany L.', 'DEM Golden, Jared F.'): 306, (): 5928, ('DEM Golden, Jared F.', 'Bond, Tiffany L.', 'Hoar, William R.S.', 'REP Poliquin, Bruce'): 23140, ('REP Poliquin, Bruce', 'Hoar, William R.S.', 'Bond, Tiffany L.', 'DEM Golden, Jared F.'): 11600, ('Hoar, William R.S.', 'Bond, Tiffany L.', 'REP Poliquin, Bruce', 'DEM Golden, Jared F.'): 579, ('DEM Golden, Jared F.',): 51673, ('Hoar, William R.S.', 'REP Poliquin, Bruce', 'DEM Golden, Jared F.', 'Bond, Tiffany L.'): 200, ('DEM Golden, Jared F.', 'Hoar, William R.S.', 'REP P

In [13]:
# Scratch check of how the number of choices is read from the last CSV column name.
file = "dataverse_files/Maine_11062018_CongressionalDistrict2.csv"
data = pd.read_csv(file, low_memory=False)

# Take every trailing digit of the column name, not just its last character,
# so a name ending in "10" yields 10 rather than 0.
last_column = str(data.columns[-1])
print(int(last_column[len(last_column.rstrip("0123456789")):]))

5


In [15]:
# Relies on `file`, `ballots` and `candidates` left in the kernel by earlier
# cells; after a restart those cells have to be run first.
# NOTE(review): the output recorded below names candidates that do not appear in
# the Maine candidate list printed earlier, so it presumably comes from a run in
# which these globals pointed at a different election. Re-run top to bottom to
# confirm.
print("mds: ")
mds = rcv_dimensionality.perform_rcv_and_normalize(file)
gamma, mcp = get_gamma(mds, ballots, candidates)
print("mcp: ", mcp)
print("gamma: ", gamma)

mds: 


  distance = 1 / np.sqrt(freq_upper_triangle)


mcp:  ['Lynette Lungay Dumalag', 'John Basill']
gamma:  1.0


In [5]:
# Scratch check of how names are tested against the ignore list.
# NOTE(review): the entries start with '^' and look like regular-expression
# patterns — confirm how they are meant to be applied.
ignore_values = ['^(WRITE-IN)', '^writein', '^Write-In', '^Write-in', '^skipped', '^overvote', '^Undeclared', '^undervote']
name = "me(WRITE_IN)"

# `in` on a list only asks whether some element equals `name`; the entries are
# not applied as patterns, so this prints False for any name that is not
# literally one of the strings above.
print(name in ignore_values)

False
