In [3]:
import numpy as np

def generate_rcv_simulated(Ncand, numranks, Nsimvoters):
    # Set up
    a = np.arange(1, Ncand + 1)
    w = 1 / np.arange(1, Ncand + 1)  # Exclude 0 to avoid division by zero

    N = Nsimvoters * numranks

    # Make some randomly chosen groups of candidates (NYC style)
    R = np.random.choice(a, size=N, p=w/np.sum(w))
    selections = R.reshape(Nsimvoters, numranks)

    # Assume candidates are at 1/(N+1) spacing
    candidateposition = a / (Ncand + 1)

    # Pick a random location on [0, 1] axis for voter
    voterposition = np.random.rand(Nsimvoters, 1)

    # Calculate distance to each candidate
    ballots = np.zeros_like(selections)
    for i in range(Nsimvoters):
        dist_to_candidate = np.abs(voterposition[i] - candidateposition[selections[i, :] - 1])  # Adjust for 0-based indexing
        preferences = np.argsort(dist_to_candidate)
        ballots[i, :] = selections[i, preferences]

    # Rank candidates by distance, make that the ballot
    return ballots

In [None]:
# Test the function with some parameters
Ncand = 4  # Number of candidates
numranks = 5  # Number of ranks
Nsimvoters = 200000  # Number of voters
ballots = generate_rcv_simulated(Ncand, numranks, Nsimvoters)
print(ballots)

In [22]:
from sklearn.manifold import MDS
import pymds
import matplotlib.pyplot as plt

def rcv_dimensionality(ballots, choices, names, Ncand):
    numballots, numranks = ballots.shape

    # ranked candidates 1...C; other codes count down from 99
    # get rid of zeros to simplify later calculations
    ballots[ballots == 0] = 97

    # count up frequencies of consecutive-pair ballot choices
    counts = np.zeros((Ncand, Ncand))
    for i in range(numballots):
        for j in range(numranks - 1):
            if ballots[i, j] <= Ncand and ballots[i, j+1] <= Ncand:
                counts[ballots[i, j]-1, ballots[i, j+1]-1] += 1

    # how many ballots contain a particular pair of preferences anywhere in list?
    mentioned_together = np.zeros((Ncand, Ncand))
    for i in range(numballots):
        for j in range(numranks):
            for k in range(numranks):
                if ballots[i, j] <= Ncand and ballots[i, k] <= Ncand:
                    mentioned_together[ballots[i, j]-1, ballots[i, k]-1] += 1

    # normalize to frequencies relative to votes cast for the two candidates
    freq = counts / mentioned_together

    # combine freq in either direction to create symmetric matrix
    # self-self pairs are zeroed out
    freq_upptri = np.zeros((Ncand, Ncand))
    for i in range(Ncand):
        for j in range(i+1, Ncand):
            freq_upptri[i, j] = (freq[i, j] + freq[j, i]) / 2
            freq_upptri[j, i] = freq_upptri[i, j]

    foo = np.min(freq_upptri[freq_upptri > 0])
    d = 1 / np.sqrt(freq_upptri)
    d[np.isnan(d) | np.isinf(d)] = 2 / foo
    for i in range(Ncand):
        d[i, i] = 0

    # MDS for 1 dimension
    #mds = MDS(n_components=1, dissimilarity='precomputed', metric=False)
    # YY = mds.fit_transform(d)
    YY = pymds.mds(d, n_components=1)
    

    plt.figure()
    plt.scatter(YY * 0, YY)
    for i in range(Ncand):
        plt.text(0.2, YY[i], names[i])
    plt.xlim([-1, 1.5])
    plt.ylim([np.min(YY), np.max(YY) * 1.2])

    # MDS for 2 dimensions
    #mds = MDS(n_components=2, dissimilarity='precomputed', metric=False)
    #Y = mds.fit_transform(d)
    Y = pymds.mds(d, n_components=2)
    

    plt.figure()
    plt.scatter(Y[:, 0], Y[:, 1])
    for i in range(Ncand):
        plt.text(Y[i, 0] + 0.02, Y[i, 1] + 0.02, names[i])
    plt.xlim([np.min(Y[:, 0]) * 1.2, np.max(Y[:, 0]) * 1.2])
    plt.ylim([np.min(Y[:, 1]) * 1.2, np.max(Y[:, 1]) * 1.2])
    plt.grid(True)

    # Return the frequency, counts and distance matrix for verification
    return freq, counts, d

In [None]:
# Test the function with some parameters
choices = [1, 2, 3, 4, 97, 98, 99]
names = ["Poliquin", "Golden", "Bond", "Hoar", "skipped", "overvote", "writein"]
Ncand = 4  # Number of candidates
freq, counts, d = rcv_dimensionality(ballots, choices, names, Ncand)
print("Frequencies:")
print(freq)
print("\nCounts:")
print(counts)
print("\nDistance matrix:")
print(d)

In [18]:
import os
from scipy.io import loadmat

# Load the .mat file
mat_file_path = os.path.join('NYCspace.mat')
mat_data = loadmat(mat_file_path)

# Prepare the inputs for the rcv_dimensionality function
Ncand = mat_data['Ncand'][0, 0]
ballots = mat_data['ballots']
choices = mat_data['choices']
names = ["Adams","Wiley","Donovan","Stringer","Garcia","Morales","Prince","Yang","McGuire","Taylor","Chang","Foldenauer","Wright", "skipped", "overvote", "writein"]

In [23]:
# Call the rcv_dimensionality function
freq, counts, d = rcv_dimensionality(ballots, choices, names, Ncand)

print("Frequencies:")
print(freq)
print("\nCounts:")
print(counts)
print("\nDistance matrix:")
print(d)

  d = 1 / np.sqrt(freq_upptri)


TypeError: 'module' object is not callable