In [2]:
import pandas as pd
import numpy as np
from dist_analysis.distance import DistanceSim, kendall_tau
from dist_analysis.rcv_elections import rcvElections

**Cambridge Data Cleaning**

In [3]:
cambridge = pd.read_csv("data/Cambridge city council 2009.csv")
cambridge = cambridge[['rank1', 'rank2', 'rank3']]
parts = []
for rank in cambridge.columns:
    parts.append(cambridge[rank].value_counts().reset_index())

agg = parts[0].merge(parts[1], on='index')
agg = agg.merge(parts[2], on='index')

clean_agg = agg[~agg['index'].isin(['overvote', 'skipped', 'Write-In 2', 'Write-In 3'])]
clean_agg

Unnamed: 0,index,rank1,rank2,rank3
0,"Davis, Henrietta",1847,1745,1410
1,"Simmons, E. Denise",1776,1719,1499
2,"Toomey, Jr., Timothy J.",1737,1093,968
3,"Decker, Marjorie C.",1284,599,370
4,"Maher, David P.",1283,1111,921
5,"Kelley, Craig A.",1245,887,797
6,"Reeves, Kenneth E.",1157,1214,983
7,"Seidel, Sam",899,1152,1145
8,"Sullivan, Edward J.",877,955,659
9,"Cheung, Leland",755,662,651


**IRV Ranking for Cambridge**

In [4]:
cam_ballots = cambridge.values.tolist()
cam_cand_list = cambridge['rank3'].unique()
num_seats = 1

camb_irv = rcvElections(cam_ballots, list(cam_cand_list), num_seats).rcv_run()
to_remove = ['overvote', 'skipped', 'Write-In 2', 'Write-In 3', 'Write-In 4','Write-In 9']
for cand in camb_irv:
    if cand  in to_remove:
        camb_irv.remove(cand)

cam_irv = camb_irv[0:-2]
cam_id = dict([(c, int(v)) for c, v in zip(clean_agg['index'], range(1, len(clean_agg)+1))])
clean_agg['IRV order'] = list(map(lambda k: cam_id[k], cam_irv))
clean_agg = clean_agg.rename(columns={'index' : 'candidate', 'rank1' : 'first', 'rank2' : 'second', 'rank3' : 'third'})
clean_agg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_agg['IRV order'] = list(map(lambda k: cam_id[k], cam_irv))


Unnamed: 0,candidate,first,second,third,IRV order
0,"Davis, Henrietta",1847,1745,1410,1
1,"Simmons, E. Denise",1776,1719,1499,2
2,"Toomey, Jr., Timothy J.",1737,1093,968,3
3,"Decker, Marjorie C.",1284,599,370,5
4,"Maher, David P.",1283,1111,921,6
5,"Kelley, Craig A.",1245,887,797,7
6,"Reeves, Kenneth E.",1157,1214,983,4
7,"Seidel, Sam",899,1152,1145,8
8,"Sullivan, Edward J.",877,955,659,10
9,"Cheung, Leland",755,662,651,9


In [5]:
clean_agg[clean_agg['IRV order'].isin(list(range(10)))]

Unnamed: 0,candidate,first,second,third,IRV order
0,"Davis, Henrietta",1847,1745,1410,1
1,"Simmons, E. Denise",1776,1719,1499,2
2,"Toomey, Jr., Timothy J.",1737,1093,968,3
3,"Decker, Marjorie C.",1284,599,370,5
4,"Maher, David P.",1283,1111,921,6
5,"Kelley, Craig A.",1245,887,797,7
6,"Reeves, Kenneth E.",1157,1214,983,4
7,"Seidel, Sam",899,1152,1145,8
9,"Cheung, Leland",755,662,651,9


In [7]:
cam_model = DistanceSim(clean_agg, kendall_tau)

m = 5
cam_model.calc_distances(1000)
cam_model.gen_3d_plot()

In [None]:
min_sets = cam_model.find_min_sets()
min_sets

Unnamed: 0,a,b,c,dist
501499,1000.0,1.0,0.0,7.0
501500,1000.0,0.0,0.0,7.0


MN Data Cleaning

In [None]:
mn_ballots = pd.read_csv('data/MN2013_ballots.csv')
#Subset correct columns of crv
mn_ballots = mn_ballots[mn_ballots.columns[2:5]]
#Convert columns to list of lists
mn_ballot_lst = mn_ballots.values.tolist()
#Generate list of unique candidates
mn_cands = list(mn_ballots[mn_ballots.columns[0]].unique())

In [None]:
from dist_analysis.rcv_elections import rcvElections

mn_rcv = rcvElections(mn_ballot_lst, list(mn_cands), 1)
mn_irv = mn_rcv.rcv_run()
mn_irv_clean = []
for cand in mn_irv:
    if cand not in ['overvote', 'undervote', 'UWI']:
        mn_irv_clean.append(cand)
mn_irv_clean

['BETSY HODGES',
 'MARK ANDREW',
 'DON SAMUELS',
 'CAM WINTON',
 'JACKIE CHERRYHOMES',
 'BOB FINE',
 'DAN COHEN',
 'STEPHANIE WOODRUFF',
 'MARK V ANDERSON',
 'DOUG MANN',
 'OLE SAVIOR',
 'JAMES EVERETT',
 'ALICIA K. BENNETT',
 'ABDUL M RAHAMAN "THE ROCK"',
 'CAPTAIN JACK SPARROW',
 'CHRISTOPHER CLARK',
 'TONY LANE',
 'JAYMIE KELLY',
 'MIKE GOULD',
 'KURTIS W. HANNA',
 'CHRISTOPHER ROBIN ZIMMERMAN',
 'JEFFREY ALAN WAGNER',
 'NEAL BAXTER',
 'TROY BENJEGERDES',
 'GREGG A. IVERSON',
 'MERRILL ANDERSON',
 'JOSHUA REA',
 'BILL KAHN',
 'JOHN LESLIE HARTWIG',
 'EDMUND BERNARD BRUYERE',
 'JAMES "JIMMY" L. STROUD, JR.',
 'RAHN V. WORKCUFF',
 'BOB "AGAIN" CARNEY JR',
 'CYD GORMAN',
 'JOHN CHARLES WILSON']