In [6]:
import csv
import os

#! Modified "Minneapolis 2013-board of estimation and taxation cvr.csv" to "Minneapolis_00002013_board of estimation and taxation cvr.csv"

# Root directory scanned for CVR files. The name of each first-level
# sub-directory is used as the election type of the files found beneath it.
TABULATOR_INPUT_DIR = 'tabulator_input'


def parse_cvr_filename(filename):
    """Split a CVR filename of the form ``<location>_<mmddyyyy>_<office>``.

    Parameters
    ----------
    filename : str
        Bare filename (no directory part).

    Returns
    -------
    dict or None
        ``{'location', 'date', 'office'}`` where ``date`` is the second
        filename part re-punctuated as ``mm/dd/yyyy``; ``None`` when the name
        does not contain at least two underscores.

    Notes
    -----
    Only the first two underscores are treated as separators, so an office
    name that itself contains underscores is kept whole. The office is the
    remainder of the filename and therefore still carries any file extension.
    """
    parts = filename.split('_', 2)
    if len(parts) < 3:
        return None
    location, raw_date, office = parts
    return {
        'location': location,
        'date': f"{raw_date[0:2]}/{raw_date[2:4]}/{raw_date[4:]}",
        'office': office,
    }


# list of election records; each record is a dict with the keys
# 'type', 'location', 'date', 'office' and 'cvr_path'
elections = []

# paths of files whose names did not match the expected pattern and were
# therefore left out of `elections`
skipped_files = []

# here we iterate over the CSV filenames in the tabulator_input directory
for dirpath, dirnames, filenames in os.walk(TABULATOR_INPUT_DIR):
    # Editing dirnames in place steers the walk: drop hidden directories
    # (e.g. .ipynb_checkpoints) and sort so the traversal order is reproducible.
    dirnames[:] = sorted(d for d in dirnames if not d.startswith('.'))

    # files sitting directly in the root have no election type; skip the root
    if dirpath == TABULATOR_INPUT_DIR:
        continue

    # get election type from the first directory level below the root,
    # splitting on the platform separator that os.walk actually uses
    election_type = os.path.relpath(dirpath, TABULATOR_INPUT_DIR).split(os.sep)[0]

    for filename in sorted(filenames):
        cvr_path = os.path.join(dirpath, filename)

        # get election location, date and office from the filename
        fields = parse_cvr_filename(filename)
        if fields is None:
            # a stray file (e.g. .DS_Store) must not abort the whole scan
            skipped_files.append(cvr_path)
            continue

        # add election record to list
        elections.append({'type': election_type, **fields, 'cvr_path': cvr_path})

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) not named <location>_<mmddyyyy>_<office>: {skipped_files}")

In [None]:
""" The contest set csv file should contain the following columns:
state: name of jurisdiction state
jurisdiction: name of jurisdiction, required
year: year
date: date in mm/dd/yyyy format, required
office: office being elected, required
notes: arbitrary notes
exhaust_on_overvote_marks: TRUE or FALSE, default is FALSE
exhaust_on_N_repeated_skipped_marks: Number of repeated skipped marks after which the ballot is exhausted, default is 0 (no amount of repeated skipped marks exhaust a ballot)
exhaust_on_duplicate_candidate_marks: TRUE or FALSE, default is FALSE
exclude_writein_marks: TRUE or FALSE, default is FALSE. Write-in ballot markings are ignored.
combine_writein_marks: TRUE or FALSE, default is FALSE. Any candidates named ‘UWI’ or that contain the string ‘write’ in their name are combined into a single write-in candidate.
treat_combined_writeins_as_exhaustable_duplicates: TRUE or FALSE, default is FALSE. If write-ins are combined, decide whether or not the newly combined writeins count as duplicate rankings for the purpose of ballot exhaustion.
multi_winner_rounds: TRUE or FALSE, default is TRUE
n_winners: an integer, defaults to 1. Only applies to RCV variants requiring a set number of winners (multi winner STV and Sequential IRV).
rcv_type: name of RCV variant class
bottoms_up_threshold: number between 0 and 1. Only applies to bottoms up RCV variant.
split_fields: comma-separated list of column names on which to calculate split statistics
parser_func: name of parser function to use for CVR file
cvr_path: path to CVR file or CVR directory, relative to value provided in cvr_path_root field in run config.
extra_parser_args: semicolon-separated list of key-value pairs corresponding to additional arguments required by parser function. Each key-value pair should be separated by ‘=’ sign.
ignore_contest: TRUE or FALSE, default is FALSE. If TRUE, skip election when running the batch.
"""

In [8]:
# here we build the contest set csv file: a header row followed by one row
# per election record

# Column order of the contest set file. The header row and every data row are
# both derived from this single list so they cannot drift apart.
CONTEST_SET_COLUMNS = [
    'state',
    'jurisdiction',
    'year',
    'date',
    'office',
    'notes',
    'exhaust_on_overvote_marks',
    'exhaust_on_N_repeated_skipped_marks',
    'exhaust_on_duplicate_candidate_marks',
    'exclude_writein_marks',
    'combine_writein_marks',
    'treat_combined_writeins_as_exhaustable_duplicates',
    'multi_winner_rounds',
    'n_winners',
    'rcv_type',
    'bottoms_up_threshold',
    'split_fields',
    'parser_func',
    'cvr_path',
    'extra_parser_args',
    'ignore_contest',
]

# Values written for every contest unless overridden from the election record.
# These are placeholders: the real rules have to be looked up per jurisdiction.
CONTEST_DEFAULTS = {
    'state': '',
    'notes': '',  # could classify low ticket high ticket here
    'exhaust_on_overvote_marks': 'FALSE',
    'exhaust_on_N_repeated_skipped_marks': '0',
    'exhaust_on_duplicate_candidate_marks': 'FALSE',
    'exclude_writein_marks': 'FALSE',
    'combine_writein_marks': 'FALSE',
    'treat_combined_writeins_as_exhaustable_duplicates': 'FALSE',
    'multi_winner_rounds': 'TRUE',
    'n_winners': '1',
    'rcv_type': 'IRV',
    'bottoms_up_threshold': '0',
    'split_fields': '',
    'parser_func': '',  # going to have to set this manually?
    'extra_parser_args': '',
    'ignore_contest': 'FALSE',
}

# newline='' is what the csv module expects for files it writes to;
# lineterminator='\n' keeps the row endings the hand-built rows used.
with open('contest_set.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=CONTEST_SET_COLUMNS, lineterminator='\n')
    writer.writeheader()

    # then we iterate over the election records and add them to the file
    for election in elections:
        # The writer quotes any value containing a comma, quote or newline
        # (e.g. an office name like "Mayor, At Large"), so such values no
        # longer shift the columns that follow them.
        writer.writerow({
            **CONTEST_DEFAULTS,
            'jurisdiction': f"{election['location']} PLACEHOLDER",
            'year': election['date'][-4:],
            'date': election['date'],
            'office': election['office'],
            'cvr_path': election['cvr_path'],
        })