## Chicago RRPS Determination

This notebook derives the racially-reduced preference schedule (RRPS) of Chicago using the First-Choice Distribution Model (FC) with the frequency-proportion method.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
from functools import partial
import pickle

import gerrychain as gc
from gerrychain.updaters import Tally, cut_edges
import gerrychain.tree_proposals as tp
from gerrychain import accept
import itertools

#### Data Processing

The necessary data are:
- Chicago Voting History
- RRPS of Cambridge, Oakland, and Minneapolis

Those data are available in CSVs, and are converted to pandas dataframes below.

In [2]:
def strclean(df):
    """
    Convert all strings in a DataFrame to lowercase, with underscores instead of spaces.

    Only cell values are touched: column labels and non-string values are
    returned as they are. A new DataFrame is returned; the input is not modified.

    df (pandas DataFrame): the DataFrame to be converted
    return: the modified pandas DataFrame
    """
    def _normalise(value):
        if isinstance(value, str):
            return value.lower().replace(' ', '_')
        return value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
    # name is deprecated. The method is looked up on the class (not the
    # instance) so a column that happens to be called "map" cannot be picked
    # up through attribute access on older pandas versions.
    frame_type = type(df)
    elementwise = getattr(frame_type, 'map', None) or frame_type.applymap
    return elementwise(df, _normalise)

In [3]:
# Cambridge City Council ballots: read each election and normalise its strings.
# The first line of the 2017 file is skipped on read.
cambridge_cc2017 = strclean(
    pd.read_csv('rrps/cambridge_citycouncil_2017rrps.csv', skiprows=1)
)
cambridge_cc2015 = strclean(pd.read_csv('rrps/cambridge_citycouncil_2015rrps.csv'))
cambridge_cc2013 = strclean(pd.read_csv('rrps/cambridge_citycouncil_2013rrps.csv'))

# 2017: discard four unnamed columns, then label what is left as
# an id followed by choice1..choice27
cambridge_cc2017 = cambridge_cc2017.drop(
    columns=['Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31']
)
ccc2017_cols = ['id'] + ['choice{}'.format(rank) for rank in range(1, 28)]
cambridge_cc2017.columns = ccc2017_cols

# 2015: discard four unnamed columns, then label what is left as
# id, precinct and choice1..choice23
cambridge_cc2015 = cambridge_cc2015.drop(
    columns=['Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28']
)
ccc2015_cols = ['id', 'precinct'] + ['choice{}'.format(rank) for rank in range(1, 24)]
cambridge_cc2015.columns = ccc2015_cols

# 2013: nothing to discard; label the columns as an id followed by choice1..choice25
ccc2013_cols = ['id'] + ['choice{}'.format(rank) for rank in range(1, 26)]
cambridge_cc2013.columns = ccc2013_cols

In [4]:
# Import Minneapolis Mayor Data
minneapolis_mayor2017 = strclean(pd.read_csv('rrps/minneapolis_mayor_2017rrps.csv', skiprows=0))
minneapolis_mayor2013 = strclean(pd.read_csv('rrps/minneapolis_mayor_2013rrps.csv', skiprows=0))

# Keep only the precinct and the race of each ranked choice, then give both
# elections the same short column names.
# strclean normalises cell values only, so the headers below are still the
# ones found in the CSV files.

# Mayoral 2017
minneapolis_mayor2017 = minneapolis_mayor2017.filter(['Precinct', 
                                                      '1st Choice_Race', 
                                                      '2nd Choice_Race', 
                                                      '3rd Choice_Race'])
minneapolis_mayor2017.columns = ['precinct', 'choice1', 'choice2', 'choice3']

# Mayoral 2013
minneapolis_mayor2013 = minneapolis_mayor2013.filter(['Precinct', 
                                                      '1ST CHOICE MAYOR MINNEAPOLIS_Race', 
                                                      '2ND CHOICE MAYOR MINNEAPOLIS_Race', 
                                                      '3RD CHOICE MAYOR MINNEAPOLIS_Race'])
minneapolis_mayor2013.columns = ['precinct', 'choice1', 'choice2', 'choice3']

In [5]:
# Minneapolis Parks and Recreation Commissioner At Large ballots.
# Each election is read, string-normalised and reduced to the precinct plus
# the race of the three ranked choices in one expression, then relabelled.

# 2017 election
minneapolis_pal2017 = strclean(
    pd.read_csv('rrps/minneapolis_parkatlarge_2017rrps.csv')
).filter(items=[
    'Precinct',
    '1st Choice_Race',
    '2nd Choice_Race',
    '3rd Choice_Race',
])
minneapolis_pal2017.columns = ['precinct', 'choice1', 'choice2', 'choice3']

# 2013 election
minneapolis_pal2013 = strclean(
    pd.read_csv('rrps/minneapolis_parkatlarge_2013rrps.csv')
).filter(items=[
    'Precinct',
    '1ST CHOICE PARK AND RECREATION COMMISSIONER AT LARGE MINNEAPOLIS_Race',
    '2ND CHOICE PARK AND RECREATION COMMISSIONER AT LARGE MINNEAPOLIS_Race',
    '3RD CHOICE PARK AND RECREATION COMMISSIONER AT LARGE MINNEAPOLIS_Race',
])
minneapolis_pal2013.columns = ['precinct', 'choice1', 'choice2', 'choice3']

In [6]:
# Oakland Mayor 2014 ballots: read, string-normalise, keep the voter id and
# the race id of the three ranked choices, then relabel the columns.
oakland_mayor2014 = strclean(
    pd.read_csv('rrps/oakland_mayor_2014rrps.csv')
).filter(items=[
    'Pref_Voter_Id',
    'Choice_1_Race_Id',
    'Choice_2_Race_Id',
    'Choice_3_Race_Id',
])
oakland_mayor2014.columns = ['pref_voter_id', 'choice1', 'choice2', 'choice3']

#### RRPS

The following function takes a pre-processed voting file for Oakland, Cambridge, or Minneapolis and returns a dictionary that maps each permutation of candidate races in a ballot's top three choices to the frequency with which that permutation occurred in the election.

In [7]:
def rrps(df, all_choices=False):
    """
    Get the RRPS of a city.

    Ballots are grouped by their ranked choices (columns choice1..choiceN).
    Each distinct ranking is abbreviated to the upper-cased first character of
    every choice (e.g. white, black, hispanic -> 'WBH') and mapped to the
    number of non-null entries in the first column of df for that group.

    Note: with pandas' default groupby behaviour, rows with a missing value in
    any of the grouped choice columns are left out of the result.

    df (pandas DataFrame): the vote breakdown of a city; its first column is
                           the one that gets counted
    all_choices (bool): whether all-choice permutations should be returned
                        if False, 3-choice permutations will be returned
    return: a dictionary with the RRPS
    raises: ValueError if all_choices is True and df has no 'choice<N>' column
    """
    if all_choices:
        # Read the full rank number from the column names; looking only at the
        # last character breaks as soon as there are ten or more choices
        # ('choice27' would be read as 7, 'choice10' as 0).
        prefix = 'choice'
        ranks = [
            int(str(col)[len(prefix):])
            for col in df.columns
            if str(col).startswith(prefix) and str(col)[len(prefix):].isdecimal()
        ]
        if not ranks:
            raise ValueError("no 'choice<N>' columns found in the DataFrame")
        num_choices = max(ranks)
    else:
        num_choices = 3
    choices = ['choice' + str(x) for x in range(1, num_choices + 1)]

    results = {}
    for name, group in df.groupby(choices):
        # Depending on the pandas version, grouping by a single column yields a
        # scalar key; wrap it so it is not iterated character by character.
        if not isinstance(name, tuple):
            name = (name,)
        # str() keeps non-string codes (e.g. a numeric 0) from raising
        abbr = ''.join(str(race)[0].upper() for race in name)
        results[abbr] = group[df.columns[0]].count()
    # Handle the case of 000 in Oakland
    results.pop('000', None)
    return results

#### Chicago Vote History

The data for Chicago's voting history are available in CSVs and are converted to pandas dataframes below.

In [8]:
# Chicago 2015 mayoral election results, read as-is from the vote-history CSV
chicago_mayor_2015 = pd.read_csv('chicago_vote_history/Chicago_citywide2015.csv')

In [9]:
# Chicago 2019 mayoral election results, read as-is from the vote-history CSV
chicago_mayor_2019 = pd.read_csv('chicago_vote_history/Mayoral19_precincts.csv')

In [10]:
# Collapse the 2015 candidate columns into one vote total per candidate race,
# then keep only the ID and those three totals.
chicago_mayor_2015 = chicago_mayor_2015.assign(
    white=lambda votes: votes['Emanuel_M15'] + votes['Fioretti_M15'],
    black=lambda votes: votes['Wilson_M15'] + votes['Walls_M15'],
    hispanic=lambda votes: votes['Garcia_M15'],
).filter(items=['ID', 'white', 'black', 'hispanic'])

In [11]:
# Collapse the 2019 candidate columns into one vote total per candidate race,
# keep only the identifier and those three totals, and rename the identifier
# column so the frame has the same layout as the 2015 one.
chicago_mayor_2019 = chicago_mayor_2019.assign(
    white=lambda votes: (
        votes['Joyce'] + votes['Daley'] + votes['Fioretti']
        + votes['Kozlar'] + votes['McCarth'] + votes['Vallas']
    ),
    black=lambda votes: (
        votes['Enyia'] + votes['Ford'] + votes['Lightft']
        + votes['Preckw'] + votes['Wilson'] + votes['Sal_Grif']
    ),
    hispanic=lambda votes: votes['Chico'] + votes['Mendoza'],
).filter(items=['JOINID', 'white', 'black', 'hispanic'])
chicago_mayor_2019.columns = ['ID', 'white', 'black', 'hispanic']

Unnamed: 0,ID,white,black,hispanic
0,C01001,55,209,51
1,C01002,51,149,42
2,C01003,74,185,25
3,C01004,95,159,39
4,C01005,70,175,28


In [12]:
# Get the first-choice distribution from the RRPS
def first_choice(rrps_data):
    """
    Convert RRPS frequencies into shares conditional on the first choice.

    Every permutation's frequency is divided by the total frequency of all
    permutations that start with the same first character, so the shares of
    the permutations sharing a first choice add up to 1.

    rrps_data (dict): maps a permutation string (e.g. 'WBH') to its frequency
    return: a dictionary mapping each permutation to its share among the
            permutations with the same first choice
    """
    # Totals are accumulated for whatever first-choice codes occur in the data
    # rather than a fixed set of initials, so an unexpected code (such as a
    # leading '0') no longer raises a KeyError.
    first_choice_freq = {}
    for perm, freq in rrps_data.items():
        first_choice_freq[perm[0]] = first_choice_freq.get(perm[0], 0) + freq

    first_choice_dist = {}
    for perm, freq in rrps_data.items():
        total = first_choice_freq[perm[0]]
        # A zero total means no ballots at all for this first choice; report a
        # share of 0 instead of dividing by zero.
        first_choice_dist[perm] = freq / total if total else 0.0
    return first_choice_dist

In [13]:
# Run the FCFP distribution model
def fcfp(chicago_data, other_city):
    """
    Return the first-choice distribution (frequency proportion method)
    of Chicago based on another city's RRPS data.

    For every row of chicago_data, the votes cast for white, black and
    hispanic candidates are spread over the 27 three-choice permutations of
    'W', 'B' and 'H': a permutation receives the row's vote total for its
    first choice multiplied by that permutation's share in the other city's
    first-choice distribution. Permutations that never occur in the other
    city's data receive 0.

    chicago_data (pandas DataFrame): Chicago vote history with the columns
                                     'ID', 'white', 'black' and 'hispanic'
    other_city (pandas DataFrame): pre-processed ballot data of another city,
                                   passed to rrps() (3-choice permutations)

    return: a pandas DataFrame with the RRPS of Chicago, indexed by ID with
            one column per permutation
    """
    # All ordered triples over the three race initials, e.g. 'WWW', 'WWB', ...
    columns = [''.join(triple)
               for triple in itertools.product(['W', 'B', 'H'], repeat=3)]

    # Extract the first-choice distribution model from the data
    fc_dist = first_choice(rrps(other_city))

    # Chicago column holding the first-choice vote total for each initial
    first_choice_column = {'W': 'white', 'B': 'black', 'H': 'hispanic'}

    final_dist = {}
    for row in chicago_data.itertuples():
        votes = {initial: getattr(row, column)
                 for initial, column in first_choice_column.items()}
        final_dist[getattr(row, 'ID')] = [
            fc_dist[perm] * votes[perm[0]] if perm in fc_dist else 0
            for perm in columns
        ]

    # Passing the labels at construction keeps the 27 columns even when
    # chicago_data has no rows; assigning them afterwards raised a length
    # mismatch in that case.
    return pd.DataFrame.from_dict(final_dist, orient='index', columns=columns)

In [14]:
# Project the Chicago Mayoral 2015 results through the first-choice
# distribution of each reference election.

# Oakland: Mayor 2014
oakland_m14_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=oakland_mayor2014)

# Minneapolis: Mayor 2017 and 2013
minneapolis_m17_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=minneapolis_mayor2017)
minneapolis_m13_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=minneapolis_mayor2013)

# Minneapolis: Parks and Recreation Commissioner at Large 2017 and 2013
minneapolis_pal17_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=minneapolis_pal2017)
minneapolis_pal13_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=minneapolis_pal2013)

# Cambridge: City Council 2015 and 2013 (the 2017 run is left disabled)
# cambridge_cc17_to_chi = fcfp(chicago_mayor_2015, cambridge_cc2017)
cambridge_cc15_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=cambridge_cc2015)
cambridge_cc13_to_chi = fcfp(chicago_data=chicago_mayor_2015, other_city=cambridge_cc2013)

In [15]:
# Save the Chicago Mayoral 2015 results to CSVs, one file per reference election
oakland_m14_to_chi.to_csv('rrps_results/m15/oakland_m14.csv')
minneapolis_m17_to_chi.to_csv('rrps_results/m15/minneapolis_m17.csv')
minneapolis_m13_to_chi.to_csv('rrps_results/m15/minneapolis_m13.csv')
minneapolis_pal17_to_chi.to_csv('rrps_results/m15/minneapolis_pal17.csv')
# The 2013 result gets its own file; it used to be written to
# minneapolis_pal17.csv, overwriting the 2017 result saved on the line above.
minneapolis_pal13_to_chi.to_csv('rrps_results/m15/minneapolis_pal13.csv')
cambridge_cc15_to_chi.to_csv('rrps_results/m15/cambridge_cc15.csv')
cambridge_cc13_to_chi.to_csv('rrps_results/m15/cambridge_cc13.csv')

In [16]:
# Run the model on a few combinations of cities/races
# All the runs below are for Chicago Mayoral 2019 data
# NOTE: these assignments reuse the variable names of the 2015 runs, so the
# 2015 results are replaced in memory once this cell has been executed.

# Oakland Mayoral 2014
oakland_m14_to_chi = fcfp(chicago_mayor_2019, oakland_mayor2014)

# Minneapolis Mayoral 2017 and 2013
minneapolis_m17_to_chi = fcfp(chicago_mayor_2019, minneapolis_mayor2017)
minneapolis_m13_to_chi = fcfp(chicago_mayor_2019, minneapolis_mayor2013)

# Minneapolis Parks and Recreation Commissioner at Large 2017 and 2013
minneapolis_pal17_to_chi = fcfp(chicago_mayor_2019, minneapolis_pal2017)
minneapolis_pal13_to_chi = fcfp(chicago_mayor_2019, minneapolis_pal2013)

# Cambridge City Council 2017, 2015, and 2013
# (the 2017 run is disabled; the line now points at the 2019 data so that
# re-enabling it does not mix 2015 results into this cell)
# cambridge_cc17_to_chi = fcfp(chicago_mayor_2019, cambridge_cc2017)
cambridge_cc15_to_chi = fcfp(chicago_mayor_2019, cambridge_cc2015)
cambridge_cc13_to_chi = fcfp(chicago_mayor_2019, cambridge_cc2013)

In [17]:
# Save the Chicago Mayoral 2019 results to CSVs, one file per reference election
oakland_m14_to_chi.to_csv('rrps_results/m19/oakland_m14.csv')
minneapolis_m17_to_chi.to_csv('rrps_results/m19/minneapolis_m17.csv')
minneapolis_m13_to_chi.to_csv('rrps_results/m19/minneapolis_m13.csv')
minneapolis_pal17_to_chi.to_csv('rrps_results/m19/minneapolis_pal17.csv')
# The 2013 result gets its own file; it used to be written to
# minneapolis_pal17.csv, overwriting the 2017 result saved on the line above.
minneapolis_pal13_to_chi.to_csv('rrps_results/m19/minneapolis_pal13.csv')
cambridge_cc15_to_chi.to_csv('rrps_results/m19/cambridge_cc15.csv')
cambridge_cc13_to_chi.to_csv('rrps_results/m19/cambridge_cc13.csv')