## Precinct Matching

This notebook matches precincts in different cities by their demographic similarity.

In [1]:
import numpy as np
import pandas as pd
from scipy import spatial
import collections
import pickle
import os

In [2]:
# Race labels used when tallying votes, and the demographic columns that are
# selected from each city's precinct CSV to form its demographic vector.
races = [
    'white',
    'black',
    'hispanic',
    'asian',
    'middle eastern',
    'undetermined',
]
demo_cols = [
    'NH_WHITE',
    'NH_BLACK',
    'NH_AMIN',
    'NH_ASIAN',
    'NH_NHPI',
    'NH_OTHER',
    'NH_2MORE',
    'HISP',
]

In [3]:
# Read in Chicago demographic data
# Rows with a missing value in any CSV column are dropped first, then only the
# demographic columns are kept. Selecting with demo_cols also pins the column
# order, which must be the same for every city because the similarity loop
# compares the rows positionally.
df_chic_demo = pd.read_csv('chicago_prec_demo.csv', index_col=0).dropna()[demo_cols]

# Keep only precincts with at least one non-zero demographic count
# (presumably because cosine similarity is undefined for an all-zero vector).
df_chic_demo = df_chic_demo[(df_chic_demo != 0).any(axis=1)]

In [4]:
# Read in Cambridge demographic data
# Rows with a missing value in any CSV column are dropped first, then only the
# demographic columns are kept. Selecting with demo_cols also pins the column
# order, which must be the same for every city because the similarity loop
# compares the rows positionally.
df_camb_demo = pd.read_csv('camb_prec_demo.csv', index_col=0).dropna()[demo_cols]

# Keep only precincts with at least one non-zero demographic count
# (presumably because cosine similarity is undefined for an all-zero vector).
df_camb_demo = df_camb_demo[(df_camb_demo != 0).any(axis=1)]

In [5]:
# Read in Minneapolis demographic data
# Rows with a missing value in any CSV column are dropped first, then only the
# demographic columns are kept. Selecting with demo_cols also pins the column
# order, which must be the same for every city because the similarity loop
# compares the rows positionally.
df_minn_demo = pd.read_csv('minn_prec_demo.csv', index_col=0).dropna()[demo_cols]

# Keep only precincts with at least one non-zero demographic count
# (presumably because cosine similarity is undefined for an all-zero vector).
df_minn_demo = df_minn_demo[(df_minn_demo != 0).any(axis=1)]

In [6]:
# Build the Chicago -> matched-precinct lookup.
#   key:   Chicago precinct id (the index value of df_chic_demo)
#   value: list of (P, s) pairs, where
#          P is a precinct id in another city and
#          s is the cosine similarity between the two demographic vectors
chic_dict = {}

for chic_row in df_chic_demo.itertuples():
    # itertuples() puts the index first; the remaining fields are the demo columns.
    chic_id = chic_row[0]

    camb_scores = [
        (camb_row[0], 1 - spatial.distance.cosine(list(chic_row[1:]), list(camb_row[1:])))
        for camb_row in df_camb_demo.itertuples()
    ]
    minn_scores = [
        (minn_row[0], 1 - spatial.distance.cosine(list(chic_row[1:]), list(minn_row[1:])))
        for minn_row in df_minn_demo.itertuples()
    ]

    # Keep the 5 most similar Cambridge precincts and the 15 most similar
    # Minneapolis precincts. sorted() is stable, so ties keep frame order.
    top_camb = sorted(camb_scores, key=lambda pair: pair[1], reverse=True)[:5]
    top_minn = sorted(minn_scores, key=lambda pair: pair[1], reverse=True)[:15]

    chic_dict[chic_id] = top_camb + top_minn

# Columns of the Cambridge cast-vote record that are kept: the ballot's ID
# followed by its five ranked choices.
camb_keep_cols = [
    'ID',
    '1st Choice',
    '2nd Choice',
    '3rd Choice',
    '4th Choice',
    '5th Choice',
]

# Per-precinct container for Cambridge ballots; filled when the voting data is read.
camb_prec_votes = collections.defaultdict(list)

In [7]:
# Read in Cambridge Voting Data
df_camb_vote = pd.read_excel('Cambridge City Council CVR 2017.xlsx')


def _camb_precinct_id(raw_id):
    """Turn a raw ballot ID into the precinct label used as a dictionary key.

    Characters 2-3 and 4-5 of the ID are joined with '-', then a leading zero
    is stripped from each half; e.g. an ID whose characters 2-5 are '0302'
    becomes '3-2'.
    """
    text = str(raw_id)
    label = (text[2:4] + '-' + text[4:6]).replace("-0", "-")
    return label[1:] if label[0] == '0' else label


# Format the data
df_camb_vote['ID'] = df_camb_vote['ID'].apply(_camb_precinct_id)
# Select the kept columns explicitly so their order is ID, 1st..5th Choice
# regardless of how the spreadsheet happens to order them.
df_camb_vote = df_camb_vote[camb_keep_cols].copy()

# Read in the demographic IDs: a tab-separated, header-less file whose first
# column is mapped to its second column (presumably candidate id -> race label).
df_camb_id = pd.read_csv('camb_demo_id_2017.csv',delimiter='\t',header=None)
df_camb_id = dict(zip(df_camb_id[0],df_camb_id[1]))

# Reformat the Cambridge voting data with demographics; choices that are not
# in the lookup become NaN.
for c in camb_keep_cols[1:]:
    df_camb_vote[c] = df_camb_vote[c].apply(lambda e: df_camb_id[e].capitalize() if e in df_camb_id.keys() else np.nan)

# Ballots without a recognised first choice are discarded.
df_camb_vote = df_camb_vote[pd.notnull(df_camb_vote['1st Choice'])]

# Record the precinct votes.
# The container is rebuilt here so the cell can be re-run on its own: its
# values are replaced by Counters below, which cannot be appended to.
camb_prec_votes = collections.defaultdict(list)
top3_cols = camb_keep_cols[1:4]  # only the first three choices form the preference schedule

for row in df_camb_vote[['ID'] + top3_cols].itertuples(index=False):
    # Missing lower choices are NaN and end up as the string 'nan'.
    schedule = tuple(str(s).lower().replace("middle eastern","asian") for s in row[1:])
    camb_prec_votes[row[0]].append(schedule)

# Collapse each precinct's ballots into {preference schedule: count}.
for k in camb_prec_votes:
    camb_prec_votes[k] = collections.Counter(camb_prec_votes[k])

In [8]:
# Read in Minneapolis Voting Data
minn_prec_votes = collections.defaultdict(list)

# The candidate-name choice columns and the Count column are discarded; the
# remaining columns are read positionally below.
df_minn_vote = pd.read_csv('2017-mayor-cvr.csv')
df_minn_vote = df_minn_vote.drop(columns = ['1st Choice','2nd Choice','3rd Choice','Count'])

# Ballots without a first-choice race are discarded.
df_minn_vote = df_minn_vote[df_minn_vote['1st Choice_Race'].notnull()]

# Substitutions applied, in this order, to turn the precinct name into the key
# format, e.g. "MINNEAPOLIS W-1 P-01" -> "W1-P1".
precinct_rewrites = [
    ("MINNEAPOLIS ", ''),
    ("W-", 'W'),
    ("P-", "P"),
    (' ', '-'),
    ("P0", 'P'),
]

# Record the precinct votes
for ballot in df_minn_vote.itertuples():
    # ballot[0] is the index, ballot[1] the precinct name, the rest the choices.
    prec_key = ballot[1]
    for old, new in precinct_rewrites:
        prec_key = prec_key.replace(old, new)

    schedule = tuple(str(choice).lower().replace("middle eastern","asian") for choice in ballot[2:])
    minn_prec_votes[prec_key].append(schedule)

# Collapse each precinct's ballots into {preference schedule: count}.
for prec_key in minn_prec_votes:
    minn_prec_votes[prec_key] = collections.Counter(minn_prec_votes[prec_key])

In [9]:
# Container for the imputed Chicago votes: precinct -> {preference schedule: weight}
chic_impute = collections.defaultdict(dict)

# Remove empty votes
# Iterate over a snapshot of the items so entries can be popped without
# changing the dict's size mid-iteration. `not v` matches both an empty
# Counter and an empty list (the defaultdict's default value).
for k,v in list(camb_prec_votes.items()):
    if not v:
        print("remove")
        camb_prec_votes.pop(k,None)

In [10]:
# For each precinct in Chicago
# For each matched precinct P in chic_dict
# Weight P's ballot counts for each preference schedule by the similarity s
# and sum over all matches. The totals are normalized to VAP in a later cell.
# NOTE(review): the raw ballot counts of P are weighted, not the percentage of
# P's voters, so larger matched precincts contribute more — confirm intent.

# Exponent applied to the similarity before weighting; 1 leaves it unchanged.
WEIGHT_EXPONENT = 1

for prec, matches in chic_dict.items():
    # Accumulate into a fresh dict so re-running this cell does not double count.
    imputed = {}

    for match_id, similarity in matches:
        if match_id[0] == 'W':
            # Ids starting with 'W' are looked up in the Minneapolis votes.
            ballots = minn_prec_votes.get(match_id)
        else:
            # The demographic id "3-2A" is looked up under the voting id "3-2".
            camb_id = "3-2" if match_id == "3-2A" else match_id
            ballots = camb_prec_votes.get(camb_id)

        # .get() avoids inserting an empty list into the defaultdict for a
        # matched precinct that has no recorded ballots; such matches are skipped.
        if not ballots:
            continue

        weight = similarity ** WEIGHT_EXPONENT
        for schedule, count in ballots.items():
            imputed[schedule] = imputed.get(schedule, 0) + count * weight

    # Precincts with no ballots at all are left out, so the later
    # normalization never sees an empty entry.
    if imputed:
        chic_impute[prec] = imputed

In [11]:
# Read in precinct demographics (all columns this time, including 'VAP').
# NOTE(review): this rebinds df_chic_demo, which earlier held only the
# demographic columns — re-running the matching cell after this one would
# compare against the wider frame.
df_chic_demo = pd.read_csv('chicago_prec_demo.csv', index_col=0).dropna()

# Rescale each precinct's imputed weights so that they sum to its VAP value.
for prec_id, schedule_weights in chic_impute.items():
    weight_total = sum(schedule_weights.values())
    for schedule in schedule_weights:
        share = schedule_weights[schedule] / weight_total
        schedule_weights[schedule] = share * df_chic_demo.at[prec_id, 'VAP']

In [12]:
%%capture
# Results:
# Bare references to the imputed votes (chic_impute) and the match table
# (chic_dict), presumably kept for ad-hoc inspection.
# NOTE(review): with the %%capture cell magic on the first line, this cell's
# output is captured rather than shown, so running it displays nothing.
chic_impute
chic_dict

In [13]:
# Summarise data: city-wide imputed total for every preference schedule.
totvots = collections.defaultdict(float)

for schedule_weights in chic_impute.values():
    for schedule, votes in schedule_weights.items():
        totvots[schedule] += votes

# Number of distinct preference schedules.
print(len(totvots))

# 'middle eastern' is not listed because it was folded into 'asian' when the
# ballots were recorded.
races = ['white','black','hispanic','asian','undetermined']

# Totals by race at each reported rank; only rank index 0 (first choice) is reported.
for race in races:
    for rank in range(1):
        rank_total = sum(votes for schedule, votes in totvots.items() if schedule[rank] == race)
        print("{} Choice {}: {}".format(rank + 1, race, rank_total))

# Totals for schedules that contain the race at any position.
for race in races:
    anywhere_total = sum(votes for schedule, votes in totvots.items() if race in schedule)
    print("{} in top 3: {}".format(race, anywhere_total))

71
1 Choice white: 1544285.2855284999
1 Choice black: 459879.6702216003
1 Choice hispanic: 0
1 Choice asian: 93912.52588821895
1 Choice undetermined: 2534.5183616806394
white in top 3: 1972033.456538633
black in top 3: 1076221.3580279176
hispanic in top 3: 0
asian in top 3: 268800.35078889976
undetermined in top 3: 15347.242811505115
