Code to calculate the gerrymandering scores: the average number of gerrymandered votes per sampled comparator map, reported in total, for Democrats, and for Republicans.

Alter the values under "Input pairing, electoral/population data, PK_PCT" to change which map the score is calculated for and how large a drop in a party's district vote share (PK_PCT, expressed as a proportion, so 0.05 means 5 percentage points) is required before a district is called packed.

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Turn off pandas' chained-assignment warning (the library default is 'warn').
pd.set_option('mode.chained_assignment', None)
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [3]:
# Root folder holding every input file; the trailing slash matters because
# full_path() does plain string concatenation.
DRIVE_PATH = "/Users/carolyn/Drive/quantgerry/"


def full_path(filename):
    """Return `filename` prefixed with DRIVE_PATH.

    `filename` must be a string; the two pieces are concatenated as-is, with
    no separator inserted between them.
    """
    return "".join((DRIVE_PATH, filename))

### Input pairing, electoral/population data, PK_PCT

In [4]:
# Tab-separated VTD -> district pairing of the map being scored.
dist_VTD_path = full_path('code_data_NC_NCAbs_2016.txt')
# Geospatial dataset (read with geopandas) holding the per-VTD data.
dist_elect_path = full_path('NCabs_VTD')
# Packing threshold: how far a party's district vote share (a proportion) must
# fall from the scored map to a comparator map to count as packed.
PK_PCT = 0.05

### Process Data

In [5]:
# Load the per-VTD dataset and the VTD -> district pairing of the scored map.
basic_data = gpd.read_file(dist_elect_path)

# The pairing file has no header row; name its first two columns.
pairings = pd.read_csv(dist_VTD_path, sep='\t', header=None)
pairings = pairings.rename(columns={0: "VTD_num", 1: "district"})  # Not actually used?

##### Get comparator data

In [6]:
def get_comparators(main_df, orig_filepath, rank_num):
    """Pair every VTD of each compliant sampled map with the original map.

    Parameters
    ----------
    main_df : DataFrame
        Per-VTD data. Must contain the columns ``VTD_num``, ``EL16G_PR_D``,
        ``EL16G_PR_R`` and ``EL16G_PR_T`` (presumably Democratic, Republican
        and total vote counts -- confirm against the data dictionary).
    orig_filepath : str
        Tab-separated, header-less file whose first column is the VTD number
        and whose second column is the district in the original map.
    rank_num : int
        Ensemble rank; selects ``ensembles_main_rank_<rank_num>_*`` inputs
        (resolved through ``full_path``).

    Returns
    -------
    DataFrame
        One row per (compliant sample, VTD). Unsuffixed columns describe the
        sampled map (``district``, ``sample_num``, the three vote columns and
        the district-level ``dem_prop``, ``rep_prop``, ``dem_win``); the same
        columns with an ``_orig`` suffix describe that VTD in the original
        map. ``dem_win`` is 1 where ``dem_prop > rep_prop`` and 0 otherwise.
    """
    vote_cols = ["EL16G_PR_D", "EL16G_PR_R", "EL16G_PR_T"]
    district_keys = ["sample_num", "district"]
    # Sentinel sample number that tags rows belonging to the original map.
    orig_sample_num = -1
    rank_prefix = "ensembles_main_rank_" + str(rank_num)

    # Original districting, tagged with the sentinel sample number.
    orig = pd.read_csv(orig_filepath, sep='\t', header=None)
    orig["sample_num"] = orig_sample_num

    # Per-VTD vote counts taken from the main DataFrame.
    election_data = main_df.loc[:, ["VTD_num"] + vote_cols]

    # Sample numbers flagged as compliant (second column equal to 1).
    compliant = pd.read_csv(full_path(rank_prefix + "_compliantVerbose.txt"),
                            sep='\t', header=None)
    compliant = compliant.rename(columns={0: "sample_num", 1: "is_compliant"})
    compliant_nums = compliant.loc[compliant.is_compliant == 1, "sample_num"].tolist()

    # Read one districting map per compliant sample (takes about 5 seconds).
    maps = []
    for sample_num in compliant_nums:
        map_path = full_path(rank_prefix + "_districtingMaps/districtingMap"
                             + str(sample_num) + ".txt")
        sampled = pd.read_csv(map_path, sep='\t', header=None)
        sampled["sample_num"] = sample_num
        maps.append(sampled)
    maps.append(orig)

    # Stack all maps and attach the vote counts to every VTD row.
    final = pd.concat(maps).rename(columns={0: "VTD_num", 1: "district"})
    final = final.merge(election_data, how='left', on="VTD_num")

    # District-level totals. Only the vote columns are aggregated: summing
    # VTD identifiers would be meaningless.
    grouped = final.groupby(district_keys)[vote_cols].sum()
    grouped["dem_prop"] = grouped["EL16G_PR_D"] / grouped["EL16G_PR_T"]
    grouped["rep_prop"] = grouped["EL16G_PR_R"] / grouped["EL16G_PR_T"]
    grouped["dem_win"] = (grouped.dem_prop > grouped.rep_prop).astype("int64")
    district_stats = grouped.drop(columns=vote_cols).reset_index()

    # Copy the district-level stats onto each VTD row.
    with_stats = final.merge(district_stats, how="left", on=district_keys)

    is_orig = with_stats.sample_num == orig_sample_num
    orig_dst_lvl = with_stats.loc[is_orig]
    comp_dst_lvl = with_stats.loc[~is_orig]

    # Put the original map's values next to each comparator VTD row so that
    # differences between the two can be computed column-wise.
    return comp_dst_lvl.merge(orig_dst_lvl, how="left", on="VTD_num",
                              suffixes=('', '_orig'))

In [7]:
# Build the comparator table for ensemble rank 0, using the scored map's pairing file as the original.
with_comparators = get_comparators(main_df=basic_data, orig_filepath=dist_VTD_path, rank_num=0)

In [8]:
# Number of distinct sampled maps in the comparator table; used as the divisor when averaging.
NUM_SAMPLE_NUM = with_comparators["sample_num"].nunique()

### Calculate gerrymandering score

In [11]:
def calc_gmandered_score(df, pk_pct, NUM_SAMPLE_NUM):
    """Return the (total, dem, rep) gerrymandering scores for `df`.

    `df` needs the columns dem_prop, rep_prop, dem_prop_orig, rep_prop_orig,
    EL16G_PR_D and EL16G_PR_R. A row's votes for a party are counted when
    that party is ahead in the comparator map (prop columns) and either

    * it is behind in the original map (``*_orig`` columns), or
    * its share in the original map exceeds its comparator share by more
      than `pk_pct` (the packing threshold).

    Side effect: the columns dem_gmandered, rep_gmandered and tot_gmandered
    are written onto `df` itself.

    Each score is the column sum divided by `NUM_SAMPLE_NUM`.
    """
    # Who is ahead in the comparator map / behind in the original map.
    dem_ahead = df.dem_prop > df.rep_prop
    rep_ahead = df.rep_prop > df.dem_prop
    dem_behind_orig = df.dem_prop_orig < df.rep_prop_orig
    rep_behind_orig = df.rep_prop_orig < df.dem_prop_orig

    # Share lost when moving from the original map to the comparator map.
    dem_excess = df.dem_prop_orig - df.dem_prop
    rep_excess = df.rep_prop_orig - df.rep_prop

    dem_filter = dem_ahead & (dem_behind_orig | (dem_excess > pk_pct))
    rep_filter = rep_ahead & (rep_behind_orig | (rep_excess > pk_pct))

    # Count a party's votes only on the rows its filter selects.
    df['dem_gmandered'] = np.where(dem_filter, df['EL16G_PR_D'], 0)
    df['rep_gmandered'] = np.where(rep_filter, df['EL16G_PR_R'], 0)
    df['tot_gmandered'] = df['dem_gmandered'] + df['rep_gmandered']

    # Sum over every row, then divide by the sample count.
    total, dem, rep = (
        df[column].sum() / NUM_SAMPLE_NUM
        for column in ('tot_gmandered', 'dem_gmandered', 'rep_gmandered')
    )
    return total, dem, rep

In [12]:
# Scores for the configured map, returned as (total, dem, rep).
calc_gmandered_score(df=with_comparators, pk_pct=PK_PCT, NUM_SAMPLE_NUM=NUM_SAMPLE_NUM)

(1151027.4312796209, 959777.2246445498, 191250.20663507108)