In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
%matplotlib inline

Calculate the population deviation as defined in the paper "Automated Congressional Redistricting".

Definition: Let n be the total population in NC. Let k be the number of districts in NC.

Then L = n/k is the desired population of each district.

Let x be the largest absolute difference between any single district's population and L, i.e. $x = \max_i |p_i - L|$, where $p_i$ is the population of district $i$. Then the population deviation of NC under a particular districting is $x/L$.

In [3]:
# Load the shapefile with geopandas; the path is relative to the working directory.
df = gpd.read_file(
    '../Data/cen_prec_data3/cen_prec_data3.shp'
)

In [4]:
# Show the available column names; 'district' and 'total_pop' are the two used in the calculation.
df.columns

Index(['COUNTY_NAM', 'loc_prec', 'special', 'district', 'writein', 'votes',
       'dem_votes', 'rep_votes', 'lib_votes', 'dem_prop', 'rep_prop',
       'lib_prop', 'abs_dem', 'abs_rep', 'abs_lib', 'total_dem', 'total_rep',
       'total_lib', 'total_vote', 'dem_percen', 'rep_percen', 'my_neighbo',
       'HOUSING10', 'total_pop', 'total_18+', 'hispanic', 'not_hispan',
       'pop_1_race', 'white', 'african_am', 'am_indian_', 'asian',
       'hawaii/pac', 'other_race', '2+races', 'geometry'],
      dtype='object')

In [12]:
# Preview the per-row population counts that feed the deviation calculation.
df.loc[:, 'total_pop']

0       4468
1       2802
2       3113
3       4940
4       6086
        ... 
2699     625
2700    5918
2701    3581
2702    4082
2703    4690
Name: total_pop, Length: 2704, dtype: int64

In [18]:
# n: total population, i.e. the 'total_pop' column summed over every row.
n = df.total_pop.sum()
n

9535483

In [15]:
# k: the number of districts from the definition above (hard-coded to 13).
k = 13

In [17]:
# L: the target population per district, i.e. the total n split evenly over k districts.
L = n/k
L

733498.6923076923

In [24]:
# Collapse the rows to one row per district by summing the numeric columns.
#
# numeric_only=True is passed explicitly: older pandas silently dropped
# non-numeric columns from a groupby sum, but since pandas 2.0 the default is
# numeric_only=False, so a bare .sum() would try to add up the text and
# geometry columns (raising, or concatenating strings) instead.
#
# NOTE(review): only 'total_pop' is used by the deviation calculation; sums of
# columns that look like per-row proportions (e.g. 'dem_prop') are presumably
# not meaningful district-level values.
districts = df.groupby('district').sum(numeric_only=True)
districts

Unnamed: 0_level_0,votes,dem_votes,rep_votes,lib_votes,dem_prop,rep_prop,lib_prop,abs_dem,abs_rep,abs_lib,...,hispanic,not_hispan,pop_1_race,white,african_am,am_indian_,asian,hawaii/pac,other_race,2+races
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,141743.0,85648.0,52041.0,4054.0,13.358426,12.68056,13.987069,6665943.0,2184795.0,175301.0,...,37362,529744,524387,262060,246780,3926,10827,176,618,5357
2,153565.0,58033.0,95529.0,3.0,4.14928,4.497497,0.012931,15115220.0,9779095.0,1275.0,...,37674,490984,485925,373457,100351,2422,9067,189,439,5059
3,178395.0,56973.0,121422.0,0.0,16.353091,16.689084,0.0,1280090.0,2120971.0,14344.0,...,30321,533270,526783,401429,115243,2321,6799,559,432,6487
4,136604.0,79636.0,56968.0,0.0,1.810288,1.65272,0.0,29656510.0,16814338.0,11040.0,...,48825,513150,505634,346291,119732,1570,36922,224,895,7516
5,266990.0,112978.0,154012.0,0.0,10.074462,10.081494,0.0,1495050.0,1892530.0,0.0,...,38023,529631,525168,440873,76126,1380,6241,159,389,4463
6,303124.0,117574.0,185550.0,0.0,7.310783,7.394027,0.0,464369.0,360254.0,0.0,...,39909,518341,513831,396980,107479,2229,6563,142,438,4510
7,249080.0,95063.0,154017.0,0.0,7.686866,7.772256,0.0,1254241.0,1758302.0,0.0,...,39694,530799,525726,404068,112320,4499,4078,241,520,5073
8,235877.0,98417.0,137460.0,0.0,6.227803,6.395068,0.0,1513404.0,2087656.0,0.0,...,39529,506314,498958,365529,115943,6185,9865,856,580,7356
9,269876.0,106555.0,163321.0,0.0,6.030491,6.376746,0.0,1590630.0,1272307.0,0.0,...,31850,503659,498228,343276,101471,42165,10434,254,628,5431
10,206619.0,66955.0,139664.0,0.0,6.519355,6.40143,0.0,3785496.0,4341906.0,0.0,...,25543,543437,538828,466084,63686,1589,6976,133,360,4609


In [27]:
# Absolute gap between each district's summed population and the target L.
all_deviations = (districts['total_pop'] - L).abs()
all_deviations

district
1      175.692308
2      856.692308
3      894.692308
4      273.692308
5      367.307692
6       55.307692
7     2913.307692
8     1787.307692
9     2003.692308
10    2550.307692
11      55.692308
12     401.307692
13    3814.692308
Name: total_pop, dtype: float64

In [29]:
# x: the largest absolute deviation from L across all districts.
x = all_deviations.max()
x

3814.692307692254

In [32]:
# Population deviation x/L, scaled by 100 to express it as a percentage.
pop_dev_percent = 100 * (x / L)
pop_dev_percent

0.520068044796465

Based on our data file and our approximation of how precinct populations are allocated to districts, the population deviation of NC under its 2016 districting is about 0.52% (that is, x/L expressed as a percentage).