In [1]:
import pandas as pd

We imported the General Election results from the [DC Board of Elections](https://electionresults.dcboe.org/election_results/2012-General-Election) for the years 2012-2018, inclusive. Earlier historic election results are available, but would require dealing with ANC boundary changes.

In [2]:
dc_2012 = pd.read_csv('Data/November_6_2012_General_and_Special_Election_Certified_Results.csv')
dc_2014 = pd.read_csv('Data/November_4_2014_General_Election_Certified_Results.csv')
dc_2016 = pd.read_csv('Data/November_8_2016_General_Election_Certified_Results.csv')
dc_2018 = pd.read_csv('Data/November_6_2018_General_Election_Certified_Results.csv')

2012-2016 had consistent headers. 2018 was renamed to be consistent.

In [3]:
# rename the 2018 dataframe headers
dc_2018 = dc_2018.rename(columns = {'ElectionDate':'ELECTION_DATE',  'ElectionName': 'ELECTION_NAME', 'ContestNumber': 'CONTEST_ID',
                'ContestName': 'CONTEST_NAME', 'PrecinctNumber': 'PRECINCT_NUMBER', 'WardNumber': 'WARD',
                'Candidate': 'CANDIDATE', 'Party': 'PARTY', 'Votes': 'VOTES'}, index=str)

In [4]:
dc_2018.shape

(9596, 9)

In [5]:
dc_2012.shape

(7130, 9)

In [6]:
dc_2014.shape

(13059, 9)

In [7]:
dc_2016.shape

(8724, 9)

In [8]:
dc_2012.shape[0] + dc_2014.shape[0] + dc_2016.shape[0] + dc_2018.shape[0]

38509

In [9]:
dc_2012.shape[0] + dc_2014.shape[0] + dc_2016.shape[0] 

28913

In [10]:
dc_2016['CONTEST_NAME'] = dc_2016['CONTEST_NAME'].str.strip()
dc_2016_obj = dc_2016.select_dtypes(['object'])
dc_2016[dc_2016_obj.columns] = dc_2016_obj.apply(lambda x: x.str.strip())

In [11]:
# initial merge
dc_2012_2014 = pd.concat([dc_2012, dc_2014], sort=False, axis = 0)

In [12]:
dc_2012_2014.shape

(20189, 9)

2012-2016 and 2018 had slightly different formats for expressing CONTEST_NAME. We extracted the name of each ANC Single Member District from the CONTEST_NAME column and created a new column for the SMD.

In [13]:
# add the appropriate SMD column for the various years
dc_2012_2014['SMD'] = dc_2012_2014.CONTEST_NAME.str[-4:]
dc_2016['SMD'] = dc_2016.CONTEST_NAME.str[6:10]
dc_2018['SMD'] = dc_2018.CONTEST_NAME.str[6:10]

In [14]:
dc_2012_2018 = pd.concat([dc_2012_2014,dc_2016, dc_2018], sort=False, axis = 0)

In [15]:
dc_2012_2018.shape
dc_2012_2018_obj = dc_2012_2018.select_dtypes(['object'])
dc_2012_2018[dc_2012_2018_obj.columns] = dc_2012_2018_obj.apply(lambda x: x.str.strip())

In [16]:
dc_2012_2018.ELECTION_DATE.value_counts()

11/4/2014                13059
11/6/2018 12:00:00 AM     9596
11/8/2016                 8724
11/6/2012                 7130
Name: ELECTION_DATE, dtype: int64

In [17]:
# filter for just the results that include the name "ANC" or "ADVISORY NEIGHBORHOOD COMMISSIONER"
anc_only =  dc_2012_2018[(dc_2012_2018['CONTEST_NAME'].str.contains("ANC")) | (dc_2012_2018['CONTEST_NAME'].str.contains("ADVISORY NEIGHBORHOOD COMMISSIONER")) ]

In [18]:
anc_only.ELECTION_DATE = anc_only.ELECTION_DATE.apply(pd.to_datetime)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [19]:
anc_only.shape

(8521, 10)

In [20]:
anc_only.dtypes

ELECTION_DATE      datetime64[ns]
ELECTION_NAME              object
CONTEST_ID                  int64
CONTEST_NAME               object
PRECINCT_NUMBER             int64
WARD                        int64
CANDIDATE                  object
PARTY                      object
VOTES                       int64
SMD                        object
dtype: object

In [21]:
anc_only.groupby(['ELECTION_DATE', 'SMD', 'CANDIDATE']).VOTES.sum()

ELECTION_DATE  SMD   CANDIDATE               
2012-11-06     1A01  LISA KRALOVIC               374
                     WRITE-IN                     24
               1A02  ALEXANDER GALLO             295
                     VICKEY A. WRIGHT-SMITH      432
                     WRITE-IN                     11
               1A03  STEVE SWANK                 406
                     WRITE-IN                     36
               1A04  LAINA AQUILINE              430
                     SENTAMU KIREMERWA           120
                     WRITE-IN                     18
               1A05  KEVIN HOLMES                440
                     WRITE-IN                     16
               1A06  KEVIN E. CLINESMITH          87
                     PATRICK W. FLYNN            411
                     WILLIAM "BILL" BROWN JR.    218
                     WRITE-IN                      9
               1A07  THOMAS BOISVERT             633
                     WRITE-IN                     44


In [22]:
# verify ward 8 against current officeholders
anc_only[anc_only.WARD==8].groupby(['ELECTION_DATE', 'SMD', 'CANDIDATE']).VOTES.sum()

ELECTION_DATE  SMD   CANDIDATE               
2012-11-06     8A01  HOLLY MUHAMMAD              935
                     WRITE-IN                     36
               8A02  BARBARA J. CLARK            799
                     RANDI K. POWELL             200
                     WRITE-IN                     47
               8A03  L. YVONNE (L.Y.) MOORE      834
                     WRITE-IN                     52
               8A04  MOSES SMITH                 479
                     WRITE-IN                     39
               8A05  CHARLES E. WILSON           539
                     JEREMY J. PHILLIPS          297
                     WRITE-IN                     11
               8A06  GRETA J. FULLER             352
                     KENDALL J. GRAHAM           441
                     STEPHEN COOKE               201
                     WRITE-IN                     19
               8A07  NATALIE WILLIAMS            806
                     WRITE-IN                     28


In [23]:
#with pd.option_context("max.rows", 300):
    #print(dc_2012_2018.CONTEST_NAME.value_counts())

In [24]:
df = anc_only.groupby(['ELECTION_DATE', 'SMD', 'CANDIDATE']).VOTES.sum()

In [25]:
df = df.reset_index()

In [26]:
df.ELECTION_DATE.value_counts()

2016-11-08    1305
2014-11-04    1301
2018-11-06    1286
2012-11-06     681
Name: ELECTION_DATE, dtype: int64

In [27]:
grouper = df.groupby(['ELECTION_DATE', 'SMD'])
# Number of candidates in each SMD ANC race. Usually if there are 2 "candidates" in the race, the winner was unopposed as the other "candidate"
# were the pile of write-ins.
grouper.CANDIDATE.count()

ELECTION_DATE  SMD 
2012-11-06     1A01    2
               1A02    3
               1A03    2
               1A04    3
               1A05    2
               1A06    4
               1A07    2
               1A08    2
               1A09    2
               1A10    3
               1A11    3
               1A12    3
               1B01    2
               1B02    2
               1B03    2
               1B04    2
               1B05    2
               1B06    2
               1B07    2
               1B08    2
               1B09    2
               1B10    2
               1B11    2
               1B12    5
               1C01    4
               1C02    2
               1C03    3
               1C04    2
               1C05    2
               1C06    3
                      ..
2018-11-06     8A06    5
               8A07    4
               8B01    5
               8B02    4
               8B03    5
               8B04    4
               8B05    4
               8B06    4
     

In [28]:
# there are 296 SMDs as per https://thedcline.org/2018/08/14/districts-296-anc-races-draw-as-many-as-five-candidates-but-two-thirds-are-uncontested/
df.SMD.value_counts()

6E02    23
5E09    21
8A06    21
5E04    20
5E07    19
6E04    19
5E03    19
7C07    19
6A06    19
7F01    19
6D05    19
5E05    18
7E04    18
1B12    18
3D05    18
7D01    18
4B07    18
6D07    18
5C04    18
5B01    18
5E06    18
5C06    18
6E01    18
6B05    18
5B04    18
5D05    18
5D06    18
5C01    18
4B01    18
2B04    17
        ..
5A07    14
3E03    14
3B03    14
4B03    14
3C06    14
8E06    14
3D04    14
2D01    14
2A06    14
1B11    14
8D02    14
6C04    14
8D01    14
4A06    14
3D08    14
3B04    14
8D06    14
1A03    14
4A07    14
7F06    13
2A08    13
3D06    13
2E04    13
7D02    13
5A04    13
7F07    13
7B03    13
3D07    13
8E05    13
8D05    13
Name: SMD, Length: 296, dtype: int64

In [29]:
df

Unnamed: 0,ELECTION_DATE,SMD,CANDIDATE,VOTES
0,2012-11-06,1A01,LISA KRALOVIC,374
1,2012-11-06,1A01,WRITE-IN,24
2,2012-11-06,1A02,ALEXANDER GALLO,295
3,2012-11-06,1A02,VICKEY A. WRIGHT-SMITH,432
4,2012-11-06,1A02,WRITE-IN,11
5,2012-11-06,1A03,STEVE SWANK,406
6,2012-11-06,1A03,WRITE-IN,36
7,2012-11-06,1A04,LAINA AQUILINE,430
8,2012-11-06,1A04,SENTAMU KIREMERWA,120
9,2012-11-06,1A04,WRITE-IN,18


In [30]:
df['WARD'] = df.SMD.str[:1]

In [31]:
df.tail()

Unnamed: 0,ELECTION_DATE,SMD,CANDIDATE,VOTES,WARD
4568,2018-11-06,8000000.0,WRITE-IN,17,8
4569,2018-11-06,80000000.0,OVER VOTES,0,8
4570,2018-11-06,80000000.0,Stephen A Slaughter,455,8
4571,2018-11-06,80000000.0,UNDER VOTES,102,8
4572,2018-11-06,80000000.0,WRITE-IN,40,8


In [32]:
df = df.set_index('ELECTION_DATE')

In [33]:
# save progress
df.to_csv('anc_electoral_history_2012_2018.csv')