# Historical Election Results

Process the certified general election result CSVs published by the DC Board of Elections (DCBOE) for 2012, 2014, 2016, and 2018.

In [1]:
import pandas as pd

In [2]:
# Folder (relative path) that holds the DCBOE certified-results CSVs.
file_root = '../data/dcboe/election_results/'

# Election year -> name of that year's results file inside file_root.
year_files = {
    2012: 'November_6_2012_General_and_Special_Election_Certified_Results.csv',
    2014: 'November_4_2014_General_Election_Certified_Results.csv',
    2016: 'November_8_2016_General_Election_Certified_Results.csv',
    2018: 'November_6_2018_General_Election_Certified_Results.csv',
}

In [3]:
# The result files spell their column headers two ways (CamelCase and
# UPPER_SNAKE). Map both spellings onto one lower-case schema; rename()
# silently ignores keys that are absent from a given file.
column_aliases = {
    'Candidate': 'candidate_name'
    , 'CANDIDATE': 'candidate_name'
    , 'ContestName': 'contest_name'
    , 'CONTEST_NAME': 'contest_name'
    , 'ContestNumber': 'contest_number'
    , 'CONTEST_ID': 'contest_number'
    , 'Votes': 'votes'
    , 'VOTES': 'votes'
}

all_contests = {}   # year -> every contest row except over/under-vote tallies
anc_by_year = {}    # year -> only the ANC single-member-district contest rows

for year in year_files:
    results = pd.read_csv(file_root + year_files[year]).rename(columns=column_aliases)

    # Exclude the Over Vote and Under Vote rows.
    # na=False makes a missing candidate name count as "no match", so the mask
    # is a genuine boolean Series and `~` is a logical NOT (rows with a missing
    # name are kept, exactly as before). regex=False matches the text literally.
    is_tally_row = (
        results['candidate_name'].str.contains('UNDER VOTES', regex=False, na=False)
        | results['candidate_name'].str.contains('OVER VOTES', regex=False, na=False)
    )
    all_contests[year] = results[~is_tally_row].copy()

    # Include only ANC contests. The contest name is worded differently across
    # files, so accept either phrasing; rows with a missing contest name are
    # treated as non-ANC instead of raising on a mask that contains NaN.
    contest_name = all_contests[year]['contest_name']
    is_anc_contest = (
        contest_name.str.contains('SINGLE MEMBER DISTRICT', regex=False, na=False)
        | contest_name.str.contains('ADVISORY NEIGHBORHOOD COMMISSIONER', regex=False, na=False)
    )
    anc_by_year[year] = all_contests[year][is_anc_contest].copy()



### Top contests by total votes in each year

In [4]:
# For every election year, total the votes per contest and print the five
# largest totals.
for year in year_files:
    votes_by_contest = (
        all_contests[year]
        .groupby('contest_name')['votes']
        .sum()
        .sort_values(ascending=False)
    )
    top_contests = votes_by_contest.head()
    print(f'\n{year}')
    print(top_contests)


2012
contest_name
REGISTERED VOTERS - TOTAL                                483775
AT - LARGE MEMBER OF THE COUNCIL DISTRICT OF COLUMBIA    387131
BALLOTS CAST - TOTAL                                     294814
PRESIDENT AND VICE PRESIDENT DISTRICT OF COLUMBIA        293764
DELEGATE U.S. HOUSE OF REPRESENTATIVES DISTRICT OF       278563
Name: votes, dtype: int64

2014
contest_name
REGISTERED VOTERS - TOTAL                                461325
AT - LARGE MEMBER OF THE COUNCIL DISTRICT OF COLUMBIA    272869
BALLOTS CAST - TOTAL                                     177377
MAYOR OF THE DISTRICT OF COLUMBIA                        175270
DELEGATE U.S. HOUSE OF REPRESENTATIVES                   171893
Name: votes, dtype: int64

2016
contest_name
REGISTERED VOTERS - TOTAL                                   478688
AT - LARGE MEMBER OF THE COUNCIL DISTRICT OF COLUMBIA       443144
BALLOTS CAST - TOTAL                                        312575
ELECTORS OF PRESIDENT AND VICE PRESIDENT          

In [5]:
# Build an 'smd_<code>' identifier from the contest name. The code is read
# from a fixed character range, and that range differs between the naming
# used through 2014 and the naming used afterwards.
for year in year_files:
    anc_results = anc_by_year[year]
    if year > 2014:
        smd_code = anc_results['contest_name'].str.slice(6, 10)
    else:
        smd_code = anc_results['contest_name'].str.slice(35, 40)
    anc_results['smd_id'] = 'smd_' + smd_code

In [6]:
# Ten largest per-name vote totals among the 2012 ANC rows.
(
    anc_by_year[2012]
    .groupby('candidate_name')['votes']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

candidate_name
WRITE-IN                    8406
PHILLIP J. HAMMOND          1172
DWAYNE M. TOLIVER           1113
BRIAN FLAHAVEN              1072
ROBERT T. (BOB) RICHARDS    1055
JO-ANNE PRUE                1054
ADRIAN JORDAN               1028
ROBERT A. JORDAN            1027
GALE BLACK                  1018
YVONNE A. JEFFERSON          973
Name: votes, dtype: int64

In [7]:
# Stack the per-year ANC frames into one table (the dict key becomes the
# 'year' column) and put candidate names in Title Case.
anc = (
    pd.concat(anc_by_year, names=['year'])
    .reset_index()
    .assign(candidate_name=lambda frame: frame['candidate_name'].str.title())
)

In [8]:
# Total ANC votes per election year, shown with thousands separators.
votes_per_year = anc.groupby('year')['votes'].sum()
votes_per_year.map('{:,d}'.format)

year
2012    206,425
2014    131,666
2016    244,608
2018    190,628
Name: votes, dtype: object

In [9]:
# Spot check: rows whose candidate name contains 'Lightman'.
is_lightman = anc['candidate_name'].str.contains('Lightman')
anc.loc[is_lightman]

Unnamed: 0,year,level_1,ELECTION_DATE,ELECTION_NAME,contest_number,contest_name,PRECINCT_NUMBER,WARD,candidate_name,PARTY,votes,smd_id,ElectionDate,ElectionName,PrecinctNumber,WardNumber,Party
2579,2014,12965,11/4/2014,D.C. Generation Election,330,ADVISORY NEIGHBORHOOD COMMISSIONER 6D01,142.0,6.0,Marjorie Lightman,NON,664,smd_6D01,,,,,


In [10]:
# Spot check: every row recorded for SMD 6D01, across all years.
in_smd_6d01 = anc['smd_id'].eq('smd_6D01')
anc.loc[in_smd_6d01]

Unnamed: 0,year,level_1,ELECTION_DATE,ELECTION_NAME,contest_number,contest_name,PRECINCT_NUMBER,WARD,candidate_name,PARTY,votes,smd_id,ElectionDate,ElectionName,PrecinctNumber,WardNumber,Party
1268,2012,7069,11/6/2012,D.C. Generation Election,329,ADVISORY NEIGHBORHOOD COMMISSIONER 6D01,142.0,6.0,Donna L Hopkins,NON,937,smd_6D01,,,,,
1269,2012,7070,11/6/2012,D.C. Generation Election,329,ADVISORY NEIGHBORHOOD COMMISSIONER 6D01,142.0,6.0,Write-In,NON,33,smd_6D01,,,,,
2579,2014,12965,11/4/2014,D.C. Generation Election,330,ADVISORY NEIGHBORHOOD COMMISSIONER 6D01,142.0,6.0,Marjorie Lightman,NON,664,smd_6D01,,,,,
2580,2014,12966,11/4/2014,D.C. Generation Election,330,ADVISORY NEIGHBORHOOD COMMISSIONER 6D01,142.0,6.0,Write-In,NON,28,smd_6D01,,,,,
3912,2016,8650,11/8/2016,D.C. General Election,526,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D ...,142.0,6.0,Gail Fast,NON,495,smd_6D01,,,,,
3913,2016,8651,11/8/2016,D.C. General Election,526,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D ...,142.0,6.0,Martin Welles,NON,288,smd_6D01,,,,,
3914,2016,8652,11/8/2016,D.C. General Election,526,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D ...,142.0,6.0,Wes Ven Johnson,NON,118,smd_6D01,,,,,
3915,2016,8653,11/8/2016,D.C. General Election,526,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D ...,142.0,6.0,"Dorinda ""Rindi"" White",NON,346,smd_6D01,,,,,
3916,2016,8654,11/8/2016,D.C. General Election,526,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D ...,142.0,6.0,Write-In,NON,25,smd_6D01,,,,,
5204,2018,9530,,,528,ANC - 6D01 SINGLE MEMBER DISTRICT 01-ANC 6D,,,Gail Fast,,1080,smd_6D01,11/6/2018 12:00:00 AM,General Election,142.0,6.0,NON


## Group by Candidates

Results are reported separately for each precinct. Sum each candidate's votes across precincts to find out who won each race.

In [11]:
# One row per candidate per contest: add up the votes over all rows that
# share the same year, SMD, contest number and candidate name.
candidate_keys = ['year', 'smd_id', 'contest_number', 'candidate_name']
candidates = (
    anc
    .groupby(candidate_keys)['votes']
    .sum()
    .reset_index()
)

In [12]:
# Rank candidates within each (year, SMD) race by votes, highest first.
# method='first' gives tied candidates distinct ranks in row order, so
# exactly one row per race ends up with rank 1.
votes_within_race = candidates.groupby(['year', 'smd_id'])['votes']
candidates['ranking'] = votes_within_race.rank(method='first', ascending=False)
candidates['winner'] = candidates['ranking'].eq(1)

In [13]:
# Flag races in which the top-ranked row is the 'Write-In' line.
is_write_in = candidates['candidate_name'].eq('Write-In')
candidates['write_in_winner'] = candidates['winner'] & is_write_in

In [14]:
# Order rows by year, then SMD, then votes from highest to lowest.
candidates = candidates.sort_values(
    by=['year', 'smd_id', 'votes'],
    ascending=[True, True, False],
)

In [15]:
# Total votes cast in each race, one value per (year, SMD) pair.
race_keys = ['year', 'smd_id']
total_votes = candidates.groupby(race_keys)['votes'].sum().rename('total_votes')

# Attach each race total to its candidate rows with transform() instead of a
# merge. Assigning the column keeps this cell safe to re-run: a second
# pd.merge would produce total_votes_x / total_votes_y columns and the
# vote_share line would then fail with a KeyError.
candidates = candidates.reset_index(drop=True)  # keep the 0..n-1 row numbering the merge used to produce
candidates['total_votes'] = candidates.groupby(race_keys)['votes'].transform('sum')

# Each candidate's fraction of the votes cast in their race.
candidates['vote_share'] = candidates['votes'] / candidates['total_votes']

In [17]:
# Show the per-candidate results table. To copy it for pasting elsewhere,
# uncomment the trailing call so the line reads candidates.to_clipboard().
candidates #.to_clipboard()

Unnamed: 0,year,smd_id,contest_number,candidate_name,votes,ranking,winner,write_in_winner,total_votes,vote_share
0,2012,smd_1A01,117,Lisa Kralovic,374,1.0,True,False,398,0.939698
1,2012,smd_1A01,117,Write-In,24,2.0,False,False,398,0.060302
2,2012,smd_1A02,118,Vickey A. Wright-Smith,432,1.0,True,False,738,0.585366
3,2012,smd_1A02,118,Alexander Gallo,295,2.0,False,False,738,0.399729
4,2012,smd_1A02,118,Write-In,11,3.0,False,False,738,0.014905
...,...,...,...,...,...,...,...,...,...,...
2792,2018,smd_8E05,609,Write-In,41,2.0,False,False,566,0.072438
2793,2018,smd_8E06,610,Karlene (K.) Armstead,378,1.0,True,False,395,0.956962
2794,2018,smd_8E06,610,Write-In,17,2.0,False,False,395,0.043038
2795,2018,smd_8E07,611,Stephen A Slaughter,455,1.0,True,False,495,0.919192


In [20]:
# How many people are in this dataset? I have 615 in the OpenANC dataset right now
# Counts distinct name strings, so the 'Write-In' placeholder is included.
candidates['candidate_name'].nunique(dropna=False)

1301