![](../additional_materials/logos/darden_rice_logo_SM.png)

### 2021 Municipal Election Day Doc Processing

This notebook contains code to process and format data for the 2021 Pinellas County municipal primary election according to Adrienne Bogen's [E Day Doc](https://docs.google.com/spreadsheets/d/1M6EKaDWyVTHzpNTi2cdLXDYZfKgGVtChcbCmEbIla4k/edit#gid=0) on Google Sheets.

Data sources: 
* [Pinellas County SOE](https://www.votepinellas.com/Election-Results)

---
---

In [1]:
import pandas as pd
from pandas.tseries.offsets import BDay
pd.set_option('display.max_columns', None)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

import datetime

In [2]:
# Load the raw voter extract (file name indicates the 6PM pull on 08-24-21)
raw_csv_path = '../data/mp_eday_2021_raw/voted_6PM_08-24-21.csv'
df = pd.read_csv(raw_csv_path)

In [3]:
# Preview the first three rows of the raw extract
df.head(3)

Unnamed: 0,RegNum,LastName,MiddleName,FirstName,StreetNumber,StreetSuffix,StreetDir,StreetName,StreetType,StreetDirSuffix,UnitType,ApartmentNumber,CityName,ZipCode,Precinct,Party,VoteDate,QualifiedCertificate
0,107242621,O'NEIL,,ALLEN,6900,,,23RD,ST,S,,,ST PETERSBURG,33712,107.0,CPF,2021-08-11 11:59:03Z,
1,107151101,WIGGINS,WILLIAM,RICHARD,524,,,30TH,AVE,N,,,ST PETERSBURG,33704,136.0,CPF,2021-08-24 17:07:19Z,
2,118810192,CANTLAY,COPELAND,BLAKE,777,,,30TH,AVE,N,,#2,ST PETERSBURG,33704,137.0,CPF,2021-08-16 13:00:13Z,


In [4]:
# (rows, columns) of the raw extract
df.shape

(51906, 18)

In [5]:
# List the raw column names
df.columns

Index(['RegNum', 'LastName', 'MiddleName', 'FirstName', 'StreetNumber',
       'StreetSuffix', 'StreetDir', 'StreetName', 'StreetType',
       'StreetDirSuffix', 'UnitType', 'ApartmentNumber', 'CityName', 'ZipCode',
       'Precinct', 'Party', 'VoteDate', 'QualifiedCertificate'],
      dtype='object')

In [6]:
# Discard the voter-identity and address fields; only Precinct, Party and
# VoteDate are kept for the rest of the notebook.
drop_cols = [
    'RegNum', 'LastName', 'MiddleName', 'FirstName',
    'StreetNumber', 'StreetSuffix', 'StreetDir', 'StreetName',
    'StreetType', 'StreetDirSuffix',
    'UnitType', 'ApartmentNumber', 'CityName', 'ZipCode',
    'QualifiedCertificate',
]

df = df.drop(columns=drop_cols)

In [7]:
# Check the columns remaining after the drop (Precinct, Party, VoteDate)
df.head()

Unnamed: 0,Precinct,Party,VoteDate
0,107.0,CPF,2021-08-11 11:59:03Z
1,136.0,CPF,2021-08-24 17:07:19Z
2,137.0,CPF,2021-08-16 13:00:13Z
3,140.0,CPF,2021-08-16 12:35:39Z
4,155.1,CPF,2021-08-04 12:20:58Z


In [8]:
# Remove decimals from precinct numbers (this is an error in the original report:
# e.g., precinct 155.1 should be 155).
# Truncate instead of rounding: rounding would move any suffix of .5 or higher
# (e.g., 155.6) into the next precinct (156) rather than back to 155.
df['Precinct'] = np.floor(df['Precinct']).astype(int)

In [9]:
# Check that Precinct values are now whole numbers
df.head()

Unnamed: 0,Precinct,Party,VoteDate
0,107,CPF,2021-08-11 11:59:03Z
1,136,CPF,2021-08-24 17:07:19Z
2,137,CPF,2021-08-16 13:00:13Z
3,140,CPF,2021-08-16 12:35:39Z
4,155,CPF,2021-08-04 12:20:58Z


In [10]:
# Parse VoteDate, keep only the calendar date (dropping the time of day), then
# store it back as a datetime column. Votes from before EDay (08-24-21) still
# need to be filtered out afterwards.
vote_days = pd.to_datetime(df['VoteDate']).dt.date
df['VoteDate'] = pd.to_datetime(vote_days)

In [11]:
# Check column dtypes after the VoteDate conversion
df.dtypes

Precinct             int64
Party               object
VoteDate    datetime64[ns]
dtype: object

In [12]:
# VoteDate should now show dates only, with no time component
df.head()

Unnamed: 0,Precinct,Party,VoteDate
0,107,CPF,2021-08-11
1,136,CPF,2021-08-24
2,137,CPF,2021-08-16
3,140,CPF,2021-08-16
4,155,CPF,2021-08-04


In [13]:
# Keep only votes cast on EDay (08-24-21); all other dates are removed.
# The date is pinned explicitly rather than taken from datetime.date.today(),
# so re-running the notebook on any later day still selects the same rows
# instead of producing an empty frame.
ELECTION_DAY = datetime.date(2021, 8, 24)
df = df[df['VoteDate'].dt.date == ELECTION_DAY].copy()

In [14]:
# Order rows by precinct number and renumber the index from 0
df = df.sort_values('Precinct').reset_index(drop=True)

In [15]:
# Tally the remaining rows by party code
df['Party'].value_counts()

DEM    8148
REP    5305
NPA    1913
IND     103
LPF      59
GRE       8
PSL       6
CPF       2
ECO       1
Name: Party, dtype: int64

In [16]:
# Change all non-Dem/Rep parties to 'OTH'.
# Recode by exclusion instead of listing minor-party codes: a code missing from
# a hand-written list would become its own column after the unstack and then be
# silently left out of the DEM/REP/OTH totals.
# Missing Party values are left untouched, as before.
major_parties = ['DEM', 'REP']
keep_as_is = df['Party'].isin(major_parties) | df['Party'].isna()

df['Party'] = df['Party'].where(keep_as_is, 'OTH')

In [17]:
# Verify that only DEM, REP and OTH remain
df['Party'].value_counts()

DEM    8148
REP    5305
OTH    2092
Name: Party, dtype: int64

In [18]:
# Count rows for every (Precinct, Party) pair, then pivot Party into columns
precinct_gb_df = (
    df.groupby(by=['Precinct', 'Party'])
      .count()
      .unstack(level='Party')
)

In [19]:
# Preview the precinct-by-party counts (columns are a (VoteDate, Party) MultiIndex here)
precinct_gb_df.head()

Unnamed: 0_level_0,VoteDate,VoteDate,VoteDate
Party,DEM,OTH,REP
Precinct,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
101,258.0,36.0,46.0
102,134.0,15.0,13.0
103,43.0,9.0,35.0
104,159.0,24.0,18.0
105,255.0,35.0,38.0


In [20]:
# Flatten the column MultiIndex down to the party codes, then move Precinct
# out of the index into an ordinary column
precinct_gb_df.columns = precinct_gb_df.columns.droplevel(0)

precinct_gb_df = precinct_gb_df.reset_index()

In [21]:
# Count missing values per column; NaN marks a precinct with no rows for that party
precinct_gb_df.isnull().sum()

Party
Precinct    0
DEM         2
OTH         2
REP         1
dtype: int64

In [22]:
# Precincts with a missing count for at least one party
null_df = precinct_gb_df.loc[precinct_gb_df.isna().any(axis='columns')]

In [23]:
# Show the precincts with at least one missing party count
null_df

Party,Precinct,DEM,OTH,REP
16,117,82.0,4.0,
21,122,4.0,,2.0
56,165,,,2.0
67,217,,1.0,1.0


In [24]:
# A missing count means no voters of that party in the precinct, so use 0
precinct_gb_df = precinct_gb_df.fillna(value=0)

In [25]:
# Confirm no missing values remain
precinct_gb_df.isnull().sum()

Party
Precinct    0
DEM         0
OTH         0
REP         0
dtype: int64

In [26]:
# The counts are floats at this point (e.g., 258.0); cast every column to int
precinct_gb_df = precinct_gb_df.astype(dtype=int)

In [27]:
# Preview the table with integer counts
precinct_gb_df.head()

Party,Precinct,DEM,OTH,REP
0,101,258,36,46
1,102,134,15,13
2,103,43,9,35
3,104,159,24,18
4,105,255,35,38


In [28]:
# Put the columns in Precinct, DEM, REP, OTH order
precinct_gb_df = precinct_gb_df.loc[:, ['Precinct', 'DEM', 'REP', 'OTH']]

In [29]:
# Total turnout (TO) per precinct: sum of the three party counts
precinct_gb_df['Total_TO'] = precinct_gb_df[['DEM', 'REP', 'OTH']].sum(axis=1)

# Each party's share of total turnout, as a fraction rounded to 4 decimals
for pct_col, party_col in [('Dem_pct', 'DEM'), ('Rep_pct', 'REP'), ('Oth_pct', 'OTH')]:
    precinct_gb_df[pct_col] = precinct_gb_df[party_col].div(precinct_gb_df['Total_TO']).round(4)

In [30]:
# Display the final per-precinct summary (the rendered output abbreviates the middle rows)
precinct_gb_df

Party,Precinct,DEM,REP,OTH,Total_TO,Dem_pct,Rep_pct,Oth_pct
0,101,258,46,36,340,0.7588,0.1353,0.1059
1,102,134,13,15,162,0.8272,0.0802,0.0926
2,103,43,35,9,87,0.4943,0.4023,0.1034
3,104,159,18,24,201,0.7910,0.0896,0.1194
4,105,255,38,35,328,0.7774,0.1159,0.1067
...,...,...,...,...,...,...,...,...
86,237,3,3,4,10,0.3000,0.3000,0.4000
87,239,121,111,46,278,0.4353,0.3993,0.1655
88,240,44,86,13,143,0.3077,0.6014,0.0909
89,241,86,111,43,240,0.3583,0.4625,0.1792


In [31]:
# Write the per-precinct summary to the processed-data folder, without the index
processed_csv_path = '../data/mp_eday_2021_processed/CLEAN_voted_6PM_08-24-21.csv'
precinct_gb_df.to_csv(processed_csv_path, index=False)

---
---