## Alabama 2022 Primary Election Returns

### Georgia 2022 Primary Election Returns

### Sections
- <a href="#ETL">Cleaning Precinct-Level Election Results</a><br>
- <a href="#check">Vote Totals Checks</a><br>
- <a href ="#discrepancies"> Examine/Fix Anomalies <a/><br>
- <a href="#readme">Creating README</a><br>
- <a href="#exp">Exporting Cleaned Precinct-Level Dataset</a><br>

#### Sources

- [Alabama Primary Election Results, Precint Level](https://www.sos.alabama.gov/sites/default/files/election-data/2022-06/2022%20Primary%20Precinct%20Results.zip)
- [Secretary of State Certified Results, Democratic Party](https://www.sos.alabama.gov/sites/default/files/election-2022/AL%20Democratic%20Party%202022%20Primary%20Results.xlsx)
- [Secretary of State Certified Results, Republican Party](https://www.sos.alabama.gov/sites/default/files/election-2022/AL%20Republican%20Party%202022%20Primary%20Results%20Official.xlsx)

In [2]:
import geopandas as gp
import pandas as pd
import os
import numpy as np
import re
from collections import Counter
import AL_22_helper_functions as hlp

<p><a name="ETL"></a></p>

### Cleaning Precinct Level Election Returns

Load-In + Clean Election Results

In [25]:
ph_county_list = []
ar_county_list = []
clean_index = []
index_issue = []
files = os.listdir('./raw-from-source/2022 Primary Precinct Results/')

for idx, file in enumerate(files):
    #Load county files
    temp = pd.read_excel('./raw-from-source/2022 Primary Precinct Results/' + file)
    
    # Get the county name, clean "StClair" to match pattern
    county_name = file.split("-")[-1][0:-4]
    if county_name == "StClair":
        county_name = "St. Clair"
    ar_county_list.append(county_name)
        
    # Clean the party name
    temp["Party"] = temp["Party"].str.strip()
    temp["Party"] = temp["Party"].fillna("")
    
    # Create a column to pivot on
    temp["pivot_col"] = temp["Contest Title"].str.strip()+"-:-"+temp["Candidate"].str.strip()
    temp["pivot_col"] = np.where(temp["Party"]=="",temp["pivot_col"],temp["pivot_col"]+"-:-"+temp["Party"].str.strip())
    
    # Drop columns that are no longer needed
    temp.drop(["Contest Title", "Party", "Candidate"], axis = 1, inplace = True)
    
    # Add the county name to the precinct
    rename_dict = {i:i+"-:-"+county_name for i in temp.columns if i != "pivot_col"}
    temp.rename(columns = rename_dict, inplace = True)
    
    # Transpose the dataframe
    temp_transpose = temp.set_index("pivot_col").T
    temp_transpose["County"] = county_name
    temp_transpose.reset_index(inplace = True, drop = False)
    temp_transpose["County"] = county_name
    
    #Make sure cols and indexes unique
    if temp_transpose.columns.nunique() == len(temp_transpose.columns) and temp_transpose.index.is_unique:
        clean_index.append(county_name)
    else:
        index_issue.append(str(county_name) + ' ' + str(idx))
    
#     #Test to make sure all the indexes are unique
#     for val in temp_transpose.index:
#         index_testing.append(val)
#         index_df = pd.DataFrame(index_testing)
        

#     # Add to the list of counties
#     ph_county_list.append(temp_transpose)
    
#     # Concatenate into one file
#     comb = pd.concat(ph_county_list, axis = 0)

In [5]:
def check_dups(index, n):
    s = set(index)
    if len(s) != len(index):
        print(f'df_list[{n}]:', index, end='\n\n')

for n, df in enumerate(ph_county_list):
    check_dups(df.columns, n)

df_list[44]: Index(['index', 'REGISTERED VOTERS - TOTAL-:-Registered Voters - Total',
       'BALLOTS CAST - TOTAL-:-Ballots Cast - Total',
       'BALLOTS CAST - DEMOCRAT-:-Ballots Cast - Alabama Democratic P-:-DEM',
       'BALLOTS CAST - REPUBLICAN-:-Ballots Cast - Alabama Republican P-:-REP',
       'BALLOTS CAST - NON-PARTISAN-:-Ballots Cast - Nonpartisan',
       'BALLOTS CAST - BLANK-:-Ballots Cast - Blank',
       'UNITED STATES SENATOR-:-Will Boyd-:-DEM',
       'UNITED STATES SENATOR-:-Lillie Boddie-:-REP',
       'UNITED STATES SENATOR-:-Brandaun Dean-:-DEM',
       ...
       'MADISON COUNTY REP EXEC COMM DIST 9 PL 9                TRICT 11-:-Elizabeth Black-:-REP',
       'MADISON COUNTY REP EXEC COMM DIST 9 PL 9                TRICT 11-:-Charles Coats-:-REP',
       'MADISON COUNTY REP EXEC COMM DIST 9 PL 9                TRICT 11-:-William Cody Garton-:-REP',
       'MADISON COUNTY REP EXEC COMM DIST 9 PL 9                TRICT 11-:-Over Votes-:-REP',
       'MADISON COU

In [26]:
len(clean_index)

66

In [27]:
index_issue

['Madison 44']

In [30]:
files[44]

'2022-Primary-Madison.xls'

In [31]:
issue_file = files[44]

In [33]:
temp = pd.read_excel('./raw-from-source/2022 Primary Precinct Results/' + issue_file)

In [35]:
issue_file = files[44]
temp = pd.read_excel('./raw-from-source/2022 Primary Precinct Results/' + issue_file)
    
# Get the county name, clean "StClair" to match pattern
county_name = file.split("-")[-1][0:-4]
ar_county_list.append(county_name)
        
# Clean the party name
temp["Party"] = temp["Party"].str.strip()
temp["Party"] = temp["Party"].fillna("")
    
# Create a column to pivot on
temp["pivot_col"] = temp["Contest Title"].str.strip()+"-:-"+temp["Candidate"].str.strip()
temp["pivot_col"] = np.where(temp["Party"]=="",temp["pivot_col"],temp["pivot_col"]+"-:-"+temp["Party"].str.strip())
    
# Drop columns that are no longer needed
temp.drop(["Contest Title", "Party", "Candidate"], axis = 1, inplace = True)
    
# Add the county name to the precinct
rename_dict = {i:i+"-:-"+county_name for i in temp.columns if i != "pivot_col"}
temp.rename(columns = rename_dict, inplace = True)
    
# Transpose the dataframe
temp_transpose = temp.set_index("pivot_col").T
temp_transpose["County"] = county_name
temp_transpose.reset_index(inplace = True, drop = False)
temp_transpose["County"] = county_name
    
#Make sure cols and indexes unique
print(temp_transpose.columns.nunique() == len(temp_transpose.columns))
print(temp_transpose.index.is_unique)
    

False
True


In [39]:
col_dups = pd.DataFrame(temp_transpose.columns.to_list())

In [43]:
col_dups.columns = ['cols']

In [44]:
col_dups['cols'].value_counts()

-:-Over Votes-:-DEM                                                                     4
-:-Under Votes-:-DEM                                                                    4
STATE DEMOCRATIC EXECUTIVE COMMITTEE (MALE), DISTRICT 53-:-Over Votes-:-DEM             1
MADISON COUNTY DEM EXEC COMM (F). DIST 6-:-Laura Hall-:-DEM                             1
STATE REPUBLICAN EXECUTIVE COMMITTEE MADISON COUNTY, PLACE 3-:-Tobias Vogt-:-REP        1
                                                                                       ..
MADISON COUNTY REP EXEC COMM DIST 9 PL 9                TRICT 11-:-Under Votes-:-REP    1
PROPOSED STATEWIDE AMENDMENT NUMBER ONE (1)-:-Yes                                       1
GOVERNOR-:-Under Votes-:-REP                                                            1
GOVERNOR-:-Lew Burdette-:-REP                                                           1
STATE REPUBLICAN EXECUTIVE COMMITTEE MADISON COUNTY, DISTRICT 1-:-Over Votes-:-REP      1
Name: cols

In [36]:
temp_transpose.head()

pivot_col,index,REGISTERED VOTERS - TOTAL-:-Registered Voters - Total,BALLOTS CAST - TOTAL-:-Ballots Cast - Total,BALLOTS CAST - DEMOCRAT-:-Ballots Cast - Alabama Democratic P-:-DEM,BALLOTS CAST - REPUBLICAN-:-Ballots Cast - Alabama Republican P-:-REP,BALLOTS CAST - NON-PARTISAN-:-Ballots Cast - Nonpartisan,BALLOTS CAST - BLANK-:-Ballots Cast - Blank,UNITED STATES SENATOR-:-Will Boyd-:-DEM,UNITED STATES SENATOR-:-Lillie Boddie-:-REP,UNITED STATES SENATOR-:-Brandaun Dean-:-DEM,...,MADISON COUNTY REP EXEC COMM DIST 9 PL 9 TRICT 11-:-Elizabeth Black-:-REP,MADISON COUNTY REP EXEC COMM DIST 9 PL 9 TRICT 11-:-Charles Coats-:-REP,MADISON COUNTY REP EXEC COMM DIST 9 PL 9 TRICT 11-:-William Cody Garton-:-REP,MADISON COUNTY REP EXEC COMM DIST 9 PL 9 TRICT 11-:-Over Votes-:-REP,MADISON COUNTY REP EXEC COMM DIST 9 PL 9 TRICT 11-:-Under Votes-:-REP,MADISON COUNTY REP EXEC COMM DIST 11 PL 2 TRICT 11-:-Heath B. Jones-:-REP,MADISON COUNTY REP EXEC COMM DIST 11 PL 2 TRICT 11-:-Stella Stooksbury-:-REP,MADISON COUNTY REP EXEC COMM DIST 11 PL 2 TRICT 11-:-Over Votes-:-REP,MADISON COUNTY REP EXEC COMM DIST 11 PL 2 TRICT 11-:-Under Votes-:-REP,County
0,A & M UNIVERSITY NEW GYM-:-Winston,5742.0,148.0,102.0,46.0,0.0,0.0,70.0,0.0,13.0,...,,,,,,,,,,Winston
1,ABSENTEE-:-Winston,0.0,1816.0,356.0,1456.0,4.0,0.0,211.0,7.0,67.0,...,45.0,29.0,21.0,0.0,64.0,19.0,6.0,0.0,20.0,Winston
2,ALDERSGATE UMC-:-Winston,4962.0,1432.0,136.0,1295.0,1.0,0.0,69.0,6.0,43.0,...,,,,,,,,,,Winston
3,ALL NATIONS-:-Winston,5571.0,549.0,141.0,406.0,2.0,0.0,84.0,1.0,29.0,...,,,,,,,,,,Winston
4,BOB HARRISON SR_ CTR_-:-Winston,3451.0,629.0,531.0,98.0,0.0,0.0,386.0,1.0,57.0,...,,,,,,,,,,Winston


In [11]:
len(ar_county_list)

67

In [9]:
len(ph_county_list)

0

In [None]:
comb.shape

In [None]:
comb.head()

In [None]:
comb['County']

In [None]:
temp_transpose.reset_index(inplace = True, drop = False)

In [None]:
index_df.columns = ['test']

In [None]:
index_df.shape

In [None]:
index_df.head()

In [None]:
index_df['test'].value_counts()

<p><a name="check"></a></p>

### Vote Totals Check

<p><a name="discrepancies"></a></p>

### Checks

<p><a name="readme"></a></p>

### Creating README

<p><a name="exp"></a></p>

### Exporting Cleaned Precinct-Level Dataset