In [1]:
"""
Clean the datasets of precinct level results from the Georgia Secretary
of State website. Create a .csv file that will be used to create the maps.

Requirements: 
- Download the 2020 election results with the file Download_Data_Precinct
- Set the correct full paths in the code below
"""

import pandas as pd
import win32com.client as win32
import os
import numpy as np

In [2]:
# As this version of .xls is not supported by pandas, convert all files to .xlsx
# by letting Excel itself re-save every workbook (requires Windows + Excel).
directory = r'C:###Jupyter/DownloadedXls/'

# Snapshot the .xls files first: the loop below adds and removes files in the
# same folder, and entries that are not .xls must be skipped, not end the run.
with os.scandir(directory) as entries:
    xls_files = [entry for entry in entries
                 if entry.is_file() and entry.name.endswith(".xls")]

if xls_files:
    # Start Excel once for the whole batch and always shut it down again,
    # even if one workbook fails to convert.
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    try:
        for entry in xls_files:
            # Excel wants '\' instead of '/' in paths; normpath does that on Windows
            # https://stackoverflow.com/questions/3730428/why-cant-i-save-as-an-excel-file-from-my-python-code/3730512#3730512
            source = os.path.normpath(entry.path)
            # "name.xls" -> "name.xlsx", built from the real file name rather
            # than from a fixed position in the split path
            target = os.path.normpath(os.path.join(directory, entry.name + "x"))
            wb = excel.Workbooks.Open(source)
            try:
                # FileFormat = 51 is for .xlsx extension
                # FileFormat = 56 is for .xls extension
                wb.SaveAs(target, FileFormat=51)
            finally:
                # Never prompt: after a successful SaveAs nothing is pending
                wb.Close(SaveChanges=False)
            # Only delete the original once the converted copy has been written
            os.remove(entry.path)
    finally:
        excel.Application.Quit()
print("done")

# Delete _ from filenames
# Earlier versions of this cell saved the converted files with a temporary "_"
# prefix; strip it from any such leftovers so file names are plain "<county>.xlsx".
with os.scandir(directory) as entries:
    prefixed = [entry for entry in entries
                if entry.is_file()
                and entry.name.startswith("_")
                and entry.name.endswith(".xlsx")]
for entry in prefixed:
    os.replace(entry.path, os.path.join(directory, entry.name[1:]))

# List what is now in the folder
with os.scandir(directory) as entries:
    for entry in entries:
        fname = entry.path
        print(fname)

done


In [3]:
# Read, clean and join tables

# Folder holding the converted workbooks. There is one file per county.
directory = r'C:###Jupyter/DownloadedXls/'


def read_race_sheet(path, sheet_name, total_col):
    """Read one race sheet and return its precinct rows with flat column names.

    The sheet has two header rows (after the first row, which is skipped):
    candidate names in merged cells, and below them the name of each numerical
    value. They are joined as "<candidate>.<value name>".
    https://stackoverflow.com/questions/27420263/pandas-parse-merged-header-columns-from-excel

    Parameters
    ----------
    path : str
        Path of the county workbook.
    sheet_name : str
        Name of the sheet to read.
    total_col : int
        Position of the overall total column. Its candidate label is blanked so
        the column keeps only the name found in the second header row.

    Returns
    -------
    pandas.DataFrame
        The rows below the two header rows, without the last (totals) row.
    """
    raw = pd.read_excel(path, sheet_name=sheet_name, header=None, skiprows=[0])
    # Fill blank cells of the merged header with the nearby candidate name
    candidates = raw.iloc[0].ffill().fillna('')
    # Assign on the header Series itself: the chained form df.iloc[0][n] = ''
    # writes to a temporary and is a no-op under pandas Copy-on-Write.
    candidates.iloc[total_col] = ''
    value_names = raw.iloc[1]
    # Drop the two header rows and the total row at the end
    table = raw.iloc[2:-1].copy()
    table.columns = ['.'.join([part for part in pair if part])
                     for pair in zip(candidates, value_names)]
    return table


def percent(votes, total):
    """Return votes / total as a percentage truncated to two decimals.

    Rows whose total is 0 yield NaN instead of raising.
    """
    votes = pd.to_numeric(votes)
    total = pd.to_numeric(total)
    scaled = votes / total.where(total != 0) * 10000
    return np.trunc(scaled) / 100


def clean_county(path, county):
    """Build the cleaned precinct table of one county for all three races.

    Parameters
    ----------
    path : str
        Path of the county workbook.
    county : str
        County name stored in the "County" column and in "UniqueID".

    Returns
    -------
    pandas.DataFrame
        One row per precinct with the presidential, senate and special senate
        columns side by side.

    Raises
    ------
    ValueError
        If the presidential or senate sheet does not have the 18 columns the
        positional selection below relies on.
    """
    # Presidential election: keep the precinct name, the total votes of each
    # candidate and the overall total (selected by position).
    pres = read_race_sheet(path, "2", total_col=17)
    if pres.shape[1] != 18:
        raise ValueError("Unexpected presidential sheet layout in " + path)
    dfP = pres.iloc[:, [0, 6, 11, 16, 17]].copy()
    dfP.columns = ["Precinct", "Donald_J_Trump", "Joe_R_Biden", "Jo_Jorgensen", "Total_Pres"]
    dfP["County"] = county
    dfP["UniqueID"] = dfP["County"] + "," + dfP["Precinct"]
    # Create % values
    dfP["Don_Per"] = percent(dfP["Donald_J_Trump"], dfP["Total_Pres"])
    dfP["Joe_Per"] = percent(dfP["Joe_R_Biden"], dfP["Total_Pres"])
    # Margin of victory shifted by 100 (50/50 maps to 100)
    dfP["Joe_MOV"] = dfP["Joe_Per"] - dfP["Don_Per"] + 100
    dfP = dfP[["UniqueID", "County", "Precinct", "Donald_J_Trump", "Joe_R_Biden",
               "Jo_Jorgensen", "Don_Per", "Joe_Per", "Joe_MOV", "Total_Pres"]]

    # Senate 1 race: total votes of the first two candidates and overall total
    sen1 = read_race_sheet(path, "3", total_col=17)
    if sen1.shape[1] != 18:
        raise ValueError("Unexpected senate sheet layout in " + path)
    dfS1 = sen1.iloc[:, [6, 11, 17]].copy()
    dfS1.columns = ["David_A_Purdue", "Jon_Ossoff", "Total_Sen1"]
    dfS1["Pur_Per"] = percent(dfS1["David_A_Purdue"], dfS1["Total_Sen1"])
    dfS1["Oss_Per"] = percent(dfS1["Jon_Ossoff"], dfS1["Total_Sen1"])
    dfS1["Oss_MOV"] = dfS1["Oss_Per"] - dfS1["Pur_Per"] + 100
    dfS1 = dfS1[["David_A_Purdue", "Jon_Ossoff", "Pur_Per", "Oss_Per", "Oss_MOV", "Total_Sen1"]]

    # Senate exceptional race
    sen2 = read_race_sheet(path, "4", total_col=102)
    # Sum the total votes of all candidates other than the two competing in
    # the Jan 5 runoff
    other_cols = [name for name in sen2.columns
                  if '.Total' in name and 'Loeffler' not in name and 'Warnock' not in name]
    # Columns are picked by name, so the result does not depend on the order
    # in which the candidates appear in the sheet.
    dfS2 = pd.DataFrame({
        "Kelly_Loeffler": sen2['Kelly Loeffler (I) (Rep).Total Votes'],
        "Raphael_Warnock": sen2['Raphael Warnock (Dem).Total Votes'],
        "Others": sen2[other_cols].sum(axis=1),
        "Total_Sen2": sen2["Total"],
    })
    dfS2["Loe_Per"] = percent(dfS2["Kelly_Loeffler"], dfS2["Total_Sen2"])
    dfS2["War_Per"] = percent(dfS2["Raphael_Warnock"], dfS2["Total_Sen2"])
    dfS2["War_MOV"] = dfS2["War_Per"] - dfS2["Loe_Per"] + 100
    dfS2 = dfS2[["Kelly_Loeffler", "Raphael_Warnock", "Loe_Per", "War_Per", "War_MOV",
                 "Others", "Total_Sen2"]]

    # Concat the dataframes of all races of a county (aligned on the row index)
    return pd.concat([dfP, dfS1, dfS2], axis=1)


# Only .xlsx workbooks are read; sorting makes the row order of the .csv
# independent of the order in which the file system lists the folder.
with os.scandir(directory) as entries:
    workbooks = sorted(
        (entry for entry in entries
         if entry.is_file() and entry.name.lower().endswith(".xlsx")),
        key=lambda entry: entry.name.lower(),
    )
if not workbooks:
    raise FileNotFoundError("No .xlsx county workbooks found in " + directory)

# Repeat for each file, then join everything in a single concat
county_frames = []
for entry in workbooks:
    fname = entry.path
    # "ben-hill.xlsx" -> "Ben Hill"
    county = os.path.splitext(entry.name)[0].title().replace("-", " ")
    county_frames.append(clean_county(fname, county))

# dfTot is the dataframe to save in the final .csv file
dfTot = pd.concat(county_frames)
dfTot.to_csv("C:###Jupyter/GA_by_precinct.csv", index=False)
# Number of counties processed
print(str(dfTot["County"].nunique()))

159
