In [None]:
import numpy as np
import pandas as pd
from modules import *

In [None]:
import os #Used when reading/writing csv files programatically

In [None]:
# Folder holding the FEC files
fed_folder_path = '../data/FEC/'

# os.listdir() returns entries in arbitrary order, and these files are later paired
# positionally with the Ohio workbooks, so sort to make that pairing deterministic.
# NOTE(review): assumes the filenames sort chronologically (e.g. a year in the name) - confirm.
fec_files = sorted(
    file for file in os.listdir(fed_folder_path)
    if os.path.isfile(os.path.join(fed_folder_path, file))
)

In [None]:

# Empty list to hold the FEC dataframes, in the same order as fec_files
FEC_files = []

for fec_filename in fec_files:

    # Read each file from the FEC file list; the first CSV column is used as the index.
    # os.path.join avoids depending on fed_folder_path ending with a slash.
    fec_frame = pd.read_csv(os.path.join(fed_folder_path, fec_filename), index_col=0)

    # Name each dataframe after its file, minus the extension.
    # splitext handles extensions of any length, unlike a fixed-width slice.
    name = os.path.splitext(fec_filename)[0]

    # globals() is the notebook's module-level namespace dictionary, so this creates
    # a top-level variable called <name> bound to the dataframe.
    globals()[name] = fec_frame

    # Keep the dataframe in the list as well so it can be iterated over later
    FEC_files.append(fec_frame)
    

In [None]:
# Folder holding the raw state election workbooks
elec_folder_path = '../data/raw_elec_totals'

# Keep only regular files whose name starts with 'OH'.
# os.listdir() returns entries in arbitrary order, and these files are later paired
# positionally with the FEC files, so sort to make that pairing deterministic.
OH_files = sorted(
    file for file in os.listdir(elec_folder_path)
    if os.path.isfile(os.path.join(elec_folder_path, file)) and file.startswith('OH')
)

In [None]:
# Two independent, initially empty containers:
#   formatted_OH - the formatted Ohio dataframes
#   OH_names     - the variable name generated for each of those dataframes
formatted_OH, OH_names = [], []

In [None]:
# Empty the result lists first so that re-running this cell does not append duplicates
formatted_OH.clear()
OH_names.clear()

for workbook_name in OH_files:
    # Read the 'U.S. Congress' sheet; header=1 takes the column names from the second row.
    # Minor pre-processing was required due to inconsistencies in OH SoS formatting.
    raw_sheet = pd.read_excel(
        os.path.join(elec_folder_path, workbook_name),
        sheet_name='U.S. Congress',
        header=1,
    )

    # Apply the Ohio formatting function
    formatted = format_OH(raw_sheet)

    # Name each dataframe after its file: strip the extension with splitext so the whole
    # stem is kept (e.g. 'OH_2012precinct.xlsx' -> 'OH_2012precinct_f').
    name = os.path.splitext(workbook_name)[0] + '_f'

    # globals() is the notebook's module-level namespace dictionary, so this creates
    # a top-level variable called <name> bound to the formatted dataframe.
    globals()[name] = formatted

    # Record the dataframe and its name at matching positions in the two lists
    formatted_OH.append(formatted)
    OH_names.append(name)

In [None]:
# Pair up, by position, the formatted state election data, the FEC candidate/party data,
# and the names derived from the raw election filenames.
# zip() alone yields a one-shot iterator; materialising it as a list lets it be inspected
# and looped over more than once. zip stops at the shortest input, so any unmatched
# trailing entries are silently dropped.
zipped_OH_FEC = list(zip(formatted_OH, FEC_files, OH_names))

In [None]:
# Further process and transform election data, grouping vote totals by party and incumbency
# Allows analysis on these two metrics
for oh_frame, fec_frame, oh_name in zipped_OH_FEC:

    # Join the FEC and state data for this year; the join also returns the list of counties.
    # If an error is generated here, there is likely a mismatch between the counties in these files
    formatted_OH_FEC, counties = state_join_FEC(oh_frame, fec_frame)
    transformed_data = state_trans(formatted_OH_FEC, counties)

    # The first seven characters of the generated name (e.g. 'OH_2012') become the output filename
    output_path = f"../data/formatted_house_totals/{oh_name[:7]}.csv"
    transformed_data.to_csv(output_path, index=False)
    

In [None]:
# # Get Ohio data taken from OH SoS page
# ohio_2012_path = '../data/raw_elec_totals/ohio 2012precinct.xlsx'
# excel_2012 = pd.ExcelFile(ohio_2012_path)
# sheet_names_2012 = excel_2012.sheet_names

# ohio_2014_path = '../data/raw_elec_totals/ohio 2014precinct.xlsx'
# excel_2014 = pd.ExcelFile(ohio_2014_path)
# sheet_names_2014 = excel_2014.sheet_names

# ohio_2016_path = '../data/raw_elec_totals/ohio 2016precinct.xlsx'
# excel_2016 = pd.ExcelFile(ohio_2016_path)
# sheet_names_2016 = excel_2016.sheet_names

# ohio_2018_path = '../data/raw_elec_totals/ohio 2018precinct.xlsx'
# excel_2018 = pd.ExcelFile(ohio_2018_path)
# sheet_names_2018 = excel_2018.sheet_names

# ohio_2020_path = '../data/raw_elec_totals/ohio 2020precinct.xlsx'
# excel_2020 = pd.ExcelFile(ohio_2020_path)
# sheet_names_2020 = excel_2020.sheet_names

In [6]:
# Build 'cong_OH_<year>' for every even year from 2012 through 2020 instead of typing each name out
sheet_names = [f'cong_OH_{year}' for year in range(2012, 2021, 2)]

In [7]:
# Print each generated name on its own line
for sheet_name in sheet_names:
    print(sheet_name)

cong_OH_2012
cong_OH_2014
cong_OH_2016
cong_OH_2018
cong_OH_2020


In [8]:
# Spot-check the first generated name
sheet_names[0]

'cong_OH_2012'

In [9]:
# #Will do pre-processing to aid in automating this

# cong_OH_2012 = pd.read_excel(ohio_2012_path, sheet_name='U.S. Congress', header=1)
# cong_OH_2014 = pd.read_excel(ohio_2014_path, sheet_name='U.S. Congress', header=1)
# cong_OH_2016 = pd.read_excel(ohio_2016_path, sheet_name='U.S. Congress', header=1)
# cong_OH_2018 = pd.read_excel(ohio_2018_path, sheet_name='U.S. Congress', header=1)
# cong_OH_2020 = pd.read_excel(ohio_2020_path, sheet_name='U.S. Congress', header=1)

In [11]:
# Display the Ohio workbook filenames found in elec_folder_path
OH_files

['OH_2012precinct.xlsx',
 'OH_2014precinct.xlsx',
 'OH_2016precinct.xlsx',
 'OH_2018precinct.xlsx',
 'OH_2020precinct.xlsx']

In [None]:
# data1 = format_OH(data1)
# data2 = format_OH(data2)
# data3 = format_OH(data3)
# data4 = format_OH(data4)
# data5 = format_OH(data5)

In [None]:
# fec_2020 = pd.read_csv(r'data\FEC\candidates_2020.csv', index_col=0)
# fec_2018 = pd.read_csv(r'data\FEC\candidates_2018.csv', index_col=0)
# fec_2016 = pd.read_csv(r'data\FEC\candidates_2016.csv', index_col=0)
# fec_2014 = pd.read_csv(r'data\FEC\candidates_2014.csv', index_col=0)
# fec_2012 = pd.read_csv(r'data\FEC\candidates_2012.csv', index_col=0)

In [None]:
# OH_2012_f, county_2012 = OH_join_FEC2(data1, fec_2012)
# OH_2014_f, county_2012 = OH_join_FEC2(data2, fec_2014)
# OH_2016_f, county_2012 = OH_join_FEC2(data3, fec_2016)
# OH_2018_f, county_2012 = OH_join_FEC2(data4, fec_2018)
# OH_2020_f, county_2012 = OH_join_FEC2(data5, fec_2020)

In [22]:
# Display the first FEC dataframe that was loaded
FEC_files[0]

Unnamed: 0,STATE ABBREVIATION,D,CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME(f),CANDIDATE NAME,PARTY,(I),GENERAL VOTES
0,AL,01,Jo,Bonner,jo bonner,"Bonner, Jo",R,1,196374
1,AL,02,Martha,Roby,martha roby,"Roby, Martha",R,1,180591
2,AL,02,Therese,Ford,therese ford,"Ford, Therese",D,0,103092
3,AL,03,Mike,Rogers,mike rogers,"Rogers, Mike",R,1,175306
4,AL,03,John Andrew,Harris,john harris,"Harris, John Andrew",D,0,98141
...,...,...,...,...,...,...,...,...,...
1532,WY,00,Daniel Clyde,Cummings,daniel cummings,"Cummings, Daniel Clyde",OTHER,0,4963
1533,WY,00,Don,Wills,don wills,"Wills, Don",OTHER,0,3775
1534,WY,S,John,Barrasso,john barrasso,"Barrasso, John",R,1,185250
1535,WY,S,Tim,Chesnut,tim chesnut,"Chesnut, Tim",D,0,53019


In [26]:
# Display the names generated for the formatted Ohio dataframes
OH_names

['OH_2012precinct_f',
 'OH_2014precinct_f',
 'OH_2016precinct_f',
 'OH_2018precinct_f',
 'OH_2020precinct_f']

In [21]:
# Display the first formatted Ohio dataframe
formatted_OH[0]

Unnamed: 0,County,Sherrod Brown,Josh Mandel,Jim Berns,Steve Chabot,Jeff Sinnard,Rich Stevenson,William Smith,Brad Wenstrup,Joyce Beatty,...,David Joyce,David Macko,Elaine Mastromatteo,Erick Robinson,Steven Winfield,Aaron Zurbrugg,Pat Lang,Steve Stivers,Jim Renacci,Betty Sutton
0,Adams,3924.0,6410.0,0.0,0.0,0.0,0.0,3900.0,6598.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
1,Allen,17456.0,28817.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
2,Ashland,8006.0,14260.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
3,Ashtabula,21973.0,17780.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,17706.0,1781.0,1752.0,0,0,0,0.0,0.0,0.0,0.0
4,Athens,17662.0,8066.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,16900.0,7285.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,Washington,11464.0,15904.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
84,Wayne,19072.0,28138.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,29676.0,19881.0
85,Williams,6640.0,9294.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0
86,Wood,31633.0,27905.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0


In [None]:
# Peek at the FEC dataframe paired with the first Ohio dataframe.
# The pairing is rebuilt here rather than calling list() on zipped_OH_FEC, so this
# inspection neither consumes a shared iterator nor depends on whether it was already iterated.
list(zip(formatted_OH, FEC_files, OH_names))[0][1]

In [18]:
# Check what an eight-character prefix of the first generated name looks like
OH_names[0][:8]

'OH_2012p'

In [None]:
# Manual, per-year version of the FEC/state join.
# Inputs come from the lists populated earlier in the notebook (formatted_OH, FEC_files),
# so this cell works on a fresh kernel instead of relying on names that no live cell defines.
# NOTE(review): positions 0-4 are assumed to be 2012, 2014, 2016, 2018, 2020 in both lists - confirm.
# Each year's county list gets its own name so later years no longer overwrite county_2012.
OH_2012_f, county_2012 = state_join_FEC(formatted_OH[0], FEC_files[0])
OH_2014_f, county_2014 = state_join_FEC(formatted_OH[1], FEC_files[1])
OH_2016_f, county_2016 = state_join_FEC(formatted_OH[2], FEC_files[2])
OH_2018_f, county_2018 = state_join_FEC(formatted_OH[3], FEC_files[3])
OH_2020_f, county_2020 = state_join_FEC(formatted_OH[4], FEC_files[4])

In [None]:
# OH_12, county1 = OH_join_FEC(data1, fec_2012)

In [None]:
# Transform each year's joined dataframe, in chronological order.
# NOTE(review): county_2012 is passed for every year - confirm that a single county list
# is intended to be shared across all five elections.
OH_2012_t, OH_2014_t, OH_2016_t, OH_2018_t, OH_2020_t = (
    state_trans(joined, county_2012)
    for joined in (OH_2012_f, OH_2014_f, OH_2016_f, OH_2018_f, OH_2020_f)
)

In [None]:

# Write each year's transformed totals to its own CSV file (row index omitted).
# Forward-slash paths under ../data match the folder layout used by the rest of the notebook
# and work on every OS; backslash-separated paths only resolve as directories on Windows.
for transformed, filename in (
    (OH_2012_t, 'OH_House_12.csv'),
    (OH_2014_t, 'OH_House_14.csv'),
    (OH_2016_t, 'OH_House_16.csv'),
    (OH_2018_t, 'OH_House_18.csv'),
    (OH_2020_t, 'OH_House_20.csv'),
):
    transformed.to_csv(f'../data/formatted_house_totals/{filename}', index=False)