In [1]:
import numpy as np
import pandas as pd
from modules import *

import os #Used when reading/writing csv files programatically

In [2]:
# Folder that holds the FEC files
fec_folder_path = '../data/FEC/'

# Names of the regular files in the FEC folder (sub-directories are skipped).
# os.listdir returns entries in arbitrary order, and these files are later paired
# by position with the state election files, so sort them to get a deterministic order.
fec_files = sorted(
    file for file in os.listdir(fec_folder_path)
    if os.path.isfile(os.path.join(fec_folder_path, file))
)

In [3]:
# Dataframes read from the FEC folder, in the same order as `fec_files`
FEC_files = []

for fec_filename in fec_files:

    # Build the path with os.path.join so it is correct whether or not
    # `fec_folder_path` ends with a path separator
    fec_frame = pd.read_csv(os.path.join(fec_folder_path, fec_filename), index_col=0)

    # Name for this dataframe: the filename without its extension.
    # os.path.splitext copes with an extension of any length, unlike slicing
    # a fixed number of characters off the end.
    frame_name = os.path.splitext(fec_filename)[0]

    # globals() returns the dictionary backing the module-level namespace, so
    # storing the dataframe under `frame_name` creates a global variable with that name
    globals()[frame_name] = fec_frame

    # Keep the dataframe in the list as well, for positional access later
    FEC_files.append(fec_frame)

In [4]:
# Folder that holds the raw state election totals
elec_folder_path = '../data/raw_elec_totals'

# Names of the regular files in that folder whose name starts with 'IL'.
# os.listdir returns entries in arbitrary order, and these files are later paired
# by position with the FEC files, so sort them to get a deterministic order.
IL_files = sorted(
    file for file in os.listdir(elec_folder_path)
    if os.path.isfile(os.path.join(elec_folder_path, file)) and file.startswith('IL')
)

In [5]:
# Containers filled by the Illinois loading loop: the formatted dataframes and,
# in parallel, the variable name generated for each of them
formatted_IL, IL_names = [], []

In [6]:
# Start from empty containers so that re-running this cell does not append
# duplicates to lists left over from a previous run
formatted_IL = []
IL_names = []

for il_filename in IL_files:
    # Read the county-level totals sheet of this workbook
    raw_totals = pd.read_excel(os.path.join(elec_folder_path, il_filename), sheet_name='TotalsByCounty')

    # Keep only the rows whose OfficeName mentions 'congress' (case-insensitive);
    # rows with a missing OfficeName are dropped
    congress_totals = raw_totals[raw_totals['OfficeName'].str.contains('congress', case=False, na=False)]

    # Apply IL formatting function
    formatted = format_il(congress_totals)

    # Name for this dataframe: the filename without its extension, plus '_f'.
    # os.path.splitext copes with an extension of any length, unlike slicing
    # a fixed number of characters off the end.
    frame_name = os.path.splitext(il_filename)[0] + '_f'

    # globals() returns the dictionary backing the module-level namespace, so
    # storing the dataframe under `frame_name` creates a global variable with that name
    globals()[frame_name] = formatted

    # Record the dataframe and its name at the same position in the two lists
    formatted_IL.append(formatted)
    IL_names.append(frame_name)


  warn("""Cannot parse header or footer so it will be ignored""")
  warn("""Cannot parse header or footer so it will be ignored""")
  warn("""Cannot parse header or footer so it will be ignored""")
  warn("""Cannot parse header or footer so it will be ignored""")


In [15]:
# Print the column names, non-null counts and dtypes of the first formatted Illinois dataframe
formatted_IL[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 54 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   County             102 non-null    object 
 1   donald peloquin    102 non-null    float64
 2   bobby rush         102 non-null    float64
 3   john hawkins       102 non-null    float64
 4   brian woodworth    102 non-null    float64
 5   jesse jackson      102 non-null    float64
 6   marcus lewis       102 non-null    float64
 7   anthony williams   102 non-null    float64
 8   richard grabowski  102 non-null    float64
 9   daniel lipinski    102 non-null    float64
 10  laura anderson     102 non-null    float64
 11  luis gutierrez     102 non-null    float64
 12  hector concepcion  102 non-null    float64
 13  ymelda viramontes  102 non-null    float64
 14  dan schmitt        102 non-null    float64
 15  mike quigley       102 non-null    float64
 16  nancy wade         102 non

In [13]:
# Display the first FEC dataframe that was loaded
FEC_files[0]

Unnamed: 0_level_0,D,CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME(f),CANDIDATE NAME,PARTY,(I),GENERAL VOTES
STATE ABBREVIATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AL,01,Jo,Bonner,jo bonner,"Bonner, Jo",R,1,196374
AL,02,Martha,Roby,martha roby,"Roby, Martha",R,1,180591
AL,02,Therese,Ford,therese ford,"Ford, Therese",D,0,103092
AL,03,Mike,Rogers,mike rogers,"Rogers, Mike",R,1,175306
AL,03,John Andrew,Harris,john harris,"Harris, John Andrew",D,0,98141
...,...,...,...,...,...,...,...,...
WY,00,Daniel Clyde,Cummings,daniel cummings,"Cummings, Daniel Clyde",OTHER,0,4963
WY,00,Don,Wills,don wills,"Wills, Don",OTHER,0,3775
WY,S,John,Barrasso,john barrasso,"Barrasso, John",R,1,185250
WY,S,Tim,Chesnut,tim chesnut,"Chesnut, Tim",D,0,53019


In [7]:
# Pair up, by position, each formatted state election dataframe with an FEC dataframe
# (candidates and parties) and the name derived from the raw election filename.
# Stored as a list rather than a bare zip object: zip is a one-shot iterator, so a
# second run of the loop that consumes it would otherwise silently process nothing.
# Note that zip stops at the shortest of the three inputs.
zipped_IL_FEC = list(zip(formatted_IL, FEC_files, IL_names))

In [14]:
# Further process and transform the election data, grouping vote totals by party
# and incumbency so that both metrics can be analysed
for state_frame, fec_frame, frame_name in zipped_IL_FEC:

    # Join the state data with the FEC data; the call also returns the list of counties.
    # An error raised here most likely means the counties in the two files do not match.
    formatted_IL_FEC, counties = state_join_FEC(state_frame, fec_frame)
    transformed_data = state_trans(formatted_IL_FEC, counties)

    # Save the result as a .csv named after the first seven characters of the dataframe name
    transformed_data.to_csv(
        fr"../data/formatted_house_totals/{frame_name[:7]}.csv",
        index=False,
    )
    



In [9]:
# Look at item 0 of `formatted_IL_FEC`, i.e. of the value left over from the last loop iteration.
# NOTE(review): whether 0 is a column label or a position depends on what state_join_FEC returns — confirm
formatted_IL_FEC[0]

0      6986.0
1     26133.0
2         0.0
3         0.0
4         0.0
5         0.0
6         0.0
7         0.0
8         0.0
9         0.0
10        0.0
11        0.0
12        0.0
13        0.0
14        0.0
15        0.0
16        0.0
17        0.0
18        0.0
19        0.0
20        0.0
21        0.0
22        0.0
23        0.0
24        0.0
25        0.0
26        0.0
27        0.0
28        0.0
29        0.0
30        0.0
31        0.0
32        0.0
33        0.0
34        0.0
35        0.0
36        0.0
37        0.0
38        0.0
39        0.0
40        0.0
41        0.0
42        0.0
43        0.0
44        0.0
45        0.0
Name: 0, dtype: float64

In [11]:
# Display the transformed data produced by the last loop iteration
transformed_data

index,County,D0,D1,OTHER0,R0,R1
0,adams,6986.0,0.0,0.0,0.0,26133.0
1,brown,384.0,0.0,0.0,0.0,2103.0
2,cass,1301.0,0.0,0.0,0.0,3903.0
3,hancock,1946.0,0.0,0.0,0.0,7230.0
4,logan,3037.0,0.0,0.0,0.0,10124.0
...,...,...,...,...,...,...
97,kane,0.0,129538.0,7141.0,91192.0,0.0
98,lake,0.0,198211.0,539.0,131474.0,0.0
99,mchenry,0.0,77396.0,1203.0,83093.0,0.0
100,kankakee,0.0,20703.0,0.0,28102.0,0.0
