In [4]:
#Imports
import pandas as pd
import os

/Users/Darren/Github clones/data-projects/election-results


In [32]:
#Registration/enrollment extracts published by the Maine Secretary of State,
#keyed by election year (insertion order: newest first)
reg_files = {
    '2018': 'https://www.maine.gov/sos/cec/elec/data/data-txt/r-e-active1118.txt',  # 2018 General
    '2016': 'https://www.maine.gov/sos/cec/elec/data/r-e-active-11-08-16.txt',  # 2016 General / Referendum
    '2014': 'https://www.maine.gov/sos/cec/elec/data/r-e-active-11-4-14.txt',  # 2014 General
    '2012': 'https://www.maine.gov/sos/cec/elec/data/r-e-active12.txt',  # 2012 General
    '2010': 'https://www.maine.gov/sos/cec/elec/data/20101102r-e-active.txt',  # 2010 General
    '2008': 'https://www.maine.gov/sos/cec/elec/data/20081104r-e-active.txt',  # 2008 General
}

#Show which years are configured
print(reg_files.keys())

dict_keys(['2018', '2016', '2014', '2012', '2010', '2008'])


In [46]:
#Load every year's pipe-delimited file and stack them row-wise.
#The dict keys become the outer index level ('year'); each file's own
#row number becomes the inner level ('Row ID').
#Any nulls in the combined frame are then replaced with empty strings.
df = pd.concat(
    [pd.read_csv(url, sep='|') for url in reg_files.values()],
    axis=0,
    keys=reg_files.keys(),
    names=['year', 'Row ID'],
).fillna('')

##Column transformations
#Truncate every header to its first 4 characters so the lookup below
#can match on short codes
cols = [str(header)[:4] for header in df.columns]
df.columns = cols

#Short code -> descriptive column name, following the layout published at:
#https://www.maine.gov/sos/cec/elec/data/regandenrolllayout.html
#('A' is kept under its own name)
col_names = {
    'A': 'A',
    'AL': 'alliance',
    'CC': 'county_comm',
    'CG': 'congress_dist',
    'COUN': 'county',
    'D': 'democrat',
    'G': 'green',
    'L': 'libertarian',
    'MUNI': 'municipality',
    'R': 'republican',
    'S': 'socialist',
    'SR': 'state_rep',
    'SS': 'state_sen',
    'TOTA': 'total_reg',
    'U': 'unenrolled',
    'W/P': 'ward_precinct',
}

#Discard the 'Unna' column(s) (presumably pandas' "Unnamed: N" placeholders
#after truncation), then apply the descriptive names
df = df.drop(columns=['Unna'])
df = df.rename(columns=col_names)

#Create dict to convert town names to align with shapefile
#Moves areas based on final structure, i.e. removes places like
#Bancroft, which was deorganized in 2015
#Left: Secy of State name | Right: MEGIS name

muni_names = {
    'BANCROFT':'BANCROFT TWP'
    ,'CROSS LAKE TWP (T17 R5)':'CROSS LAKE TWP'
    ,'INDIAN TOWNSHIP':'INDIAN TWP RES'
    ,'MADAWASKA LAKE':'MADAWASKA LAKE TWP'
    ,'MADAWASKA LAKE TWP (T16 R4)':'MADAWASKA LAKE TWP'
    ,'OXBOW PLT':'OXBOW TWP'
    ,'PENOBSCOT NATION VOTING DISTRICT':'INDIAN ISLAND'
    ,'PLEASANT POINT VOTING DISTRICT':'PLEASANT POINT'
    ,'ROCKWOOD STRIP TWP':'ROCKWOOD STRIP T1 R1 NBKP'
    ,'SINCLAIR':'SINCLAIR TWP'
    ,'SINCLAIR (T17 R4)':'SINCLAIR TWP'
}

#Replace values in the municipality column.
#The result is assigned back to the column rather than using
#df.municipality.replace(..., inplace=True): an in-place method called on a
#column pulled out of the frame is chained assignment, which pandas does not
#guarantee will write through to df (it does not under Copy-on-Write).
#The dict alone fully specifies the mapping, so no `value` argument is passed.
df['municipality'] = df['municipality'].replace(muni_names)

#Reshape from wide (one column per party) to long (one row per party)

#Columns that identify a row and are repeated for every party
id_vars = [
    'county_comm',
    'congress_dist',
    'county',
    'municipality',
    'state_rep',
    'state_sen',
    'ward_precinct',
    'year',
]

#Count columns that get stacked into a single value column
value_vars = [
    'alliance',
    'democrat',
    'green',
    'libertarian',
    'republican',
    'socialist',
    'total_reg',
    'unenrolled',
]

#reset_index() turns the 'year' index level into a regular column so it can
#be used as an id variable; the melted 'variable'/'value' columns are then
#given meaningful names
df = (
    df.reset_index()
    .melt(id_vars=id_vars, value_vars=value_vars)
    .rename(columns={'variable': 'party', 'value': 'voters'})
)

#Preview the first 50 rows of the reshaped frame
df.head(50)


Unnamed: 0,county_comm,congress_dist,county,municipality,state_rep,state_sen,ward_precinct,year,party,voters
0,5,2,AND,AUBURN,62,20,1-1,2018,alliance,0
1,5,2,AND,AUBURN,64,20,1-1,2018,alliance,0
2,6,2,AND,AUBURN,64,20,1-1,2018,alliance,0
3,5,2,AND,AUBURN,62,20,2-1,2018,alliance,0
4,5,2,AND,AUBURN,64,20,2-1,2018,alliance,0
5,6,2,AND,AUBURN,64,20,2-1,2018,alliance,0
6,5,2,AND,AUBURN,62,20,3-1,2018,alliance,0
7,5,2,AND,AUBURN,63,20,3-1,2018,alliance,0
8,6,2,AND,AUBURN,63,20,3-1,2018,alliance,0
9,6,2,AND,AUBURN,64,20,3-1,2018,alliance,0


In [47]:
#Output to CSV in the current working directory.
#The path is built from os.getcwd() because the `cwd` variable previously
#used here is not defined in any visible cell, so the cell raised NameError
#on a fresh kernel (Restart & Run All).
df.to_csv(os.path.join(os.getcwd(), 'me-general-election-results.csv'))