In [12]:
def trim_party(data, delimiter='('):
    """Return the text before the first ``delimiter`` in each entry, stripped.

    With the default ``'('`` this drops a trailing party designation such as
    ``"Jane Doe (D)"`` -> ``"Jane Doe"``. Passing ``' '`` keeps only the first
    whitespace-separated word, which is how the first-name / last-name columns
    are reduced elsewhere in this file.

    Args:
        data: Iterable of strings (a list, pandas Series or Index all work).
        delimiter: Separator handed to ``str.split``; everything from its
            first occurrence onward is discarded.

    Returns:
        A new list of trimmed strings, in the same order as ``data``.
    """
    # Only the leading piece of each split is needed; strip() removes the
    # whitespace that usually sits between the name and the delimiter.
    return [entry.split(delimiter)[0].strip() for entry in data]

def two_party(data):
    """Collapse a party code to 'R', 'D' or "OTHER".

    There is a joke here somewhere about 'this is how the political system
    actually works'.

    Args:
        data: Any mapping-like object supporting ``data['PARTY']``
            (e.g. a dict or a DataFrame row).

    Returns:
        The original value when it is exactly 'R' or 'D', otherwise the
        string "OTHER".
    """
    party = data['PARTY']
    return party if party in ('R', 'D') else "OTHER"
    
def remove_keyword(data, keyword):
    """Delete every occurrence of ``keyword`` from each string in ``data``.

    Entries that contain the keyword have it removed and are then stripped of
    surrounding whitespace. Entries that do not contain it are passed through
    untouched (they are NOT stripped).

    Args:
        data: Iterable of strings.
        keyword: Substring to remove.

    Returns:
        A new list with one output string per input entry, in order.
    """
    return [
        entry.replace(keyword, '').strip() if keyword in entry else entry
        for entry in data
    ]

def remove_middle_name(data):
    """Reduce each full name to its first and last word.

    Built to remove middle names and trailing titles from full-name columns.
    A final word counts as a title/suffix when it ends with a period
    (``"Jr."``, ``"Sr."``, ``"M.D."``) or is exactly ``"Jr"`` or ``"Sr"``;
    it is dropped before the first and last words are picked.

    Entries made of a single word (or only whitespace) are returned
    unchanged. When only one word remains after dropping a suffix
    (``"Smith Jr"``), that word alone is returned rather than being
    repeated as both first and last name.

    Args:
        data: Iterable of name strings.

    Returns:
        A new list of shortened names, in the same order as ``data``.
    """
    suffixes = ('Jr', 'Sr')
    no_middle = []
    for name in data:
        # Split once per name instead of re-splitting for every lookup.
        parts = name.split()
        if len(parts) < 2:
            # Nothing to shorten; keep the entry exactly as given.
            no_middle.append(name)
            continue

        if parts[-1].endswith('.') or parts[-1] in suffixes:
            parts = parts[:-1]

        if len(parts) == 1:
            # e.g. "Smith Jr." -> "Smith" (not "Smith Smith").
            no_middle.append(parts[0])
        else:
            no_middle.append(parts[0] + ' ' + parts[-1])
    return no_middle

def format_il(data):
    """Pivot Illinois election records into one row per county.

    This function is built to format election data as recorded by the
    Illinois State Board of Elections. Illinois produces records for the
    election as a whole rather than for individual offices, so every
    candidate appears in the same sheet.

    Side effects:
        ``data`` is modified in place: 'CanFirstName' and 'CanLastName' are
        overwritten with their first word, and a lower-cased
        'Candidate Name(f)' column is added.

    Args:
        data: DataFrame with at least the columns 'CanFirstName',
            'CanLastName', 'County' and 'Votes'.

    Returns:
        DataFrame with a 'County' column followed by one column per
        candidate (lower-case "first last", letters and spaces only) holding
        that candidate's summed votes per county. Counties where a candidate
        has no rows are filled with 0. An input with no rows yields an empty
        frame with only the 'County' column.
    """
    # Format names using the trim_party helper, then concatenate so the name
    # matches FEC data.
    data['CanFirstName'] = trim_party(data['CanFirstName'], ' ')  # removes middle names
    data['CanLastName'] = trim_party(data['CanLastName'], ' ')  # removes titles appended to last names
    data['Candidate Name(f)'] = data['CanFirstName'] + ' ' + data['CanLastName']
    # Format names as lower-case.
    data['Candidate Name(f)'] = data['Candidate Name(f)'].astype(str).str.lower()

    # Strip any punctuation left after removing titles. The cleaned name is
    # computed per row, so the same key is used both to list candidates and
    # to select their rows; cleaning only the list of unique names would
    # leave names such as "o'brien" matching no rows at all.
    clean_names = data['Candidate Name(f)'].map(
        lambda name: ''.join(ch for ch in name if ch.isalpha() or ch.isspace())
    )
    cand_list = list(clean_names.unique())

    if not cand_list:
        return pd.DataFrame(columns=['County'])

    # Since all races appear in a single sheet, build one county-level vote
    # table per candidate and outer-merge them on 'County'. Naming each vote
    # column after its candidate up front avoids merge-suffix collisions.
    merged_df = None
    for cand in cand_list:
        candidate_df = (
            data.loc[clean_names == cand, ['County', 'Votes']]
            .groupby('County')
            .sum()
            .reset_index()
            .rename(columns={'Votes': cand})
        )
        if merged_df is None:
            merged_df = candidate_df
        else:
            merged_df = pd.merge(merged_df, candidate_df, on='County', how='outer')

    # If a candidate has no rows for a county, the outer merge leaves NaN.
    return merged_df.fillna(0)

def format_OH(data):
    """Reduce an Ohio results sheet to per-county vote totals per candidate.

    Candidate columns are recognised by their header: it ends with ')' (a
    party designation) or '*' (write-in). All other columns except the first
    one are discarded. The first column is expected to be the county column,
    named 'County' once 'County Name' has been renamed.

    Args:
        data: DataFrame with string column headers, laid out as above.

    Returns:
        DataFrame with a 'County' column plus one column per candidate,
        headers shortened to "First Last", rows summed by county.
    """
    summary_rows = ['Total', 'Percentage']  # rows with totals rather than county data
    renames = {'County Name': 'County'}  # column names to reformat

    # Drop general information about the election: only the per-candidate
    # columns are wanted. These are picked from the headers as supplied,
    # before the rename below.
    candidate_cols = [col for col in data.columns if col.endswith((')', '*'))]
    data = data.rename(columns=renames)

    # The renamed county column (always the first column) leads the selection.
    keep = [data.columns[0], *candidate_cols]

    # Remove the summary rows from the table.
    is_summary = data['County'].isin(summary_rows)
    data = data[~is_summary].copy()

    # Future cases may want to split the flow here, to evaluate races other
    # than House races.
    data_copy = data[keep].copy()

    # Strip the party designation, then middle names/titles, so headers are
    # compatible with the FEC data for later merges.
    ###
    ### NOTE - this step will be complicated for states that have candidates
    ### with the same first and last name running - edge cases, but must be
    ### accounted for in the future
    ###
    data_copy.columns = remove_middle_name(trim_party(data_copy.columns))

    # OH reports by precinct; only data by 'County' is needed.
    return data_copy.groupby('County').sum().reset_index()

def OH_join_FEC(data, fec_data):
    """Attach FEC candidate details to transposed county vote totals.

    The first column of ``data`` is taken to be the county column. It is set
    aside, the remaining frame is transposed so each candidate becomes a row
    and each original row label becomes a column, and the lower-cased
    candidate names are merged against ``fec_data['CANDIDATE NAME(f)']``.

    Args:
        data: DataFrame whose first column holds county names and whose other
            column headers are candidate names (strings).
        fec_data: DataFrame with a 'CANDIDATE NAME(f)' column of lower-case
            names.

    Returns:
        A tuple ``(merged, counties)``: the merged DataFrame (one row per
        matched candidate, index reset) and the list of county names, which
        is kept so it can be reinserted later.
    """
    # For OH, the first column is always the county names.
    first_col = data.columns[0]
    counties = data[first_col].tolist()

    # Transpose so columns are the county vote totals and candidates are
    # rows; this aids the later grouping of candidates by party.
    by_candidate = data.drop(columns=first_col).T

    # Render candidate names in lowercase to match FEC data.
    by_candidate.index = [name.lower() for name in by_candidate.index]

    # Merge FEC data, associating each candidate with the FEC columns.
    joined = by_candidate.merge(
        fec_data, left_index=True, right_on='CANDIDATE NAME(f)'
    )
    return joined.reset_index(drop=True), counties

def OH_trans(data, counties):
    """Sum candidate votes by party and incumbency, one row per county.

    Args:
        data: DataFrame with one row per candidate, as returned by
            ``OH_join_FEC``: it must contain 'PARTY' and '(I)' columns, and
            its county vote columns are the ones whose labels are numeric
            (they are named for the row indices of the pre-transpose frame).
        counties: County names, one per county vote column, in column order.

    Returns:
        DataFrame with 'County' as the first column, followed by one column
        per (PARTY, (I)) group labelled by concatenating the two values
        (e.g. 'R' and True -> 'RTrue'). Values are the summed votes.

    Raises:
        ValueError: if ``len(counties)`` does not match the number of county
            vote columns.
    """
    # Keep only the county vote columns (numeric labels). Selecting them
    # before summing keeps text columns such as candidate names out of the
    # aggregation.
    county_cols = [col for col in data.columns if str(col).isnumeric()]

    # Group candidates by party and incumbency.
    grouped_data = data.groupby(['PARTY', '(I)'])[county_cols].sum()

    # One flat label per (PARTY, (I)) group replaces the two-level index.
    labels = [f"{party}{incumbent}" for party, incumbent in grouped_data.index]

    # Transpose back to rows = counties, columns = party/incumbency groups.
    reset_data = grouped_data.T
    reset_data.columns = labels

    # Reinsert the county names as the leading column.
    reset_data.insert(0, 'County', counties)
    return reset_data