In [25]:
import pandas as pd
import numpy as np
import re
from us import states
from matplotlib import pyplot as plt
from utils import get_names2abbrs_dict

In [26]:
def lineplot(x_data, y_data, x_label="", y_label="", title=""):
    """Draw ``y_data`` against ``x_data`` as a single line on a new figure.

    Parameters
    ----------
    x_data, y_data
        Values forwarded unchanged to ``Axes.plot``.
    x_label, y_label : str, optional
        Axis labels; empty by default.
    title : str, optional
        Title placed on the axes; empty by default.

    Returns
    -------
    None
        The figure and axes are created through ``plt.subplots()`` and are
        not handed back to the caller.
    """
    # Only the axes object is needed; the figure handle is discarded.
    _, axes = plt.subplots()

    # Fully opaque line, two points wide, in a fixed blue-grey colour.
    axes.plot(x_data, y_data, linewidth=2, color='#539caf', alpha=1)

    # Apply title first, then the x and y axis labels.
    for apply_text, text in (
        (axes.set_title, title),
        (axes.set_xlabel, x_label),
        (axes.set_ylabel, y_label),
    ):
        apply_text(text)

In [27]:
# Load the raw CSV into `data`; the path is relative to the current working directory.
data = pd.read_csv('election_data/all-state-changes.csv')

In [28]:
def convert_state(line):
    """Translate a ``"<state name> (EV: <digits>)"`` label via the name lookup.

    Parameters
    ----------
    line : str
        A single label, e.g. one value of the ``state`` column.

    Returns
    -------
    object
        ``get_names2abbrs_dict()[<state name>]`` when ``line`` has the
        ``"<state name> (EV: <digits>)"`` shape; otherwise ``line`` itself,
        unchanged.

    Raises
    ------
    KeyError
        When the label matches but the captured name is missing from the
        lookup (assuming ``get_names2abbrs_dict()`` returns a plain dict —
        TODO confirm against ``utils``).
    """
    match = re.search(r'^([\WA-Za-z]+)\ \(EV\:\ [\d]+\)$', line)
    if match is None:
        # Not in the "<name> (EV: n)" form: hand the value back untouched.
        return line
    # group(1) is the captured state name. The previous `groups(1)[0]` gave the
    # same value only incidentally: the argument of groups() is the default
    # used for unmatched groups, not a group index.
    return get_names2abbrs_dict()[match.group(1)]
    
def simplify(in_df):
    """Return a copy of ``in_df`` carrying an extra ``state_po`` column.

    Each value of the ``state`` column is passed through ``convert_state``
    and the results are stored, in row order, under ``state_po``. The input
    frame itself is left untouched.
    """
    result = in_df.copy()
    result['state_po'] = [convert_state(label) for label in result['state']]
    return result

In [29]:
# `df` is a copy of `data` with the added `state_po` column; `data` itself is not modified.
df = simplify(data)

In [30]:
def get_final_vote_share(in_df, raw):
    """Compute Republican/Democratic vote shares from each state's latest row.

    For every distinct ``state_po`` value, the row with the greatest
    ``timestamp`` is treated as the final count. Its leading/trailing
    candidate columns are mapped onto Trump (Republican) and Biden
    (Democratic) totals, which are then divided by the chosen denominator.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Must provide the columns ``state_po``, ``timestamp``,
        ``leading_candidate_name``, ``trailing_candidate_name``,
        ``leading_candidate_votes``, ``trailing_candidate_votes`` and, when
        ``raw`` is true, ``total_votes_count``. The frame is only read.
    raw : bool
        If true, shares are relative to the row's ``total_votes_count``;
        otherwise they are relative to the Trump + Biden sum.

    Returns
    -------
    dict
        Maps each ``state_po`` value to a dict with the keys
        ``'rep_vote_share'``, ``'dem_vote_share'``, ``'totalvotes'`` and
        ``'state'`` (the ``.name`` of the ``states`` attribute named by the
        ``state_po`` value). Keys are inserted in sorted ``state_po`` order,
        so the result — and anything exported from it — is identical from
        run to run.

    Raises
    ------
    AssertionError
        If a final row names a candidate other than Trump/Biden, names the
        same candidate twice, or the leading candidate does not have
        strictly more votes than the trailing one.
    """
    state_dict = dict()
    # groupby(sort=True) visits the states in sorted key order. Iterating a
    # set of strings (the previous approach) yields an order that can change
    # between interpreter runs, which reshuffled the rows of the exported CSV.
    # It also avoids re-filtering the whole frame once per state.
    # NOTE: groupby skips rows whose state_po is missing (pandas default).
    for state, state_rows in in_df.groupby('state_po', sort=True):
        # Stable sort: among rows sharing the latest timestamp, the one that
        # appears last in the input wins, deterministically.
        # Assumes timestamp values sort chronologically — TODO confirm format.
        final = state_rows.sort_values(by='timestamp', kind='mergesort').iloc[-1]

        leader = final['leading_candidate_name']
        trailer = final['trailing_candidate_name']
        assert leader in ['Trump', 'Biden']
        assert trailer in ['Trump', 'Biden']
        # The two membership checks alone would accept the same name twice,
        # silently crediting one candidate's votes to the other party.
        assert leader != trailer, 'leading and trailing candidate are identical'

        # .item() unwraps the NumPy scalar stored in the row to a plain
        # Python number (assumes the vote columns are numeric).
        lead_votes = final['leading_candidate_votes'].item()
        trail_votes = final['trailing_candidate_votes'].item()
        assert lead_votes > trail_votes

        if leader == 'Trump':
            r_votes, d_votes = lead_votes, trail_votes
        else:
            r_votes, d_votes = trail_votes, lead_votes

        if raw:
            total_votes = final['total_votes_count']
        else:
            total_votes = r_votes + d_votes
        state_dict[state] = {
            'rep_vote_share': r_votes/total_votes, 'dem_vote_share': d_votes/total_votes,
            'totalvotes': total_votes, 'state': getattr(states, state).name,
        }
    return state_dict

In [34]:
# Shares measured against the Trump + Biden total (raw=False). The per-state
# dict is transposed so each state becomes a row, and the state key is moved
# out of the index into a `state_po` column.
final_results = (
    pd.DataFrame.from_dict(get_final_vote_share(df, False))
    .T
    .reset_index()
    .rename(columns={'index': 'state_po'})
)

In [35]:
# Write the raw=False results to CSV, omitting the DataFrame index.
final_results.to_csv('election_data/final_results_by_state.csv', index=False)

In [36]:
# Shares measured against each row's `total_votes_count` (raw=True). The
# per-state dict is transposed so each state becomes a row, and the state key
# is moved out of the index into a `state_po` column.
final_results_raw = (
    pd.DataFrame.from_dict(get_final_vote_share(df, True))
    .T
    .reset_index()
    .rename(columns={'index': 'state_po'})
)

In [37]:
# Write the raw=True results to CSV, omitting the DataFrame index.
final_results_raw.to_csv('election_data/final_results_raw_by_state.csv', index=False)