In [17]:
import pandas as pd
import numpy as np
import re
from us import states
from matplotlib import pyplot as plt
from utils import get_names2abbrs_dict

In [9]:
def lineplot(x_data, y_data, x_label="", y_label="", title=""):
    """Draw y_data against x_data as a single line on a new figure.

    Args:
        x_data: Values for the horizontal axis, passed straight to ``Axes.plot``.
        y_data: Values for the vertical axis, passed straight to ``Axes.plot``.
        x_label: Text for the x-axis label (empty by default).
        y_label: Text for the y-axis label (empty by default).
        title: Text for the plot title (empty by default).

    Returns:
        None. The figure is created through ``plt.subplots()`` and is not
        handed back to the caller.
    """
    # Fixed look of the line: width, colour and full opacity.
    line_style = {'lw': 2, 'color': '#539caf', 'alpha': 1}

    # Only the Axes is needed; the Figure handle is discarded.
    axes = plt.subplots()[1]
    axes.plot(x_data, y_data, **line_style)

    # Annotate the chart.
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)

In [15]:
# Raw input table for this notebook. Later cells read its `state`,
# `timestamp`, `leading_candidate_*` and `trailing_candidate_*` columns.
data = pd.read_csv(
    'election_data/all-state-changes.csv'
)

In [34]:
def convert_state(line):
    """Translate a ``"<name> (EV: <digits>)"`` label via the names-to-abbreviations dict.

    Args:
        line: A state label string, e.g. ``"Georgia (EV: 16)"``.

    Returns:
        The value stored under ``<name>`` in ``get_names2abbrs_dict()``
        (presumably the postal abbreviation -- confirm against ``utils``) when
        the label has the ``"<name> (EV: <digits>)"`` shape; otherwise ``line``
        is returned unchanged.

    Raises:
        KeyError: If the label matches but ``<name>`` is not a key of the
            lookup dict.
    """
    parsed = re.search(r'^([\WA-Za-z]+)\ \(EV\:\ [\d]+\)$', line)
    if not parsed:
        # Not in the "<name> (EV: n)" format: pass the value through untouched.
        return line

    # Group 1 is everything before the " (EV: n)" suffix.
    state_name = parsed.group(1)
    return get_names2abbrs_dict()[state_name]
    
def simplify(in_df):
    """Return a copy of ``in_df`` with an added ``state_po`` column.

    Args:
        in_df: DataFrame with a ``state`` column; it is not modified.

    Returns:
        A new DataFrame holding every original column plus ``state_po``, whose
        values are ``convert_state`` applied to each entry of ``state``.
    """
    result = in_df.copy()
    result['state_po'] = [convert_state(label) for label in result['state']]
    return result

In [59]:
# Working copy of the raw table with the extra `state_po` column added.
df = simplify(in_df=data)

In [70]:
def get_final_vote_share(in_df):
    """Summarise the latest Trump/Biden vote counts for every state.

    For each distinct ``state_po`` the row with the greatest ``timestamp`` is
    taken as the final result. States are processed in sorted order, so the
    returned dict (and anything built from it) has a reproducible ordering.

    Args:
        in_df: DataFrame with the columns ``state_po``, ``timestamp``,
            ``leading_candidate_name``, ``trailing_candidate_name``,
            ``leading_candidate_votes`` and ``trailing_candidate_votes``.
            It is only read, never modified.

    Returns:
        dict mapping each state to a dict with the keys:
            ``r_vote_share``: Trump's vote count,
            ``d_vote_share``: Biden's vote count,
            ``total_votes``:  sum of the two counts,
            ``d_prob``:       Biden's fraction of ``total_votes``.
        Note that the ``*_vote_share`` entries hold raw counts, not fractions;
        the key names are kept because they become the exported column names.

    Raises:
        AssertionError: If a final row names a candidate other than Trump or
            Biden, or if the leading candidate does not have strictly more
            votes than the trailing one.
    """
    major_candidates = ('Trump', 'Biden')
    state_dict = {}

    # groupby partitions the frame once and yields the states in sorted order;
    # iterating a set of state names would give an arbitrary, run-dependent order.
    for state, state_rows in in_df.groupby('state_po'):
        final = state_rows.sort_values(by='timestamp').iloc[-1]

        leader = final['leading_candidate_name']
        trailer = final['trailing_candidate_name']
        assert leader in major_candidates, \
            f"{state}: unexpected leading candidate {leader!r}"
        assert trailer in major_candidates, \
            f"{state}: unexpected trailing candidate {trailer!r}"

        # .item() turns the numpy scalars into plain Python numbers.
        leading_votes = final['leading_candidate_votes'].item()
        trailing_votes = final['trailing_candidate_votes'].item()
        assert leading_votes > trailing_votes, \
            f"{state}: leading candidate does not have more votes than trailing"

        if leader == 'Trump':
            r_votes, d_votes = leading_votes, trailing_votes
        else:
            d_votes, r_votes = leading_votes, trailing_votes

        total_votes = r_votes + d_votes
        state_dict[state] = {
            'r_vote_share': r_votes, 'd_vote_share': d_votes,
            'total_votes': total_votes, 'd_prob': d_votes/total_votes
        }
    return state_dict

In [77]:
# One row per state, with the state key exposed as a `state_po` column.
# orient='index' turns the outer dict keys (states) into rows directly, so
# each column keeps its own dtype: vote counts stay integers and only d_prob
# is float. Building column-wise and transposing would put counts and d_prob
# in the same column and upcast everything to float (e.g. "100.0" on export).
final_results = (
    pd.DataFrame.from_dict(get_final_vote_share(df), orient='index')
    .reset_index()
    .rename(columns={'index': 'state_po'})
)

In [78]:
# Persist the per-state summary; index=False keeps the row index out of the file.
final_results.to_csv(
    'election_data/final_results_by_state.csv',
    index=False,
)