In [None]:
# Mock data structure for picklings

elections = []

class Election:
    record = "some/path"
    codes = {
        "office" : False,
        "type" : True,
        "level" : True,
        "state": "Data Structure",
        "locality": "Data Structure",
        "datetime": "Datatime",
        "candidates": 0,
        "choices": 0,
    }
    measures = {
        "ashmans" : 0,
        "hartigan" : 0,
        "conistency" : 0,
    }
    calculations = {
        "mds_ordering" : 0,
        "candidate_names" : 0,
        "round_winners": {},
    }

In [10]:
import pickle

import numpy as np

from scipy.stats import gaussian_kde

from scipy.integrate import trapz


# Specify the filename for the pickle file
pickle_filename = "election_consistency_points.pkl"

# Open the pickle file in read-binary mode and load the dictionary
with open(pickle_filename, 'rb') as f:
    election_consistency_points = pickle.load(f)

print(election_consistency_points)

{'Minneapolis_11072017_BoardofEstimateandTaxation.csv': {1.0510943863794198: 7191, 2.0: 17436, 1.2883207897845648: 11544, 0.26277359659485494: 648, 0.5: 62, 0.3813867982974275: 147, 0.0: 598, 1.1569339914871373: 37}, 'Minneapolis_11072017_ParkBoardAtLarge.csv': {6.014948444934124: 784, 6.017633446019069: 5473, 6.0135427319108015: 1523, 6.0122390951643: 763, 6.012725972271748: 87, 6.0109692062304685: 1078, 6.0124188990330625: 2244, 6.012152526376739: 260, 6.011301662119656: 31, 6.016181111935773: 1029, 6.012179160541379: 3341, 6.013027170312215: 307, 6.013208712072627: 318, 6.013771527612035: 2957, 6.013179648389977: 151, 6.763225084514302: 76, 6.014068967537446: 142, 9.0: 717, 6.013054496103216: 291, 6.012577252309043: 177, 6.014737007213793: 836, 6.016544195456597: 121, 6.012874692234453: 286, 6.013206974180978: 1461, 6.014961287313676: 1396, 6.01338851594139: 172, 6.012635266177618: 142, 6.014386717041098: 329, 6.014918477622663: 329, 6.015311528454948: 167, 6.013899888965074: 79, 6.

In [1]:
def normalize_kde(kde_x, kde_y):
    
    # Normalize KDE
    min_x = min(kde_x)
    max_x = max(kde_x)
    normalized_x = (kde_x - min_x) / (max_x - min_x)
    area = trapz(kde_y, normalized_x)
    normalized_y = kde_y / area
    
    # Check if the normalized area is equal to 1
    new_area = trapz(normalized_y, normalized_x)
    if not np.isclose(new_area, 1, atol=1e-6):
        raise ValueError("Normalized KDE does not have an area equal to 1")
    
    # Calculate area to the left of midpoint for normalized KDE
    midpoint_idx = np.searchsorted(normalized_x, 0.5)
    area_left_original = trapz(normalized_y[:midpoint_idx], normalized_x[:midpoint_idx])
    
    # Calculate area to the left of midpoint for flipped KDE
    flipped_x = 1 - normalized_x
    flipped_y = normalized_y[::-1]
    area_left_flipped = trapz(flipped_y[:midpoint_idx], flipped_x[:midpoint_idx])
    
    # Return the KDE with the larger area to the left of the midpoint
    if area_left_flipped > area_left_original:
        return flipped_x, flipped_y
    else:
        return normalized_x, normalized_y

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [8]:
import pickle

import numpy as np

from scipy.stats import gaussian_kde

from scipy.integrate import trapz


# Specify the filename for the pickle file
pickle_filename = "election_consistency_points.pkl"

# Open the pickle file in read-binary mode and load the dictionary
with open(pickle_filename, 'rb') as f:
    election_consistency_points = pickle.load(f)


def compute_kde(data_points, num_points=1000):
    """
    Compute Kernel Density Estimation (KDE) for given data points.
    
    Parameters:
    - data_points: List or array of data points.
    - num_points: Number of points for the KDE x-axis.
    
    Returns:
    - x_vals: X values for the kernal density estimation.
    - y_vals: Y values (density) for the kernal density estimation.
    """
    
    # Calculate kernel density estimation
    density = gaussian_kde(data_points)
    x_vals = np.linspace(min(data_points), max(data_points), num_points)
    y_vals = density(x_vals)
    
    return x_vals, y_vals


def normalize_kde(kde_x, kde_y):
    """
    Normalize the KDE values:
    1. Scale x-values between 0 and 1.
    2. Ensure the area under the KDE curve is 1.
    
    Parameters:
    - kde_x: X values of the KDE.
    - kde_y: Y values (density) of the KDE.
    
    Returns:
    - normalized_x: Normalized x-values.
    - normalized_y: Normalized y-values.
    """

    # Scale x-values between 0 and 1
    min_x = min(kde_x)
    max_x = max(kde_x)
    normalized_x = (kde_x - min_x) / (max_x - min_x)
    
    # Ensure the area under the KDE curve is 1
    area = trapz(kde_y, normalized_x)
    normalized_y = kde_y / area
    
    # Check if the normalized area is equal to 1
    new_area = trapz(normalized_y, normalized_x)
    if not np.isclose(new_area, 1, atol=1e-6):
        raise ValueError("Normalized KDE does not have an area equal to 1")

    return normalized_x, normalized_y


In [11]:
import pickle
import numpy as np
from scipy.stats import gaussian_kde
from scipy.integrate import trapz

def flip_kde(kde_x, kde_y):
    midpoint = 0.5
    flipped_x = 2 * midpoint - kde_x
    flipped_y = kde_y[::-1]
    return flipped_x, flipped_y

def normalize_and_check_kde(kde_x, kde_y):
    normalized_x, normalized_y = normalize_kde(kde_x, kde_y)
    midpoint = 0.5
    left_area = trapz(normalized_y[normalized_x <= midpoint], normalized_x[normalized_x <= midpoint])
    if left_area < 0.5:
        normalized_x, normalized_y = flip_kde(normalized_x, normalized_y)
    return normalized_x, normalized_y

def convert_histogram_to_data(hist):
    return [key for key, count in hist.items() for _ in range(count)]

def sample_data(data, sample_size=10000):
    if len(data) > sample_size:
        return np.random.choice(data, sample_size, replace=False)
    return data

# Main script
pickle_filename = "election_consistency_points.pkl"
with open(pickle_filename, 'rb') as f:
    election_consistency_points = pickle.load(f)

points, hist = election_consistency_points.items()
points = convert_histogram_to_data(hist)
sampled_points = sample_data(points)

kde_x, kde_y = compute_kde(sampled_points)
normalized_x, normalized_y = normalize_and_check_kde(kde_x, kde_y)

ValueError: too many values to unpack (expected 2)

In [None]:
def plot_kde(kde_x, kde_y, title='Kernel Density Estimation', xlabel='Value', ylabel='Density'):
    """
    Plot the KDE using the provided x and y values.
    
    Parameters:
    - kde_x: X values of the KDE.
    - kde_y: Y values (density) of the KDE.
    - title: Title for the plot.
    - xlabel: Label for x-axis.
    - ylabel: Label for y-axis.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(kde_x, kde_y, label="KDE", color='blue')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
def average_kdes(election_names, normalized_kde_data):
    """
    Average the KDEs for the provided list of election names.
    
    Parameters:
    - election_names: List of election names.
    - normalized_kde_data: Dictionary containing normalized KDE data for elections.
    
    Returns:
    - averaged_x: Averaged x-values for KDE.
    - averaged_y: Averaged y-values for KDE.
    """
    
    # Check if election_names is not empty
    if not election_names:
        raise ValueError("The list of election names is empty.")
    
    first_election = str(election_names[0])
    
    # Initialize accumulators for KDE values
    sum_x = np.zeros_like(normalized_kde_data[first_election][0])
    sum_y = np.zeros_like(normalized_kde_data[first_election][1])
    
    # Accumulate KDE values
    for election in election_names:
        election_str = str(election)  # Ensure that the key used is a string
        sum_x += normalized_kde_data[election_str][0]
        sum_y += normalized_kde_data[election_str][1]
    
    # Calculate the average
    num_elections = len(election_names)
    averaged_x = sum_x / num_elections
    averaged_y = sum_y / num_elections
    
    return averaged_x, averaged_y

def average_and_plot(election_names, title):
    """
    Average the KDEs for the provided list of election names and plot the result.
    
    Parameters:
    - election_names: List of election names.
    - title: Title for the plot.
    """
    
    averaged_x, averaged_y = average_kdes(election_names, normalized_kde_data)
    plot_kde(averaged_x, averaged_y, title=title)

In [None]:
import numpy as np
import pandas as pd
import pickle
from scipy.stats import gaussian_kde
from scipy.integrate import trapz
import matplotlib.pyplot as plt
import seaborn as sns

# Load the normalized KDE data from the pickle file
with open("election_kde_normalized.pkl", 'rb') as f:
    normalized_kde_data = pickle.load(f)

# Function to plot KDE
def plot_kde(kde_x, kde_y, title='Kernel Density Estimation'):
    plt.figure(figsize=(10, 6))
    plt.plot(kde_x, kde_y, label="KDE", color='blue')
    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.grid(True)
    plt.legend()
    plt.show()

# Function to average KDEs
def average_kdes(election_names, normalized_kde_data):
    sum_x = np.zeros_like(normalized_kde_data[election_names[0]][0])
    sum_y = np.zeros_like(normalized_kde_data[election_names[0]][1])
    for election in election_names:
        sum_x += normalized_kde_data[election][0]
        sum_y += normalized_kde_data[election][1]
    num_elections = len(election_names)
    averaged_x = sum_x / num_elections
    averaged_y = sum_y / num_elections

    # Check if the normalized area is equal to 1
    new_area = trapz(averaged_y, averaged_x)
    if not np.isclose(new_area, 1, atol=1e-6):
        raise ValueError("Averaged KDE does not have an area equal to 1")

    return averaged_x, averaged_y

# Function to average and plot KDEs
def average_and_plot(election_names, title):
    averaged_x, averaged_y = average_kdes(election_names, normalized_kde_data)
    plot_kde(averaged_x, averaged_y, title=title)

# Function to filter elections
def filter_elections(filter_criteria):
    election_df = pd.read_csv("election_table.csv")
    for key, value in filter_criteria.items():
        if isinstance(value, list):
            election_df = election_df[election_df[key].isin(value)]
        else:
            election_df = election_df[election_df[key] == value]
    filtered_elections = election_df['filename'].tolist()
    election_list = [election for election in filtered_elections if election in normalized_kde_data]
    return election_list

In [None]:
# Filter and average for STATE and FEDERAL level elections with YES in partisan
filter_criteria = {'level': ['FEDERAL', 'STATE'], 'partisan': 'YES'}
state_federal_points = filter_elections(filter_criteria)
for election in state_federal_points:
    print(election)
print("(Method 1) Averaging for STATE and FEDERAL level elections with YES in partisan:")
average_and_plot(state_federal_points, "Partisan State and Federal Elections")

# Filter and average for LOCAL elections with NO in partisan
filter_criteria = {'level': 'LOCAL', 'partisan': 'NO'}
local_points = filter_elections(filter_criteria)
for election in local_points:
    print(election)
print("(Method 1) Averaging for LOCAL elections with NO in partisan:")
average_and_plot(local_points, "Non-Partisan Local Elections")

# Testing for a single election
print("Testing for target election:")
average_and_plot(["Alaska_11082022_USRepresentative.csv"], "Alaska_11082022_USRepresentative.csv")

In [None]:
# Load the normalized KDE data from the pickle file
with open("election_kde_normalized.pkl", 'rb') as f:
    normalized_kde_data = pickle.load(f)

# Filter and average for STATE and FEDERAL level elections with YES in partisan
filter_criteria = {'level': ['FEDERAL', 'STATE'], 'partisan': 'YES'}
state_federal_points = filter_elections(filter_criteria)
for election in state_federal_points:
    print(election)
print("(Method 2) Averaging for STATE and FEDERAL level elections with YES in partisan:")
average_and_plot(state_federal_points, "Partisan State and Federal Elections")

# Filter and average for LOCAL elections with NO in partisan
filter_criteria = {'level': 'LOCAL', 'partisan': 'NO'}
local_points = filter_elections(filter_criteria)
for election in local_points:
    print(election)
print("(Method 2) Averaging for LOCAL elections with NO in partisan:")
average_and_plot(local_points, "Non-Partisan Local Elections")

# Testing for a single election
print("Testing for target election:")
average_and_plot(["Alaska_11082022_USRepresentative.csv"], "Alaska_11082022_USRepresentative.csv")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import gaussian_kde
import numpy as np

def average_and_plot(points_list, title="Averaged Data Analysis"):
    
    # Grid for KDE evaluation
    x_grid = np.linspace(0, 1, 500)
    kde_values_list = []
    
    # Normalize, scale and compute KDE for each election
    for points in points_list:
        # Normalize x-values
        abs_max_x = max(abs(min(points.keys())), abs(max(points.keys())))
        normalized_data = [(x + abs_max_x) / (2 * abs_max_x) for x in points.keys()]
        
        # Scale y-values
        max_y = max(points.values())
        scaled_weights = [v / max_y for v in points.values()]
        
        # Compute KDE values
        kde = gaussian_kde(normalized_data, weights=scaled_weights)
        kde_values_list.append(kde(x_grid))
    
    # Average the KDE values across all elections
    avg_kde = np.mean(kde_values_list, axis=0)
    
    # Plot the averaged KDE
    plt.figure(figsize=(10, 6))
    plt.plot(x_grid, avg_kde)
    plt.title(f"{title} - Kernel Density Estimation")
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.grid(True)
    plt.show()

In [None]:
# Filter and average for STATE and FEDERAL level elections with YES in partisan
filter_criteria = {'level': ['FEDERAL', 'STATE'], 'partisan': 'YES'}
state_federal_points = filter_elections(filter_criteria)
print("(Method 1) Averaging for STATE and FEDERAL level elections with YES in partisan:")
average_and_plot(state_federal_points, "Partisan State and Federal Elections")

# Filter and average for LOCAL elections with NO in partisan
filter_criteria = {'level': 'LOCAL', 'partisan': 'NO'}
local_points = filter_elections(filter_criteria)
print("(Method 1) Averaging for LOCAL elections with NO in partisan:")
average_and_plot(local_points, "Non-Partisan Local Elections")

# Testing for a single election
print("Testing for target election:")
average_and_plot([election_consistency_points["Alaska_11082022_USRepresentative.csv"]], "Alaska_11082022_USRepresentative.csv")

In [None]:
import matplotlib.pyplot as plt

def plot_kdes(kdes, x_grid, title="KDE Analysis"):
    """Plot the computed KDEs."""
    
    plt.figure(figsize=(10, 6))
    
    for kde_values in kdes:
        plt.plot(x_grid, kde_values)
    
    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.grid(True)
    plt.show()

In [None]:
from scipy.stats import gaussian_kde
import numpy as np

def compute_kdes(points_list):
    """Compute KDE for each election."""
    
    kdes = []
    x_grid = np.linspace(0, 1, 500)  # Define a grid to evaluate the KDEs
    
    # Normalize and compute KDE for each election
    for points in points_list:
        # Normalize x-values
        abs_max_x = max(abs(min(points.keys())), abs(max(points.keys())))
        normalized_data = [(x + abs_max_x) / (2 * abs_max_x) for x in points.keys()]
        
        # Compute KDE and evaluate it on the x_grid
        kde = gaussian_kde(normalized_data)
        kde_values = kde(x_grid)
        
        kdes.append(kde_values)
    
    return kdes, x_grid

In [None]:
# Filter and compute KDE for STATE and FEDERAL level elections with YES in partisan
filter_criteria = {'level': ['FEDERAL', 'STATE'], 'partisan': 'YES'}
state_federal_points = filter_elections(filter_criteria)
print("Computing KDE for STATE and FEDERAL level elections with YES in partisan:")
kdes, x_grid = compute_kdes(state_federal_points)
plot_kdes(kdes, x_grid, "Partisan State and Federal Elections KDEs")

# Filter and compute KDE for LOCAL elections with NO in partisan
filter_criteria = {'level': 'LOCAL', 'partisan': 'NO'}
local_points = filter_elections(filter_criteria)
print("Computing KDE for LOCAL elections with NO in partisan:")
kdes, x_grid = compute_kdes(local_points)
plot_kdes(kdes, x_grid, "Non-Partisan Local Elections KDEs")

# Compute and plot KDE for a single election
print("Computing KDE for target election:")
kdes, x_grid = compute_kdes([election_consistency_points["Alaska_11082022_USRepresentative.csv"]])
plot_kdes(kdes, x_grid, "Alaska_11082022_USRepresentative.csv KDE")