In [None]:
import pandas as pd

In [None]:
# Colab-only: mount Google Drive at /content/drive so the CSV under MyDrive can be read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Initial load of the 2020 federal election results from the mounted Drive.
# The space before the final "/" is part of the path string as written;
# the later cells read this same file again before cleaning it.
federalelections2020_file = pd.read_csv("/content/drive/MyDrive/CM151 Final Project /federalelections2020.csv")

In [None]:
# Inputted State Abbreviation Heatmap

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Step 1: Read the raw results CSV from Drive (adjust the path if the file moves).
federalelections2020_file = pd.read_csv('/content/drive/MyDrive/CM151 Final Project /federalelections2020.csv')

# Step 2: Make 'GENERAL RESULTS' numeric.
# Thousands separators are stripped first; anything that still is not a number
# becomes NaN because of errors='coerce'.
federalelections2020_file['GENERAL RESULTS'] = pd.to_numeric(
    federalelections2020_file['GENERAL RESULTS'].replace({',': ''}, regex=True),
    errors='coerce',
)

# Step 3: Define a function to plot the heatmap for a given state
def plot_state_election_heatmap(state_abbreviation):
    """Show a one-cell heatmap coloured by the winning party and margin in a state.

    Reads the module-level ``federalelections2020_file`` frame, sums
    'GENERAL RESULTS' per 'PARTY' for the requested state and compares the two
    largest party totals. The margin is expressed as a share of those two
    totals combined (not of all votes cast).

    Args:
        state_abbreviation: State code as stored in the 'STATE ABBREVIATION'
            column (e.g. 'PA'). Surrounding whitespace and letter case are
            ignored.

    Returns:
        None. A message is printed and nothing is drawn when the state has no
        rows, fewer than two parties, or no recorded votes.
    """
    # Normalise once so the lookup and every message use the same value.
    state_code = str(state_abbreviation).strip().upper()

    # Filter data for the specified state
    state_data = federalelections2020_file[federalelections2020_file['STATE ABBREVIATION'] == state_code]
    if state_data.empty:
        print(f"No election data found for state abbreviation: {state_code}")
        return

    # Aggregate total votes per party and keep the two largest totals.
    party_votes = state_data.groupby('PARTY')['GENERAL RESULTS'].sum()
    top_parties = party_votes.nlargest(2)
    if len(top_parties) < 2:
        print(f"Not enough party data for the state: {state_code}")
        return

    # Margin of victory between the top two parties.
    margin = abs(top_parties.iloc[0] - top_parties.iloc[1])
    total_votes = top_parties.sum()
    if total_votes == 0:
        # Avoid a 0/0 division that would yield NaN and an undefined colour.
        print(f"No votes recorded for the state: {state_code}")
        return
    margin_percentage = margin / total_votes

    winning_party = top_parties.idxmax()

    # Blue for a Democratic win, red for a Republican win, grey for anything
    # else so a third-party winner is not silently drawn as Republican.
    party_colormaps = {'D': plt.cm.Blues, 'R': plt.cm.Reds}
    cmap = party_colormaps.get(winning_party, plt.cm.Greys)

    # Larger margin -> darker colour (1 is a complete win, 0 is a very close
    # race). A small floor keeps the party hue recognisable for close races.
    min_intensity = 0.2
    color_intensity = min_intensity + (1 - min_intensity) * margin_percentage
    color = cmap(color_intensity)

    # A single-cell heatmap whose only colour is the one computed above.
    plt.figure(figsize=(5, 5))
    sns.heatmap([[1]], annot=True, cmap=[color], cbar=False, xticklabels=[], yticklabels=[])
    plt.title(f'{state_code} Election Results - {winning_party} Wins\n'
              f'Margin: {margin_percentage*100:.2f}%')
    plt.show()

# Step 10: User input for state abbreviation
# input() blocks until the user types a value, so this cell cannot run unattended.
state_abbreviation = input("Enter the state abbreviation (e.g., 'PA' for Pennsylvania, 'ME' for Maine): ")

# Step 11: Call the function to plot the heatmap for the entered state
# (the function upper-cases the value itself, so lower-case input is accepted).
plot_state_election_heatmap(state_abbreviation)

In [None]:
# State to State Margins of Victory Heatmap

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Re-read the raw CSV so this cell starts from the unmodified file.
federalelections2020_file = pd.read_csv('/content/drive/MyDrive/CM151 Final Project /federalelections2020.csv')

# Strip thousands separators from 'GENERAL RESULTS' and coerce it to numbers
# (values that cannot be parsed become NaN).
federalelections2020_file['GENERAL RESULTS'] = pd.to_numeric(
    federalelections2020_file['GENERAL RESULTS'].replace({',': ''}, regex=True),
    errors='coerce',
)

# Function to compute margin percentage and winning party
def get_state_margin_percentage(state_abbreviation, election_data=None):
    """Return the top-two margin and the winning party for one state.

    Args:
        state_abbreviation: Two-letter state code; it is converted to ``str``,
            stripped and upper-cased before the lookup.
        election_data: Optional frame with 'STATE ABBREVIATION', 'PARTY' and a
            numeric 'GENERAL RESULTS' column. Defaults to the module-level
            ``federalelections2020_file``.

    Returns:
        ``(margin_percentage, winning_party)`` where ``margin_percentage`` is
        the vote gap between the two largest parties divided by their combined
        votes (a fraction between 0 and 1, not multiplied by 100), or ``None``
        when the code is not two letters, the state has no rows, fewer than
        two parties are present, or the top two parties have no votes.
    """
    if election_data is None:
        election_data = federalelections2020_file

    state_abbreviation = str(state_abbreviation).strip()

    # Rejects NaN ("nan"), footnote text and anything else that is not a
    # two-letter code.
    if not state_abbreviation.isalpha() or len(state_abbreviation) != 2:
        return None

    # Filter the data for the given state
    state_data = election_data[election_data['STATE ABBREVIATION'] == state_abbreviation.upper()]
    if state_data.empty:
        return None

    # Aggregate total votes by party and keep the two largest totals.
    party_votes = state_data.groupby('PARTY')['GENERAL RESULTS'].sum()
    top_parties = party_votes.nlargest(2)
    if len(top_parties) < 2:
        return None

    margin = abs(top_parties.iloc[0] - top_parties.iloc[1])
    total_votes = top_parties.sum()
    if total_votes == 0:
        # 0/0 would produce NaN; treat "no votes" like "no usable data".
        return None
    margin_percentage = margin / total_votes

    winning_party = top_parties.idxmax()

    return margin_percentage, winning_party

# PercentFormatter renders the 0-1 margin fractions as percentages on the colorbar.
from matplotlib.ticker import PercentFormatter

# Get the list of all states
states = federalelections2020_file['STATE ABBREVIATION'].unique()

# Per state: the margin of victory and, for D/R winners, a Red/Blue label.
state_margin_percentage = {}
state_classification = {}
party_to_classification = {'R': 'Red', 'D': 'Blue'}

for state in states:
    result = get_state_margin_percentage(state)
    if result is None:
        # Invalid code, no rows, or not enough party data for this entry.
        continue
    margin_percentage, winning_party = result
    state_margin_percentage[state] = margin_percentage
    # A winner other than 'R'/'D' keeps its margin but gets no classification.
    if winning_party in party_to_classification:
        state_classification[state] = party_to_classification[winning_party]

# One row per state, largest margin first.
final_margin_map = pd.DataFrame(list(state_margin_percentage.items()), columns=['State', 'Margin Percentage'])
final_margin_map['Classification'] = final_margin_map['State'].map(state_classification)
final_margin_map = final_margin_map.sort_values(by='Margin Percentage', ascending=False)

# A single-row frame (states across the columns) is what the heatmap draws.
heatmap_data = final_margin_map[['Margin Percentage']].T

# NOTE(review): this per-state colormap name is computed but not used by the
# heatmap call below, which colours every cell by margin only.
colors = final_margin_map['Classification'].map({'Red': 'Reds', 'Blue': 'Blues'})

# Create a figure
plt.figure(figsize=(16, 4))

# Generate the heatmap with the margin of victory values
sns.heatmap(
    heatmap_data,
    annot=True,  # Annotate each cell with its margin
    fmt=".1%",   # Values are fractions; show them as percentages to match the labels
    cmap="YlGnBu",  # Sequential yellow -> green -> blue gradient
    cbar_kws={'label': 'Margin of Victory Percentage', 'format': PercentFormatter(xmax=1)},
    xticklabels=final_margin_map['State'],  # State abbreviations on x-axis
    yticklabels=False,  # Single row, so no y-tick labels
    annot_kws={"size": 12, "rotation": 90, 'color': 'black'},  # Black text for contrast
    linewidths=0.5
)

# Title and labels
plt.title('2020 U.S. Presidential Election: Margin of Victory by State', fontsize=16)
plt.xlabel('State', fontsize=14)
plt.ylabel('')  # The only row is the margin itself; states are on the x-axis

# Rotate x-axis labels for better readability
plt.xticks(rotation=90, fontsize=12)

# Adjust layout for better spacing
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
# Winning State Heatmap (Blue or Red)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import geopandas as gpd

# Load a fresh copy of the election results for this cell.
federalelections2020_file = pd.read_csv('/content/drive/MyDrive/CM151 Final Project /federalelections2020.csv')

# Convert 'GENERAL RESULTS' to numbers: drop the commas, then coerce
# (unparseable entries turn into NaN).
federalelections2020_file['GENERAL RESULTS'] = pd.to_numeric(
    federalelections2020_file['GENERAL RESULTS'].replace({',': ''}, regex=True),
    errors='coerce',
)

# Function to compute margin percentage and winning party
def get_state_margin_percentage(state_abbreviation, election_data=None):
    """Return the top-two margin and the winning party for one state.

    NOTE: a function with this name is also defined in the earlier
    "State to State Margins of Victory Heatmap" cell; running this cell
    replaces that definition.

    Args:
        state_abbreviation: Two-letter state code; it is converted to ``str``,
            stripped and upper-cased before the lookup.
        election_data: Optional frame with 'STATE ABBREVIATION', 'PARTY' and a
            numeric 'GENERAL RESULTS' column. Defaults to the module-level
            ``federalelections2020_file``.

    Returns:
        ``(margin_percentage, winning_party)`` where ``margin_percentage`` is
        the vote gap between the two largest parties divided by their combined
        votes (a fraction between 0 and 1, not multiplied by 100), or ``None``
        when the code is not two letters, the state has no rows, fewer than
        two parties are present, or the top two parties have no votes.
    """
    if election_data is None:
        election_data = federalelections2020_file

    state_abbreviation = str(state_abbreviation).strip()

    # Rejects NaN ("nan"), footnote text and anything else that is not a
    # two-letter code.
    if not state_abbreviation.isalpha() or len(state_abbreviation) != 2:
        return None

    # Filter the data for the given state
    state_data = election_data[election_data['STATE ABBREVIATION'] == state_abbreviation.upper()]
    if state_data.empty:
        return None

    # Aggregate total votes by party and keep the two largest totals.
    party_votes = state_data.groupby('PARTY')['GENERAL RESULTS'].sum()
    top_parties = party_votes.nlargest(2)
    if len(top_parties) < 2:
        return None

    margin = abs(top_parties.iloc[0] - top_parties.iloc[1])
    total_votes = top_parties.sum()
    if total_votes == 0:
        # 0/0 would produce NaN; treat "no votes" like "no usable data".
        return None
    margin_percentage = margin / total_votes

    winning_party = top_parties.idxmax()

    return margin_percentage, winning_party

# Get the list of all states
states = federalelections2020_file['STATE ABBREVIATION'].unique()

# Per state: the margin of victory and, for D/R winners, a Red/Blue label.
state_margin_percentage = {}
state_classification = {}
party_to_classification = {'R': 'Red', 'D': 'Blue'}

for state in states:
    result = get_state_margin_percentage(state)
    if result is None:
        # Invalid code, no rows, or not enough party data for this entry.
        continue
    margin_percentage, winning_party = result
    state_margin_percentage[state] = margin_percentage
    # A winner other than 'R'/'D' keeps its margin but gets no classification.
    if winning_party in party_to_classification:
        state_classification[state] = party_to_classification[winning_party]

# One row per state, largest margin first.
final_margin_map = pd.DataFrame(list(state_margin_percentage.items()), columns=['State', 'Margin Percentage'])
final_margin_map['Classification'] = final_margin_map['State'].map(state_classification)
final_margin_map = final_margin_map.sort_values(by='Margin Percentage', ascending=False)

# Map classifications to numeric values for heatmap (Blue = 0, Red = 1).
# States without a Red/Blue classification become NaN here.
final_margin_map['Classification Numeric'] = final_margin_map['Classification'].map({'Blue': 0, 'Red': 1})

# A single-row frame (states across the columns) is what the heatmap draws.
heatmap_data = final_margin_map[['Classification Numeric']].T

# NOTE(review): this per-state colormap name is computed but not used by the
# heatmap call below.
colors = final_margin_map['Classification'].map({'Red': 'Reds', 'Blue': 'Blues'})

# Create a figure
plt.figure(figsize=(16, 4))

# Generate the heatmap without margin annotations
sns.heatmap(heatmap_data,
            annot=False,  # Colour only, no numbers in the cells
            cmap="coolwarm",  # Blue-Red gradient
            # Pin the scale so 0 is always blue and 1 is always red, even if
            # only one of the two classes is present in the data.
            vmin=0,
            vmax=1,
            cbar_kws={'label': 'State Classification (Blue = Dem., Red = Rep.)', 'ticks': [0, 1]},
            xticklabels=final_margin_map['State'],
            yticklabels=False,  # Single row, so no y-tick labels
            linewidths=0.5)

# Title and labels
plt.title('2020 U.S. Presidential Election: State Classification (Red/Blue)', fontsize=16)
plt.xlabel('State', fontsize=14)
plt.ylabel('')  # The only row is the classification; states are on the x-axis

# Rotate x-axis labels for readability
plt.xticks(rotation=90, fontsize=12)

# Adjust layout for better spacing
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# United States Democrat vs. Republican Heatmap

# Statewide 2020 presidential winner for each of the 50 states.
# Kept as a name -> party mapping so a state and its party cannot drift out of
# alignment the way two parallel positional lists can.
state_winners = {
    'Alabama': 'R', 'Alaska': 'R', 'Arizona': 'D', 'Arkansas': 'R', 'California': 'D',
    'Colorado': 'D', 'Connecticut': 'D', 'Delaware': 'D', 'Florida': 'R', 'Georgia': 'D',
    'Hawaii': 'D', 'Idaho': 'R', 'Illinois': 'D', 'Indiana': 'R', 'Iowa': 'R',
    'Kansas': 'R', 'Kentucky': 'R', 'Louisiana': 'R', 'Maine': 'D', 'Maryland': 'D',
    'Massachusetts': 'D', 'Michigan': 'D', 'Minnesota': 'D', 'Mississippi': 'R', 'Missouri': 'R',
    'Montana': 'R', 'Nebraska': 'R', 'Nevada': 'D', 'New Hampshire': 'D', 'New Jersey': 'D',
    'New Mexico': 'D', 'New York': 'D', 'North Carolina': 'R', 'North Dakota': 'R', 'Ohio': 'R',
    'Oklahoma': 'R', 'Oregon': 'D', 'Pennsylvania': 'D', 'Rhode Island': 'D', 'South Carolina': 'R',
    'South Dakota': 'R', 'Tennessee': 'R', 'Texas': 'R', 'Utah': 'R', 'Vermont': 'D',
    'Virginia': 'D', 'Washington': 'D', 'West Virginia': 'R', 'Wisconsin': 'D', 'Wyoming': 'R',
}

# Parallel lists are still exposed under their original names.
states = list(state_winners)
parties = list(state_winners.values())

# Create the DataFrame
state_classification = pd.DataFrame({'STATE': states, 'Party': parties})

# Quick sanity check: row count and how many states each party carried.
print("Number of States:", len(state_classification['STATE']))
print("Number of Parties:", len(state_classification['Party']))
print(state_classification['Party'].value_counts())

# Load the GeoJSON file for U.S. state boundaries (requires network access).
geojson_url = 'https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json'
usa_map = gpd.read_file(geojson_url)

# Merge geographical data with election data.
# The default inner join keeps only features whose 'name' matches a listed state.
usa_map = usa_map.merge(state_classification, left_on='name', right_on='STATE')

# Assign colors for parties
color_mapping = {'D': 'blue', 'R': 'red'}
usa_map['color'] = usa_map['Party'].map(color_mapping)

# Report, then grey out, any state whose party code is not in color_mapping.
print(usa_map['color'].isna().sum())
usa_map['color'] = usa_map['color'].fillna('gray')

# Validate the column
print(usa_map[['name', 'color']].head())

# Draw the filled states and their outlines on a single figure.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
usa_map.plot(ax=ax, color=usa_map['color'])
usa_map.boundary.plot(ax=ax, linewidth=1)

# Add title
plt.title('2020 U.S. Presidential Election Results', fontsize=16)
plt.axis('off')  # Turn off axes
plt.show()

In [None]:
# Democrat vs. Republican Bar Graph

# `state_party_results` (one row per state/party with summed votes) is not
# created by any earlier cell, so build it from the cleaned results frame when
# it is missing. An existing definition is left untouched.
# NOTE(review): the fallback keys states by 'STATE ABBREVIATION'; confirm that
# matches how `state_party_results` was originally produced.
if 'state_party_results' not in globals():
    state_party_results = (
        federalelections2020_file
        .groupby(['STATE ABBREVIATION', 'PARTY'], as_index=False)['GENERAL RESULTS']
        .sum()
        .rename(columns={'STATE ABBREVIATION': 'STATE'})
    )

# Determine if a state is predominantly Democrat (D) or Republican (R)
state_majority_party = state_party_results.pivot(index='STATE', columns='PARTY', values='GENERAL RESULTS').fillna(0)
# Equal totals (including rows with no D or R votes at all) are labelled 'Tie'
# instead of being counted as Republican.
state_majority_party['Majority'] = state_majority_party.apply(
    lambda row: 'D' if row.get('D', 0) > row.get('R', 0)
    else ('R' if row.get('R', 0) > row.get('D', 0) else 'Tie'),
    axis=1
)

# Count the number of states for each party.
# value_counts() orders by frequency, so fix the order explicitly and look the
# bar colours up by party label rather than by position.
party_colors = {'D': 'blue', 'R': 'red'}
state_counts = state_majority_party['Majority'].value_counts().reindex(list(party_colors), fill_value=0)

# Bar Chart: Number of States Dominated by Each Party
plt.figure(figsize=(8, 6))
state_counts.plot(kind='bar', color=[party_colors[party] for party in state_counts.index])
plt.title("Number of States Dominated by Each Party", fontsize=16)
plt.xlabel("Party", fontsize=12)
plt.ylabel("Number of States", fontsize=12)
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()