<a href="https://colab.research.google.com/github/Trisken221/QM2/blob/main/evolution_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#choropleth map - change in political affiliation by US States from 1996 - 2024

In [1]:
import pandas as pd
import pylab
import matplotlib.pyplot as plt
# make the plots a little wider by default
%matplotlib inline
plt.style.use('ggplot')

pylab.rcParams['figure.figsize'] = (10., 15.)

# Load the historical election data (1996-2020) from URL
historical_data_url = "https://raw.githubusercontent.com/Trisken221/QM2/refs/heads/main/1996-2020%20.csv"
historical_data = pd.read_csv(historical_data_url)
historical_data

Unnamed: 0.1,Unnamed: 0,2020,Unnamed: 2,2016,Unnamed: 4,2012,Unnamed: 6,2008,Unnamed: 8,2004,Unnamed: 10,2000,Unnamed: 12,1996,Unnamed: 14
0,State,Democratic,Republican,Democratic,Republican,Democratic,Republican,Democratic,Republican,Democratic,Republican,Democratic,Republican,Democratic,Republican
1,,,,,,,,,,,,,,,
2,Alabama,36.57%,62.03%,34.36%,62.08%,38.36%,60.55%,38.74%,60.32%,36.84%,62.46%,41.57,56.48,43.16,50.12
3,Alaska,42.77%,52.83%,36.55%,51.28%,40.81%,54.80%,37.89%,59.42%,35.52%,61.07%,27.67,58.62,33.27,50.8
4,Arizona,49.36%,49.06%,45.13%,48.67%,44.59%,53.65%,45.12%,53.64%,44.40%,54.87%,44.73,51.02,46.52,44.29
5,Arkansas,34.78%,62.40%,33.65%,60.57%,36.88%,60.57%,38.86%,58.72%,44.55%,54.31%,45.86,51.31,53.74,36.8
6,California,63.48%,34.32%,61.73%,31.62%,60.24%,37.12%,61.01%,36.95%,54.30%,44.36%,53.45,41.65,51.1,38.21
7,Colorado,55.40%,41.90%,48.16%,43.25%,51.49%,46.13%,53.66%,44.71%,47.02%,51.69%,42.39,50.75,44.43,45.8
8,Connecticut,59.26%,39.19%,54.57%,40.93%,58.06%,40.73%,60.59%,38.22%,54.31%,43.95%,55.91,38.44,52.83,34.69
9,Delaware,58.74%,39.77%,53.09%,41.71%,58.61%,39.98%,61.94%,36.95%,53.35%,45.75%,54.96,41.9,51.82,36.58


In [2]:
# Rename columns appropriately
column_names = [
    "State", "2020_Democratic", "2020_Republican",
    "2016_Democratic", "2016_Republican",
    "2012_Democratic", "2012_Republican",
    "2008_Democratic", "2008_Republican",
    "2004_Democratic", "2004_Republican",
    "2000_Democratic", "2000_Republican",
    "1996_Democratic", "1996_Republican"
]
historical_data.columns = column_names

# Add '%' to 1996 and 2000 columns
columns_to_fix = ["1996_Democratic", "1996_Republican", "2000_Democratic", "2000_Republican"]
for col in columns_to_fix:
    historical_data[col] = historical_data[col].astype(str) + "%"

# Drop rows with missing State values
historical_data = historical_data.dropna(subset=["State"])

# Reshape data to long format
long_data = pd.melt(
    historical_data,
    id_vars=["State"],
    var_name="Year_Party",
    value_name="Vote Percentage"
)

# Split "Year_Party" into "Year" and "Party"
long_data[['Year', 'Party']] = long_data['Year_Party'].str.extract(r'(\d{4})_(\w+)')
long_data.drop(columns=["Year_Party"], inplace=True)

# Ensure "Vote Percentage" is a string and handle missing values
long_data["Vote Percentage"] = long_data["Vote Percentage"].astype(str).fillna("0%")

# Filter rows with valid numeric "Vote Percentage" values
valid_rows = long_data["Vote Percentage"].str.contains(r"^\d+(\.\d+)?%$", na=False)
long_data = long_data[valid_rows].copy()

# Clean "Vote Percentage"
long_data["Vote Percentage"] = long_data["Vote Percentage"].str.replace("%", "").astype(float)

# Ensure all years are included for each state
unique_years = long_data['Year'].unique()

# Determine winning parties by State and Year
winning_parties = long_data.loc[
    long_data.groupby(['State', 'Year'])["Vote Percentage"].idxmax()
]

# Reset index for the final output
winning_parties.reset_index(drop=True, inplace=True)

# Verify that all 7 years are present for each state
expected_years = set(["1996", "2000", "2004", "2008", "2012", "2016", "2020"])
actual_years = set(winning_parties["Year"].unique())
missing_years = expected_years - actual_years

# Outputs
print(f"Unique Years in Reshaped Data: {unique_years}")
print(f"Missing Years in Winning Parties: {missing_years}")
print(winning_parties.head())

Unique Years in Reshaped Data: ['2020' '2016' '2012' '2008' '2004' '2000' '1996']
Missing Years in Winning Parties: set()
     State  Vote Percentage  Year       Party
0  Alabama            50.12  1996  Republican
1  Alabama            56.48  2000  Republican
2  Alabama            62.46  2004  Republican
3  Alabama            60.32  2008  Republican
4  Alabama            60.55  2012  Republican


  valid_rows = long_data["Vote Percentage"].str.contains(r"^\d+(\.\d+)?%$", na=False)


In [3]:
# Load the 2024 election data
latest_data_url = "https://raw.githubusercontent.com/Trisken221/QM2/refs/heads/main/2024%20election%20results%20by%206th%20December.csv"
latest_data = pd.read_csv(latest_data_url)

# Select rows up to Wyoming (line 343)
latest_data = latest_data.iloc[:343]

# Display the subset of the 2024 data
print(f"Subset of 2024 data (rows: {len(latest_data)}):")
print(latest_data.tail())


Subset of 2024 data (rows: 343):
             State          Candidate                   Party    Votes  \
338  West Virginia          Write-ins              Write - In      370   
339        Wyoming  Donald Trump wins  Republican (incumbent)  192,633   
340        Wyoming      Kamala Harris                Democrat   69,527   
341        Wyoming       Chase Oliver             Libertarian    4,193   
342        Wyoming          Write-ins              Write - In    2,695   

    Vote share Electoral votes Expected votes counted (%)  
338      0.00%               0                        99%  
339     71.60%               3                        99%  
340     25.80%               0                        99%  
341      1.60%               0                        99%  
342      1.00%               0                        99%  


In [4]:
# Select relevant columns and rename "Vote share" to "Vote Percentage"
latest_data = latest_data[["State", "Party", "Vote share"]]
latest_data = latest_data.rename(columns={"Vote share": "Vote Percentage"})

# Display the cleaned subset of the 2024 data
print(f"Cleaned 2024 data (rows: {len(latest_data)}):")
print(latest_data.head())


Cleaned 2024 data (rows: 343):
    State                   Party Vote Percentage
0  Alaska  Republican (incumbent)          54.90%
1  Alaska                Democrat          41.10%
2  Alaska             Independent           1.70%
3  Alaska             Libertarian           0.90%
4  Alaska                   Green           0.70%


In [5]:
# Determine winning parties by State for 2024
latest_data["Vote Percentage"] = latest_data["Vote Percentage"].astype(str).str.replace("%", "").astype(float)
winners_2024 = latest_data.loc[
    latest_data.groupby("State")["Vote Percentage"].idxmax()
].copy()
winners_2024["Year"] = "2024"

# Combine the processed 2024 data with historical winning parties
combined_data = pd.concat([winning_parties, winners_2024], ignore_index=True)
combined_data.reset_index(drop=True, inplace=True)

# Display the combined dataset
print(f"Combined dataset (total rows: {len(combined_data)}):")
print(combined_data.head())

# Validate the combined data
print("\nUnique Years in Combined Data:")
print(combined_data['Year'].unique())

print("\nStates and Years Data Coverage:")
print(combined_data.groupby('Year')["State"].count())


Combined dataset (total rows: 408):
     State  Vote Percentage  Year       Party
0  Alabama            50.12  1996  Republican
1  Alabama            56.48  2000  Republican
2  Alabama            62.46  2004  Republican
3  Alabama            60.32  2008  Republican
4  Alabama            60.55  2012  Republican

Unique Years in Combined Data:
['1996' '2000' '2004' '2008' '2012' '2016' '2020' '2024']

States and Years Data Coverage:
Year
1996    51
2000    51
2004    51
2008    51
2012    51
2016    51
2020    51
2024    51
Name: State, dtype: int64


as the dataset includes Washington D.C. as a separate entity, so increases the count to 51

In [6]:
# Save combined_data as a CSV file
combined_data.to_csv("combined_data.csv", index=False)

# If you're in Colab, you can download the file with this command:
from google.colab import files
files.download("combined_data.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
!pip install plotly
import plotly.express as px
!pip install us
import us
import json
import requests
# Load GeoJSON for states
geojson_path = "https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_500k.json"
state_polygons = requests.get(geojson_path).json()

# Adjust state names in the dataset to match GeoJSON identifiers
combined_data["State"] = combined_data["State"].replace({"D.C.": "District of Columbia"})
combined_data["State_Abbreviation"] = combined_data["State"].apply(lambda x: us.states.lookup(x).abbr if us.states.lookup(x) else None)
# Ensure the 'State' column matches the GeoJSON's identifier field
geojson_states = [feature['properties']['NAME'] for feature in state_polygons['features']]
data_states = combined_data['State'].unique()

# Check for mismatches
missing_in_geojson = [state for state in data_states if state not in geojson_states]
missing_in_data = [state for state in geojson_states if state not in data_states]

print(f"States missing in GeoJSON: {missing_in_geojson}")
print(f"States missing in election data: {missing_in_data}")

Collecting us
  Downloading us-3.2.0-py3-none-any.whl.metadata (10 kB)
Downloading us-3.2.0-py3-none-any.whl (13 kB)
Installing collected packages: us
Successfully installed us-3.2.0
States missing in GeoJSON: []
States missing in election data: ['Puerto Rico']


Territories like Puerto Rico are not states. They might appear in some datasets but are typically excluded from state-level analyses, so in this case we ignore it.

In [10]:
# Calculate Margin Percentage as deviation from 50%
combined_data["Margin Percentage"] = (combined_data["Vote Percentage"] - 50).abs()

# Generate a Choropleth Map
fig = px.choropleth(
    combined_data,
    locations="State_Abbreviation",  # Use state abbreviations
    locationmode="USA-states",  # Match USA states
    color="Margin Percentage",  # Use margin percentage for color scale
    animation_frame=combined_data["Year"].astype(str),  # Animate by year
    title="US Election Results by State (1996-2024) with Margin Percentage",
    height=800,
    color_continuous_scale=[
        (0.0, "lightblue"),  # Light blue for low Democratic margins
        (0.5, "blue"),       # Darker blue for higher Democratic margins
        (0.5, "lightcoral"), # Light red for low Republican margins
        (1.0, "red")         # Darker red for higher Republican margins
    ],
    range_color=[0, 50],  # Margin percentages range from 0 to 50
)

# Adjust map layout
fig.update_geos(
    resolution=50,
    projection_type="albers usa",
    center={"lat": 37.5, "lon": -96},
    projection_scale=3.5  # Zoom in
)

# Customize layout and color bar
fig.update_layout(
    margin={"r": 10, "t": 25, "l": 10, "b": 0},
    title=dict(
        text="US Election Results by State (1996-2024) (Margin Percentage Adjusted)",
        x=0.5,
        xanchor="center",
        font=dict(size=16)
    ),
    coloraxis_colorbar=dict(
        title="Margin by Party (%)",  # Updated title
        tickvals=[0, 25, 50],  # Add ticks at key points
        ticktext=["Low Margin (0%)", "Moderate Margin (25%)", "High Margin (50%)"],  # Descriptive labels
        tickfont=dict(size=10),
        len=0.5  # Adjust bar length
    )
)

# Add annotations for clarity
fig.add_annotation(
    x=0.2,  # Position on the left side of the map
    y=-0.1,  # Position below the map
    xref="paper",
    yref="paper",
    showarrow=False,
    text="Blue = Democratic Dominance | Red = Republican Dominance",
    font=dict(size=12),
    align="center"
)

# Show the map
fig.show()


In [11]:
def party_color_with_margin(row):
    # Ensure blue for Democratic and red for Republican
    if row['Party'] == 'Democratic':
        return f"rgba(0, 0, 255, {row['Margin'] / 100})"  # Blue for Democrats
    elif row['Party'] == 'Republican':
        return f"rgba(255, 0, 0, {row['Margin'] / 100})"  # Red for Republicans
    return "rgba(128, 128, 128, 0.5)"  # Neutral gray for missing/other parties


In [12]:
# Compute Margin of Victory for Each State and Year
def calculate_margin(group):
    sorted_group = group.sort_values(by="Vote Percentage", ascending=False)
    if len(sorted_group) > 1:
        margin = sorted_group.iloc[0]["Vote Percentage"] - sorted_group.iloc[1]["Vote Percentage"]
        return margin
    return 0  # No margin if only one party is present

# Apply margin calculation to the combined data
combined_data["Margin"] = combined_data.groupby(["State", "Year"]).apply(calculate_margin).reset_index(level=[0, 1], drop=True)

# Adjust Party_Color based on Margin
def party_color_with_margin(row):
    if row['Party'] == 'Democratic':
        return f"rgba(0, 0, 255, {row['Margin'] / 100})"  # Blue with intensity by margin
    elif row['Party'] == 'Republican':
        return f"rgba(255, 0, 0, {row['Margin'] / 100})"  # Red with intensity by margin
    return "rgba(128, 128, 128, 0.5)"  # Neutral gray for missing/other parties

combined_data["Party_Color"] = combined_data.apply(party_color_with_margin, axis=1)

# Generate a Choropleth Map
fig = px.choropleth(
    combined_data,
    locations="State_Abbreviation",  # Use abbreviations for states
    locationmode="USA-states",  # Match by state abbreviations
    color="Party_Color",  # Use the adjusted Party_Color
    animation_frame=combined_data["Year"].astype(str),
    title="US Election Results by State (1996-2024) with Margin",
    height=800
)

# Update map focus and layout
fig.update_geos(
    resolution=50,
    projection_type="albers usa",
    center={"lat": 37.5, "lon": -96},
    projection_scale=3.5
)
fig.update_layout(
    margin={"r": 10, "t": 25, "l": 10, "b": 0},
    title=dict(
        text="US Election Results by State (1996-2024) (Adjusted by Margin)",
        x=0.5,
        xanchor="center",
        font=dict(size=16)
    )
)

# Show the map
fig.show()






In [13]:
# Ensure Vote Percentage range is correct
print(combined_data["Vote Percentage"].describe())

# Check unique years
print(combined_data["Year"].unique())

# Generate map with State Abbreviations
fig = px.choropleth(
    combined_data,
    locations="State_Abbreviation",  # Use abbreviations for states
    locationmode="USA-states",  # Match by state abbreviations
    color="Vote Percentage",
    animation_frame=combined_data["Year"].astype(str),
    color_continuous_scale=[
        (0.0, "lightblue"),  # Light blue for low Democratic votes
        (0.5, "blue"),       # Darker blue for higher Democratic votes
        (0.5, "lightcoral"), # Light red for low Republican votes
        (1.0, "red")         # Darker red for higher Republican votes
    ],
    title="US Election Results by State (1996-2024)",
    height=800  # Reduce height slightly for better fit on screens
)

# Update map focus and projection
fig.update_geos(
    resolution=50,  # Higher resolution for state boundaries
    projection_type="albers usa",  # Use USA-focused projection
    center={"lat": 37.5, "lon": -96},  # Center map on the US
    projection_scale=3.5  # Zoom in further to fill more of the frame
)

# Add legend annotation below the title
fig.add_annotation(
    x=0.5,
    y=0.95,  # Place annotation closer to the title
    xref="paper",
    yref="paper",
    showarrow=False,
    text="Color Legend: Blue = Democratic, Red = Republican (Lighter = Lower Percentage, Darker = Higher Percentage)",
    font=dict(size=12),
    align="center"
)

# Adjust layout for compactness
fig.update_layout(
    margin={"r": 10, "t": 25, "l": 10, "b": 0},  # Reduce margins
    title=dict(
        text="US Election Results by State (1996-2024)",
        x=0.5,  # Center title
        xanchor="center",
        font=dict(size=16)  # Adjust title font size for compactness
    ),
    coloraxis_colorbar=dict(
        thickness=25,  # Adjust thickness of the color bar
        len=0.5,       # Reduce length of the color bar relative to the map
        title="Vote %",
        titlefont=dict(size=10),
        tickfont=dict(size=9)
    )
)

# Show the map
fig.show()


count    408.000000
mean      56.595613
std        7.347053
min       43.930000
25%       51.340000
50%       55.750000
75%       60.202500
max       92.460000
Name: Vote Percentage, dtype: float64
['1996' '2000' '2004' '2008' '2012' '2016' '2020' '2024']


In [14]:
# Ensure Vote Percentage range is correct
print(combined_data["Vote Percentage"].describe())

# Check unique years
print(combined_data["Year"].unique())

# Generate map with State Abbreviations
fig = px.choropleth(
    combined_data,
    locations="State_Abbreviation",  # Use abbreviations for states
    locationmode="USA-states",  # Match by state abbreviations
    color="Vote Percentage",
    animation_frame=combined_data["Year"].astype(str),
    color_continuous_scale=[
        (0.0, "lightblue"),  # Light blue for low Democratic votes
        (0.5, "blue"),       # Darker blue for higher Democratic votes
        (0.5, "lightcoral"), # Light red for low Republican votes
        (1.0, "red")         # Darker red for higher Republican votes
    ],
    title="US Election Results by State (1996-2024)",
    height=1000
)

# Update map focus and projection
fig.update_geos(
    resolution=50,  # Higher resolution for state boundaries
    projection_type="albers usa",  # Use USA-focused projection
    center={"lat": 37.5, "lon": -96},  # Center map on the US
    projection_scale=3.0  # Adjust zoom for better state visibility
)

# Add legend annotation
fig.add_annotation(
    x=0.5,
    y=-0.1,
    xref="paper",
    yref="paper",
    showarrow=False,
    text="Color Legend: Blue = Democratic, Red = Republican (Lighter = Lower Percentage, Darker = Higher Percentage)",
    font=dict(size=14),
    align="center"
)

# Adjust layout for better aesthetics
fig.update_layout(
    margin={"r": 0, "t": 50, "l": 0, "b": 50},
    title=dict(
        text="US Election Results by State (1996-2024)",
        x=0.5,  # Center title
        xanchor="center",
        font=dict(size=18)  # Adjust title font size
    )
)

# Show the map
fig.show()


count    408.000000
mean      56.595613
std        7.347053
min       43.930000
25%       51.340000
50%       55.750000
75%       60.202500
max       92.460000
Name: Vote Percentage, dtype: float64
['1996' '2000' '2004' '2008' '2012' '2016' '2020' '2024']


In [15]:
# Ensure Vote Percentage range is correct
print(combined_data["Vote Percentage"].describe())

# Check unique years
print(combined_data["Year"].unique())

# Generate map with State Abbreviations
fig = px.choropleth(
    combined_data,
    locations="State_Abbreviation",  # Use abbreviations for states
    locationmode="USA-states",  # Match by state abbreviations
    color="Vote Percentage",
    animation_frame=combined_data["Year"].astype(str),
    color_continuous_scale=px.colors.diverging.RdBu,
    title="US Election Results by State (1996-2024)",
    height=1000
)

fig.update_geos(fitbounds="locations", visible=True)
fig.update_layout(
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
    geo=dict(
        projection_scale=1.2  # Adjust this scale to make the map larger
    )
)

# Show the map
fig.show()


count    408.000000
mean      56.595613
std        7.347053
min       43.930000
25%       51.340000
50%       55.750000
75%       60.202500
max       92.460000
Name: Vote Percentage, dtype: float64
['1996' '2000' '2004' '2008' '2012' '2016' '2020' '2024']
