# **Election Data Analysis: Rural vs Urban Vote Shifts**
This Jupyter Notebook explores election trends by analyzing county-level voting shifts, party flips, and the influence of demographics (race, gender). We classify counties as **rural, urban, or suburban** and analyze how their presidential vote changed across election cycles.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Set visualization style: apply matplotlib's 'fivethirtyeight' style sheet
# to every figure created later in the notebook.
plt.style.use('fivethirtyeight')

## **1. Load the Datasets**

In [None]:
# Read the four input CSVs; paths are relative to the notebook's working directory.
# NOTE(review): electors_data is loaded but not referenced by any later cell shown here.
electors_data, gender_data, race_data, elections_rural = (
    pd.read_csv(csv_path)
    for csv_path in (
        'electors_data.csv',
        'genderelection2.csv',
        'raceelection.csv',
        'elections_rural.csv',
    )
)

# Preview the county-level election table
elections_rural.head()

## **2. Classify Counties as Rural, Urban, or Suburban**

In [None]:
def classify_county(county_type):
    """Collapse a detailed county type label into a broad category.

    Parameters
    ----------
    county_type
        A single value from the 'county_type' column.

    Returns
    -------
    str
        'Urban' for 'Large Urban', 'Medium Urban' or 'Small Urban';
        'Suburban' for 'Large Suburban'; 'Rural' for 'Rural';
        'Unknown' for anything else (including missing values).
    """
    urban_labels = ('Large Urban', 'Medium Urban', 'Small Urban')
    if county_type in urban_labels:
        return 'Urban'
    if county_type == 'Large Suburban':
        return 'Suburban'
    # Exact match only: any other label falls through to 'Unknown'.
    return 'Rural' if county_type == 'Rural' else 'Unknown'

# Label every row with its broad category, then show how many rows fall in each.
elections_rural['county_category'] = elections_rural['county_type'].map(classify_county)
elections_rural['county_category'].value_counts()

## **3. Identify Counties That Flipped Parties**

In [None]:
# Keep only presidential races. .copy() makes this an independent frame, so adding
# a column below does not raise pandas' SettingWithCopyWarning.
elections_presidential = elections_rural[elections_rural['office'] == 'US PRESIDENT'].copy()
elections_presidential['vote_share'] = (
    elections_presidential['candidatevotes'] / elections_presidential['totalvotes']
)

# Determine winners: within each (year, state, county) put the highest vote share
# first, then keep only that first row.
elections_winner = (
    elections_presidential
    .sort_values(
        ['year', 'state', 'county_name', 'vote_share'],
        ascending=[True, True, True, False],
    )
    .drop_duplicates(subset=['year', 'state', 'county_name'], keep='first')
)

# Identify flips. Rows are ordered by year, so shift(1) within a county yields the
# winner of that county's previous election in the data.
elections_winner['previous_party'] = (
    elections_winner.groupby(['state', 'county_name'])['party'].shift(1)
)
# A county's first election has no predecessor (previous_party is NaN). NaN never
# compares equal, so without the notna() guard every county would be counted as
# having flipped in its first recorded year.
elections_winner['party_flipped'] = (
    elections_winner['previous_party'].notna()
    & (elections_winner['party'] != elections_winner['previous_party'])
)

# Number of flip events per county category.
flipped_counts = (
    elections_winner[elections_winner['party_flipped']]
    .groupby('county_category')
    .size()
    .reset_index(name='flipped_count')
)
flipped_counts

## **4. Calculate Vote Margin Shifts**

In [None]:
# One row per (year, state, county) with each party's vote share as a column.
# NOTE(review): pivot_table aggregates with the mean by default; if the data has
# several rows per party and county (e.g. split by voting mode), shares are averaged
# rather than summed — confirm one row per party/county or switch the aggregation.
elections_margin = elections_presidential.pivot_table(
    index=['year', 'state', 'county_name'],
    columns='party',
    values='vote_share'
).fillna(0).reset_index()

# Compute the margin (Republican share - Democrat share); .get() falls back to 0
# when a party column is absent from the pivot.
elections_margin['margin'] = elections_margin.get('REPUBLICAN', 0) - elections_margin.get('DEMOCRAT', 0)

# Merge back into the winners dataset. Margin columns left over from an earlier run
# of this cell are dropped first; otherwise the merge would produce
# margin_x / margin_y and the groupby below would fail with a KeyError.
elections_winner = (
    elections_winner
    .drop(columns=['margin', 'previous_margin', 'margin_shift'], errors='ignore')
    .merge(
        elections_margin[['year', 'state', 'county_name', 'margin']],
        on=['year', 'state', 'county_name'],
        how='left',
    )
)

# Margin shift = this election's margin minus the same county's previous margin.
# Relies on elections_winner being ordered by year within each county (a left merge
# keeps the left frame's row order).
elections_winner['previous_margin'] = elections_winner.groupby(['state', 'county_name'])['margin'].shift(1)
elections_winner['margin_shift'] = elections_winner['margin'] - elections_winner['previous_margin']
elections_winner[['state', 'county_name', 'year', 'margin_shift']].dropna().head()

## **5. Analyze the Impact of Race and Gender**

In [None]:
# Build cleaned copies of the demographic tables instead of editing the loaded frames
# in place, so re-running this cell cannot strip '%' / divide by 100 a second time.
gender_shares = gender_data.rename(columns={'State': 'state'})[['state', 'Male', 'Female']].copy()
race_shares = race_data.rename(columns={'State': 'state'})[
    ['state', 'White', 'Black', 'Hispanic', 'Asian']
].copy()

# Convert percentage strings (e.g. '48%') to fractions between 0 and 1.
gender_shares[['Male', 'Female']] = (
    gender_shares[['Male', 'Female']].replace('%', '', regex=True).astype(float) / 100
)
race_shares[['White', 'Black', 'Hispanic', 'Asian']] = (
    race_shares[['White', 'Black', 'Hispanic', 'Asian']].replace('%', '', regex=True).astype(float) / 100
)

# Attach the state-level shares to every county row.
# NOTE(review): only the column name is aligned ('State' -> 'state'); the state values
# themselves are merged as-is — confirm both tables spell and case states the same way
# as the election data, otherwise the left merge fills these columns with NaN.
# NOTE(review): assumes one row per state in each demographic table; duplicates would
# multiply county rows — verify.
# Demographic columns from an earlier run are dropped first so the merge does not
# create Male_x / Male_y style duplicates.
elections_winner = (
    elections_winner
    .drop(columns=['Male', 'Female', 'White', 'Black', 'Hispanic', 'Asian'], errors='ignore')
    .merge(gender_shares, on='state', how='left')
    .merge(race_shares, on='state', how='left')
)

# Pairwise correlations between state-level demographic shares and county margin shift.
elections_winner[['Male', 'Female', 'White', 'Black', 'margin_shift']].corr()

## **6. Save Processed Data for Visualizations**

In [None]:
# Flip counts per county category.
flipped_counts.to_csv('flipped_counts.csv', index=False)

# Margin shifts with their county category; rows missing either value are dropped.
margin_shift_data = elections_winner.loc[:, ['county_category', 'margin_shift']].dropna()
margin_shift_data.to_csv('margin_shift_data.csv', index=False)

# Per-county vote shares with the FIPS code attached.
# NOTE(review): if a (state, county_name) pair maps to more than one county_fips value,
# this left merge repeats the vote-share rows — verify the lookup is unique per county.
county_fips_lookup = elections_rural.loc[:, ['state', 'county_name', 'county_fips']].drop_duplicates()
county_vote_share = pd.merge(
    elections_margin,
    county_fips_lookup,
    on=['state', 'county_name'],
    how='left',
)
county_vote_share.to_csv('county_vote_share_with_fips.csv', index=False)

## **7. Create Visualizations**

In [None]:
# Bar Chart: Number of Counties That Flipped
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=flipped_counts,
    x='county_category',
    y='flipped_count',
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Number of Counties That Flipped Parties by Category')
ax.set_xlabel('County Category')
ax.set_ylabel('Number of Counties')
plt.show()

In [None]:
# Histogram: Vote Margin Shifts
# One overlaid histogram per county category. 'Suburban' is included because the
# classification step produces it; categories with no rows are skipped so they do not
# add an empty legend entry or trip the KDE fit.
fig, ax = plt.subplots(figsize=(10, 6))
for category in ['Rural', 'Urban', 'Suburban', 'Unknown']:
    subset = margin_shift_data.loc[
        margin_shift_data['county_category'] == category, 'margin_shift'
    ].dropna()
    if subset.empty:
        continue
    sns.histplot(subset, bins=30, kde=True, label=category, alpha=0.6, ax=ax)
ax.set_title('Distribution of Vote Margin Shifts by County Type')
ax.set_xlabel('Vote Margin Shift')
ax.set_ylabel('Frequency')
ax.legend(title='County Type')
plt.show()