In [2]:
import pandas as pd
import plotly.express as px

# Locations of the three input CSV files, relative to the working directory.
usa_covid_data_path = '../Data/USA Covid Data.csv'
covid_deaths_by_county_path = '../Data/Provisional_COVID-19_Deaths_by_County__and_Race_and_Hispanic_Origin_20241120.csv'
election_2020_path = '../Data/us_election_2020.csv'

# Read every CSV into its own DataFrame, in the same order as the paths above.
usa_covid_data, covid_deaths_by_county, election_2020_data = (
    pd.read_csv(csv_path)
    for csv_path in (usa_covid_data_path, covid_deaths_by_county_path, election_2020_path)
)

FileNotFoundError: [Errno 2] No such file or directory: '../Data/USA Covid Data.csv'

In [None]:
# USA COVID data: keep just the state name and population columns, and
# expose the state name under the column label 'State'.
usa_covid_data_cleaned = (
    usa_covid_data[['USA State', 'Population']]
    .rename(columns={'USA State': 'State'})
)

# Process COVID Death Data
# Keep only the rows for a single 'Indicator' value so the 'COVID-19 Deaths'
# figure is not summed once per indicator.
# NOTE(review): assumes each geographic unit appears once per indicator — confirm against the CSV.
covid_deaths_by_county_filtered = covid_deaths_by_county[
    covid_deaths_by_county['Indicator'] == 'Distribution of COVID-19 deaths (%)'
]
# Drop only rows that are exact duplicates of one another. De-duplicating on
# ('State', 'COVID-19 Deaths') would also discard distinct rows that happen to
# report the same death count within a state, under-counting that state's total.
covid_deaths_by_county_deduplicated = covid_deaths_by_county_filtered.drop_duplicates()
# Sum the remaining rows per state and expose the result as 'Total Deaths'.
state_death_data = (
    covid_deaths_by_county_deduplicated
    .groupby('State')
    .agg({'COVID-19 Deaths': 'sum'})
    .reset_index()
    .rename(columns={'COVID-19 Deaths': 'Total Deaths'})
)

# Process 2020 Election Data
# Keep the state plus the two candidates' vote shares, renamed to the labels
# used for plotting.
election_2020_cleaned = (
    election_2020_data[['state', 'trump_pct', 'biden_pct']]
    .rename(columns={
        'state': 'State',
        'trump_pct': 'Republican Votes (%)',
        'biden_pct': 'Democratic Votes (%)',
    })
)
# Standardize state names. str.title() capitalises every word, which turns
# "District of Columbia" into "District Of Columbia"; restore the lowercase
# "of" so the name matches the spelling used in state_mapping and is not
# dropped by the inner merge.
election_2020_cleaned['State'] = (
    election_2020_cleaned['State']
    .str.title()
    .str.replace(r'\bOf\b', 'of', regex=True)
)

# Bring the 'State' column of all three frames to a common form.
# Lookup from two-letter abbreviation to full name (50 states plus DC).
state_mapping = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    "DC": "District of Columbia",
}
# Death data: swap any value found in the lookup for its full name; values
# without an entry are left as they are.
state_death_data['State'] = state_death_data['State'].replace(to_replace=state_mapping)
# The other two frames only have surrounding whitespace trimmed.
for frame in (usa_covid_data_cleaned, election_2020_cleaned):
    frame['State'] = frame['State'].str.strip()

# Inner-join population, state death totals and election results on 'State',
# then derive deaths as a percentage of population.
merged_data = (
    usa_covid_data_cleaned
    .merge(state_death_data, on='State', how='inner')
    .merge(election_2020_cleaned, on='State', how='inner')
    .assign(**{
        'Death Rate (%)': lambda joined: (joined['Total Deaths'] / joined['Population']) * 100,
    })
)

def _votes_vs_deaths_bubble(vote_share_column):
    """Build a bubble plot of one vote-share column against total COVID-19 deaths.

    Reads the module-level ``merged_data`` frame. Each bubble is sized by the
    'Population' column and shows the 'State' value on hover.

    Parameters
    ----------
    vote_share_column : str
        Name of the ``merged_data`` column to place on the x-axis; it is also
        used verbatim in the plot title and as the x-axis label.

    Returns
    -------
    The figure object returned by ``px.scatter``.
    """
    return px.scatter(
        merged_data,
        x=vote_share_column,
        y='Total Deaths',
        size='Population',  # Bubble size based on population
        hover_name='State',
        title=f'Bubble Plot: {vote_share_column} vs COVID-19 Total Deaths',
        labels={vote_share_column: vote_share_column, 'Total Deaths': 'COVID-19 Total Deaths'},
    )


# One bubble plot per party, built from the same template.
fig1 = _votes_vs_deaths_bubble('Republican Votes (%)')
fig2 = _votes_vs_deaths_bubble('Democratic Votes (%)')

# Show plots
for figure in (fig1, fig2):
    figure.show()


In [14]:
import plotly.io as pio

# Write each bubble plot to its own HTML file; auto_open=True also opens the
# file once it has been saved.
for figure, html_name in (
    (fig1, 'Republican_Votes_vs_Total_Deaths(2020).html'),
    (fig2, 'Democratic_Votes_vs_Total_Deaths(2020).html'),
):
    pio.write_html(figure, file=html_name, auto_open=True)
