In [1]:
import pandas as pd
import altair as alt

# Read the two input tables: the 2020 election results and the COVID-19 records.
election_data = pd.read_csv('us_election_2020.csv')
covid_data = pd.read_csv('covid-cases.csv')

# Parse the month/day/year strings in 'date' into timestamps so the .dt accessors work below.
covid_data['date'] = pd.to_datetime(covid_data['date'], format='%m/%d/%Y')

# Restrict to 2021-2023 and leave out rows whose state is 'CA'. The row selection is
# copied immediately so later column additions never touch a view of covid_data.
covid_data_filtered = covid_data.loc[
    covid_data['date'].dt.year.between(2021, 2023) & covid_data['state'].ne('CA')
].copy()

# Stamp every row with the first day of its calendar month.
covid_data_filtered = covid_data_filtered.assign(
    year_month=lambda rows: rows['date'].dt.to_period('M').dt.to_timestamp()
)

# Keep only the first row encountered for each (state, county, month) combination.
# NOTE(review): if the file holds more than one row per county per month (for example
# daily or weekly records), every row after the first is discarded here, so the monthly
# total below reflects a single row per county — confirm the data granularity.
covid_data_filtered_unique = covid_data_filtered.loc[
    ~covid_data_filtered.duplicated(subset=['state', 'county', 'year_month'])
]

# Total 'New deaths' over the surviving rows for each state and month.
covid_deaths_by_state_month = (
    covid_data_filtered_unique
    .groupby(['state', 'year_month'], as_index=False)['New deaths']
    .sum()
)

# Attach each state's election result to its monthly death totals. The join matches
# the COVID 'state' column against the election table's 'state_abr' column and uses
# the default inner join, so rows without a match on both sides are dropped.
# Only the columns the charts need are kept, under snake_case names.
merged_data_monthly = (
    covid_deaths_by_state_month
    .merge(election_data, left_on='state', right_on='state_abr')
    .loc[:, ['state_abr', 'year_month', 'New deaths', 'trump_pct', 'biden_pct', 'trump_win', 'biden_win']]
    .rename(columns={'state_abr': 'state', 'New deaths': 'new_deaths'})
)

# Legend-bound selection: clicking a legend entry selects marks by their trump_win value.
selection = alt.selection_multi(fields=['trump_win'], bind='legend')

# Colour by election winner: trump_win 1 is drawn red, 0 blue, and the legend labels
# are rewritten from the raw 1/0 values into readable names.
monthly_winner_color = alt.Color(
    'trump_win:N',
    title='Election Winner',
    scale=alt.Scale(domain=[1, 0], range=['red', 'blue']),
    legend=alt.Legend(
        title="Election Winner",
        labelExpr="datum.label == '1' ? 'Trump (Red)' : 'Biden (Blue)'",
    ),
)

# One circle per state and month; marks outside the current legend selection are dimmed.
scatter_plot_monthly = (
    alt.Chart(merged_data_monthly)
    .mark_circle(size=60)
    .encode(
        x=alt.X('year_month:T', title='Year-Month'),
        y=alt.Y(
            'new_deaths:Q',
            title='New COVID-19 Deaths',
            scale=alt.Scale(domain=[0, 2500]),
        ),
        color=monthly_winner_color,
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        tooltip=['state:N', 'year_month:T', 'new_deaths:Q', 'trump_pct:Q', 'biden_pct:Q'],
    )
    .properties(width=800, height=400)
    .add_selection(selection)
)

# Create line plots for Trump and Biden wins with interactive filtering
def _mean_deaths_line(winner_flag, line_color):
    """Build a line of mean monthly new deaths for the states one candidate won.

    Parameters
    ----------
    winner_flag : str
        Column of ``merged_data_monthly`` ('trump_win' or 'biden_win'); rows where it
        equals 1 are plotted.
    line_color : str
        Colour given to the line mark.

    Returns
    -------
    alt.Chart
        Line chart of ``mean(new_deaths)`` per ``year_month`` whose opacity follows
        the legend ``selection``.
    """
    won_states = merged_data_monthly[merged_data_monthly[winner_flag] == 1]
    return alt.Chart(won_states).mark_line(color=line_color, size=3).encode(
        x=alt.X('year_month:T', title='Year-Month'),
        y=alt.Y('mean(new_deaths):Q', title='New COVID-19 Deaths', scale=alt.Scale(domain=[0, 2500])),
        # The selection is tested against the 'trump_win' field. Aggregating with
        # mean() groups only by the non-aggregated encoded fields, so without this
        # detail channel the aggregated rows carry no trump_win value and both lines
        # dim whenever any legend entry is clicked.
        # NOTE(review): assumes trump_win is constant within each subset (biden_win == 1
        # implies trump_win == 0); otherwise a subset would be split into several lines.
        detail='trump_win:N',
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    )


line_trump = _mean_deaths_line('trump_win', 'red')
line_biden = _mean_deaths_line('biden_win', 'blue')

# Stack the per-state circles and the two mean lines on shared axes, then title the result.
monthly_chart_title = 'New COVID-19 Deaths by State (Monthly, 2021-2023)'
layered_chart = alt.layer(
    scatter_plot_monthly,
    line_trump,
    line_biden,
).properties(title=monthly_chart_title)

# Leaving the chart as the cell's final expression renders it.
layered_chart


FileNotFoundError: [Errno 2] No such file or directory: 'covid-cases.csv'

In [None]:

# Read the election results and the provisional county-level COVID-19 deaths file.
election_data = pd.read_csv('/content/us_election_2020.csv')
covid_data = pd.read_csv('/content/Provisional_COVID-19_Deaths_by_County__and_Race_and_Hispanic_Origin_20241120.csv')

# Parse the month/day/year strings in 'Data as of' into timestamps
# (this relies on the file having a 'Data as of' column).
covid_data['Data as of'] = pd.to_datetime(covid_data['Data as of'], format='%m/%d/%Y')

# Keep the rows stamped with the most recent 'Data as of' value found anywhere in the
# file. This is a single global maximum, not a per-state one.
latest_date = covid_data['Data as of'].max()
covid_latest = covid_data.loc[covid_data['Data as of'].eq(latest_date)]

# Add up 'COVID-19 Deaths' over every remaining row of each state.
# NOTE(review): every row at the latest date contributes to the sum; if the file has
# more than one row per county (its name suggests a race/ethnicity breakdown), county
# totals could be counted repeatedly — confirm one row per county before relying on this.
covid_deaths_by_state = covid_latest.groupby('State', as_index=False)['COVID-19 Deaths'].sum()

# Join the per-state death totals to the election results by matching 'State' against
# 'state_abr' (default inner join, so rows without a match on both sides are dropped).
# Then keep just the columns the charts use and give them clearer names.
merged_data = (
    covid_deaths_by_state
    .merge(election_data, left_on='State', right_on='state_abr')
    .loc[:, ['state_abr', 'COVID-19 Deaths', 'trump_win', 'biden_win']]
    .rename(columns={'state_abr': 'state', 'COVID-19 Deaths': 'cumulative_deaths'})
)

# Box plot of cumulative deaths, one box per trump_win value. Axis and legend labels
# are rewritten from the raw 1/0 values into candidate names.
boxplot_winner_axis = alt.Axis(labels=True, labelExpr="datum.value == 1 ? 'Trump' : 'Biden'")
boxplot_winner_legend = alt.Legend(
    title="Election Winner",
    labelExpr="datum.label == '1' ? 'Trump (Red)' : 'Biden (Blue)'",
)

boxplot = (
    alt.Chart(merged_data)
    .mark_boxplot(size=60)
    .encode(
        x=alt.X('trump_win:N', title='Election Winner', axis=boxplot_winner_axis),
        y=alt.Y('cumulative_deaths:Q', title='Cumulative COVID-19 Deaths'),
        color=alt.Color(
            'trump_win:N',
            scale=alt.Scale(domain=[1, 0], range=['red', 'blue']),
            title='Election Winner',
            legend=boxplot_winner_legend,
        ),
    )
    .properties(width=400, height=400)
)

# One semi-transparent circle per state, positioned and coloured like the box plot,
# with a tooltip showing the state and its cumulative death count.
points = (
    alt.Chart(merged_data)
    .mark_circle(size=80, opacity=0.6)
    .encode(
        x=alt.X('trump_win:N'),
        y=alt.Y('cumulative_deaths:Q'),
        color=alt.Color(
            'trump_win:N',
            scale=alt.Scale(domain=[1, 0], range=['red', 'blue']),
        ),
        tooltip=['state:N', 'cumulative_deaths:Q'],
    )
)

# Overlay the per-state circles on the box plot and give the combined chart a title.
boxplot_chart_title = 'Cumulative COVID-19 Deaths by Election Winner (Trump vs. Biden)'
final_chart = alt.layer(boxplot, points).properties(title=boxplot_chart_title)

# Leaving the chart as the cell's final expression renders it.
final_chart


