In [298]:
import pandas as pd
import datetime
import matplotlib as plt
import imageio
import numpy as np
import os
from matplotlib.lines import Line2D
%matplotlib inline

# Vaccine data

In [5]:
# Load the county-level vaccination file.
# `Date` and `FIPS` are both used as text below (string slicing and string
# concatenation), so they are pinned to `str` at read time. This keeps
# pandas from inferring a numeric dtype for FIPS, which would break the
# concatenation and would no longer match the string FIPS codes that the
# case-rate frame slices out of `geoid`.
vaccine_df = pd.read_csv('Vaccines_County.csv', dtype={'Date': str, 'FIPS': str})

# Rebuild the date as "<chars 6..>-<chars 0..1>-<chars 3..4>".
# NOTE(review): this assumes `Date` is laid out as MM/DD/YYYY so the result
# is YYYY-MM-DD -- confirm against the source file.
date_text = vaccine_df['Date']
vaccine_df['formatted_date'] = date_text.str[6:] + "-" + date_text.str[0:2] + "-" + date_text.str[3:5]

# Composite join key "<formatted date> ; <FIPS>", used later to merge with
# the case-rate frame, which builds the same key.
vaccine_df['FIPS_Date'] = vaccine_df['formatted_date'] + " ; " + vaccine_df['FIPS']

vaccine_df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


# Case rate data

In [10]:
# Download the NYT rolling-average county case file.
case_df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties.csv')

# Derive the FIPS code (everything after the first four characters of
# `geoid`) and the composite "<date> ; <FIPS>" join key while `date` is
# still a plain string.
case_df = case_df.assign(
    FIPS=lambda frame: frame["geoid"].str[4:],
    FIPS_Date=lambda frame: frame['date'] + " ; " + frame['FIPS'],
)

# Only now convert `date` to real timestamps, then keep rows strictly after
# 2020-12-31.
case_df = case_df.assign(date=pd.to_datetime(case_df['date']))
case_df = case_df[case_df['date'] > datetime.datetime(2020, 12, 31)]

# Show the earliest remaining date as the cell output.
case_df['date'].min()

# Combine case and vaccination data

In [15]:
# Inner-join cases and vaccinations on the shared "<date> ; <FIPS>" key.
combined_df = case_df.merge(vaccine_df, on="FIPS_Date", how="inner")

# Both inputs carry a `FIPS` column, so the merge suffixes them; take the
# case-side one (`FIPS_x`) and store it as an integer county identifier.
combined_df['county_fips'] = combined_df['FIPS_x'].astype(int)

# Save the joined frame to disk.
combined_df.to_csv('combined_case_vaccine_data.csv')

# Election Data

In [164]:
# Load the county election returns and keep only the 2020 rows.
election_df = pd.read_csv('elections.csv')

# `.copy()` makes the filtered frame independent of the full table, so the
# column assignment below does not raise SettingWithCopyWarning.
election_df = election_df.loc[election_df['year'] == 2020].copy()

# Store the county FIPS as text, truncated to at most five characters.
# NOTE(review): if `county_fips` is parsed as float (e.g. 1001.0), the text
# form is "1001.0" and the 5-character cut leaves "1001." -- verify the
# column's dtype and whether zero-padding is wanted instead.
election_df['county_fips'] = election_df['county_fips'].astype(str).str[:5]

In [174]:
# Per county: the largest reported `totalvotes` value.
total_pivot = pd.pivot_table(
    election_df,
    index='county_fips',
    values="totalvotes",
    aggfunc='max',
)

# Per county, one column per candidate: the largest reported
# `candidatevotes` value for that candidate.
election_pivot = pd.pivot_table(
    election_df,
    index='county_fips',
    columns=["candidate"],
    values="candidatevotes",
    aggfunc='max',
)

In [176]:
# Attach each county's total vote count to its per-candidate vote columns.
election_merge = election_pivot.merge(total_pivot, how="inner", on='county_fips')

# Trump's share of the county's total votes.
election_merge['dt_%'] = election_merge['DONALD J TRUMP'].div(election_merge['totalvotes'])

# Move `county_fips` out of the index into an ordinary column.
election_merge = election_merge.reset_index()

# Demographic data

In [None]:
# Tab-separated population-by-race table.
race_df = pd.read_csv('Race_Data.txt', sep="\t")

# One row per county code, one column per race; county/race combinations
# that are absent from the file are filled with 0.
race_pivot = pd.pivot_table(
    race_df,
    index='County Code',
    columns=["Race"],
    values="Population",
    aggfunc='max',
).fillna(0)

In [None]:
# Total population per county code: the sum of `Population` over all rows
# for that county.
total_pop = pd.pivot_table(
    race_df,
    index='County Code',
    values="Population",
    aggfunc='sum',
)

# Put the per-race columns and the county total side by side.
race_pop_merge = race_pivot.merge(total_pop, on="County Code", how="inner")

In [329]:
# Share of each county's population recorded as Black or African American.
race_pop_merge['black_pop%'] = race_pop_merge['Black or African American'].div(
    race_pop_merge['Population']
)

# Turn the `County Code` index into a column and expose it under the
# `county_fips` name used as the join key by the other frames.
race_pop_merge = race_pop_merge.reset_index()
race_pop_merge['county_fips'] = race_pop_merge['County Code']

# Merge all datasets (vaccine, case, election, demographic)

In [335]:
def _with_int_county_fips(frame):
    """Return a copy of ``frame`` whose ``county_fips`` column is int64.

    Values are parsed with ``pd.to_numeric`` so that both numeric columns
    and numeric-looking strings are accepted. Rows whose key cannot be
    parsed (missing or non-numeric) are dropped: they could never match the
    integer keys of ``combined_df`` in an inner join anyway.
    """
    fips = pd.to_numeric(frame['county_fips'], errors='coerce')
    parsed = fips.notna()
    result = frame.loc[parsed].copy()
    # Assign positionally so a non-unique index cannot cause misalignment.
    result['county_fips'] = fips[parsed].astype('int64').to_numpy()
    return result


# `combined_df['county_fips']` is an integer column, while the election
# frame's key was built from strings and the demographic key comes straight
# from `County Code`. pandas refuses to merge an int64 key against an
# object (string) key, so bring every key to int64 before joining.
temp_combine = pd.merge(
    combined_df,
    _with_int_county_fips(election_merge),
    on="county_fips",
    how="inner",
)

final_combine = pd.merge(
    temp_combine,
    _with_int_county_fips(race_pop_merge),
    on="county_fips",
    how="inner",
)

In [339]:
# Write the fully merged frame (cases, vaccines, election, demographics)
# to disk.
final_combine.to_csv(path_or_buf="final_combine.csv")

# Create Values for Plotting

In [None]:
# Marker colour for every row:
#   'red'  - Trump vote share above 50%
#   'k'    - not red, and Black population share above 10%
#   'blue' - everything else (including rows with a missing vote share)
#
# The column is built in a single vectorised step. The earlier chained form
# (`final_combine['col'].loc[mask] = ...`) writes through an intermediate
# Series, which makes pandas emit SettingWithCopyWarning and leaves the
# frame unchanged when copy-on-write is enabled. It also stored strings in
# a column that started out as floats.
trump_majority = final_combine['dt_%'] > .5
black_share_over_10pct = final_combine['black_pop%'] > .1

# np.select takes the first matching condition, so 'red' wins over 'k'.
final_combine['col'] = np.select(
    [trump_majority, black_share_over_10pct],
    ['red', 'k'],
    default='blue',
)

In [394]:
# Restrict the data to the most polarised counties: Trump vote share below
# 0.446836 or above 0.754582 (the author's 1st- and 4th-quartile cut-offs
# for DT vote %).
trump_share = final_combine['dt_%']
is_polarized = (trump_share < 0.446836) | (trump_share > 0.754582)
pop_final_combine = final_combine[is_polarized]

# Render one scatter plot per day (from a per-day test dataframe) and assemble the frames into a GIF

In [399]:
# Render one scatter plot per day (vaccination % vs. case rate, coloured by
# the `col` column), save each as a PNG, stitch the PNGs into `mygif.gif`,
# then delete the PNGs.
#
# Note: `plt` is bound to the top-level `matplotlib` package by the import
# cell (not to `matplotlib.pyplot`), hence `plt.pyplot` below.

FRAME_DIR = "vax_gif_frames"            # scratch directory for per-day PNGs
FIRST_DAY = datetime.datetime(2021, 1, 1)
N_DAYS = 209                            # number of consecutive days rendered
FRAMES_PER_DAY = 2                      # times each day's image appears in the GIF
FINAL_HOLD_FRAMES = 60                  # extra repeats of the last day so the GIF pauses

filenames = []
plt.style.use('fivethirtyeight')
plt.rcParams.update({'font.size': 14})

# savefig does not create missing directories; make sure the target exists.
os.makedirs(FRAME_DIR, exist_ok=True)

# Hand-built legend entries: one dot per colour category.
legend_elements = [Line2D([0], [0], marker='o', color='w', label='Trump',
                          markerfacecolor='r', markersize=9),
                   Line2D([0], [0], marker='o', color='w', label='Biden',
                          markerfacecolor='b', markersize=9),
                   Line2D([0], [0], marker='o', color='w', label='Biden - Black Pop > 10%',
                          markerfacecolor='k', markersize=9)]

for i in range(N_DAYS):
    day = FIRST_DAY + datetime.timedelta(i)

    # Rows for this single day only.
    test_data = pop_final_combine.loc[pop_final_combine['date'] == day]

    ax = test_data.plot.scatter(x='Administered_Dose1_Recip_18PlusPop_Pct',
                                y='cases_avg_per_100k',
                                figsize=(10, 10),
                                ylim=(0, 200),
                                xlim=(0, 100),
                                xlabel="% of Population Vaccinated",
                                ylabel="Cases per 100K",
                                c=test_data['col'],
                                alpha=.4)
    fig = ax.get_figure()
    fig.suptitle('Covid Vaccination and Case rate by US County',
                 fontsize=20, fontweight='bold', y=.95)
    ax.set_title(day.strftime("%m/%d/%Y"), fontsize=15)
    ax.legend(handles=legend_elements, title="2020 Election Winner", loc='upper right')

    filename = 'Day_' + str(i) + '.png'

    # Every day is shown FRAMES_PER_DAY times; the final day is held longer.
    repeats = FRAMES_PER_DAY
    if i == N_DAYS - 1:
        repeats += FINAL_HOLD_FRAMES
    filenames.extend([filename] * repeats)

    fig.savefig(os.path.join(FRAME_DIR, filename))
    # Close the figure explicitly so figures do not pile up in memory.
    plt.pyplot.close(fig)

with imageio.get_writer('mygif.gif', mode='I') as writer:
    # `filenames` holds consecutive duplicates; read each PNG from disk once
    # and re-append the decoded image for its repeats.
    previous_name = None
    image = None
    for filename in filenames:
        if filename != previous_name:
            image = imageio.imread(os.path.join(FRAME_DIR, filename))
            previous_name = filename
        writer.append_data(image)

# Remove files
for filename in set(filenames):
    os.remove(os.path.join(FRAME_DIR, filename))