In [1]:
import pandas as pd
import datetime
import matplotlib as plt
import imageio
import numpy as np
import os
from matplotlib.lines import Line2D
%matplotlib inline

# Vaccine data

In [2]:
vaccine_df = pd.read_csv('Vaccines_County.csv')

#creating a combined County Code - Date field on which I can merge the two datasets

vaccine_df['formatted_date'] = vaccine_df['Date'].str[6:]+"-"+vaccine_df['Date'].str[0:2]+"-"+vaccine_df['Date'].str[3:5]

vaccine_df['FIPS_Date'] = vaccine_df['formatted_date'] + " ; " + vaccine_df['FIPS']

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


# Case rate data

In [6]:
case_df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties.csv')

#formatting county code to remove GEO prefix
case_df["FIPS"] = case_df["geoid"].str[4:]

case_df['FIPS_Date'] = case_df['date']+" ; "+case_df['FIPS']

case_df['date'] = pd.to_datetime(case_df['date'])

#vaccinations started in January, so removing case data prior to Jan 1, 2021
case_df = case_df.loc[case_df['date'] > datetime.datetime(2020,12,31)]


# Combine case and Vaccination Info 

In [7]:
caseVax_df = pd.merge(case_df, vaccine_df, on = "FIPS_Date", how = "inner")

#created field with matching column name to merge with election data
caseVax_df['county_fips'] = caseVax_df['FIPS_x']

# Election Data

In [8]:
election_df = pd.read_csv('elections.csv')

election_df = election_df.loc[election_df['year'] == 2020]

#merge requires matching datatypes, so converting to string for merging
election_df['county_fips'] = election_df['county_fips'].astype(str)

#dropping decimal
election_df['county_fips'] = election_df['county_fips'].str[:5]

In [9]:
total_pivot = election_df.pivot_table(index = 'county_fips', values = "totalvotes", aggfunc = 'max')

election_pivot = election_df.pivot_table(index = 'county_fips', columns = ["candidate"] , values = "candidatevotes", aggfunc = 'max')

In [10]:
election_merge = pd.merge(election_pivot, total_pivot, how = "inner", on = 'county_fips')

election_merge['dt_%'] = election_merge['DONALD J TRUMP']/election_merge['totalvotes']

election_merge.reset_index(inplace = True)

# Demographic data

In [11]:
race_df = pd.read_csv('Race_Data.txt',delimiter = "\t")

race_pivot = race_df.pivot_table(index = 'County Code', columns = ["Race"] , values = "Population", aggfunc = 'max')

race_pivot.fillna(0, inplace = True)

In [12]:
total_pop = race_df.pivot_table(index = 'County Code', values = "Population", aggfunc = 'sum')

In [13]:
race_pop_merge = pd.merge(race_pivot, total_pop, on = "County Code", how = "inner")

race_pop_merge['black_pop%'] = race_pop_merge['Black or African American']/race_pop_merge['Population']

race_pop_merge.reset_index(inplace = True)

race_pop_merge['county_fips'] = race_pop_merge['County Code'].astype(str)

# Merge all datasets (vaccine, case, election, demographic)

In [14]:
temp_combine = pd.merge(caseVax_df,election_merge, on = "county_fips", how = "inner")

final_combine = pd.merge(temp_combine,race_pop_merge,on = "county_fips", how = "inner")

# Create Values for Plotting

In [45]:
final_combine['col'] = final_combine['dt_%']

final_combine['col'].loc[final_combine['dt_%']>.5] = 'red'

final_combine['col'].loc[final_combine['col'] != 'red'] = 'blue'


#only creating alternate color for high black populations in counties that voted for Biden
final_combine['col'].loc[(final_combine['col'] == "blue") & (final_combine['black_pop%']>.134)] = 'forestgreen'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


# Create GIF using ImageIO

In [59]:
filenames = []
plt.style.use('fivethirtyeight')
plt.rcParams.update({'font.size': 14})

#creates the key for the chart
legend_elements = [Line2D([0], [0], marker='o', color='w', label='Trump',
                          markerfacecolor='red', markersize=9),
                  Line2D([0], [0], marker='o', color='w', label='Biden - Large Black Pop',
                          markerfacecolor='blue', markersize=9),
                  Line2D([0], [0], marker='o', color='w', label='Biden - Small Black Pop',
                          markerfacecolor='forestgreen', markersize=9)]

#Used range of 209 for looping to iterate over relevent dates
for i in range(209):
    day = datetime.datetime(2021,1,1) + datetime.timedelta(i)
    
    
    test_data = final_combine.loc[final_combine['date'] == day]
    
    #plotting one day at a time
    plot = test_data.plot.scatter(x = 'Administered_Dose1_Recip_18PlusPop_Pct', 
                           y = 'cases_avg_per_100k', 
                           figsize = (15,10), 
                           ylim = (0,200),
                           xlim = (0,100),
                           xlabel = "% of Population Vaccinated",
                           ylabel = "Cases per 100K",
                           c = test_data['col'],
                           alpha = .4)
    
    #additional plot elements, including citation, titles, and adding the legend
    plt.pyplot.annotate('Data collected from NYT(Cases), CDC(Vaccination, Population), and MIT(Election) \nLarge black population defined as larger than the national average (13.4%)',
                 (0,0), (620,-20),
                fontsize = 9,
                xycoords = 'axes fraction',
                textcoords = 'offset points',
                va = 'top',
                style='italic')
    plt.pyplot.suptitle('Covid Vaccination and Case rate by US County', fontsize = 20, fontweight = 'bold', y = .95)
    plt.pyplot.title(day.strftime("%m/%d/%Y"), fontsize = 15)
    plt.pyplot.legend(handles=legend_elements, title = "2020 Election Winner", loc = 'upper right')

    
    filename = 'Day_'+str(i)+'.png'
    
    #creating two frames for each chart to slow down GIF speed
    for j in range(2):
        filenames.append(filename)
    
    #creating several frames of last graph to pause on last day
    if i == 208:
        for k in range(60):
            filenames.append(filename)
            
    
    plt.pyplot.savefig("vax_gif_frames/"+filename)
    plt.pyplot.close()

#use ImageIO to create gif from list of files created in loop
with imageio.get_writer('mygif.gif', mode='I') as writer:
    for filename in filenames:
        image = imageio.imread("vax_gif_frames/"+filename)
        writer.append_data(image)
        
# Remove files
for filename in set(filenames):
    os.remove("vax_gif_frames/"+filename)