In [1]:
import requests
import pandas
import json
import matplotlib.pyplot as plt
from scipy.stats import linregress

%matplotlib tk

# Data sources: the OWID per-state vaccination CSV (fetched over HTTP) and a
# local CSV with each state's party affiliation.
vac_data_url = (
    "https://raw.githubusercontent.com/owid/covid-19-data/master/"
    "public/data/vaccinations/us_state_vaccinations.csv"
)
party_data_path = "affiliation.csv"

In [2]:
vac_data = pandas.read_csv(vac_data_url)  # load the full per-state vaccination history

# Take the date on Alabama's last row as the "current" date.
# NOTE(review): assumes the CSV lists each state's rows in chronological order — confirm.
most_recent_date = vac_data.loc[vac_data['location'] == 'Alabama', 'date'].iloc[-1]

# Keep only that day's rows. drop=True discards the old row labels; without it
# reset_index() inserts them as a stray 'index' column that is carried along as
# ordinary data by everything that uses this frame afterwards.
vac_data = vac_data.loc[vac_data['date'] == most_recent_date].reset_index(drop=True)

In [3]:
# The 50 US states; any other location in the vaccine data is filtered out below.
states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
    "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri",
    "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
]

# The vaccine data set calls the state 'New York State'; rename it so it matches
# the entry in `states`.
vac_data['location'] = vac_data['location'].replace('New York State', 'New York')

# Keep only the rows whose location is one of the 50 states.
vac_data = vac_data[vac_data['location'].isin(states)]

In [4]:
# Per-state party affiliation table, read from the local CSV; later cells read
# its 'State', 'Classification' and 'Dem Advantage' columns.
party_data = pandas.read_csv(filepath_or_buffer=party_data_path)

In [5]:
# Attach each state's party classification and Democratic advantage to the
# vaccination rows, then order the rows by vaccination rate.

# One State-indexed lookup replaces a full scan of party_data for every row.
# drop_duplicates keeps the first row for a repeated State, which is the row the
# previous `.values[0]` lookup selected.
party_lookup = party_data.drop_duplicates(subset='State').set_index('State')

# Fail with the offending names instead of an anonymous IndexError from `.values[0]`.
missing_states = vac_data.loc[~vac_data['location'].isin(party_lookup.index), 'location'].unique().tolist()
if missing_states:
    raise KeyError(f"No party affiliation data for: {missing_states}")

vac_data['party'] = vac_data['location'].map(party_lookup['Classification'])  # add party classification
vac_data['dem_advantage'] = vac_data['location'].map(party_lookup['Dem Advantage'])  # add party affiliation

# Reassign rather than sort in place so re-running the cell never acts on a half-updated frame.
vac_data = vac_data.sort_values(by='people_vaccinated_per_hundred', ignore_index=True)

In [6]:
# matplotlib visualization: each state's vaccination rate plotted against its
# Democratic advantage, coloured by party classification, with a linear fit.
colors = {
    'Solid Rep': "#de0100",
    'Lean Rep': "#ff8080",
    'Competitive': "#444444",
    'Lean Dem': "#8d83fb",
    'Solid Dem': "#1405BD"
}

xs = vac_data["dem_advantage"]
ys = vac_data['people_vaccinated_per_hundred']

fig, ax = plt.subplots()

# Index the dict directly (rather than Series.map(colors)) so an unknown
# classification stays a loud KeyError instead of becoming a silent NaN colour.
ax.scatter(xs, ys, c=[colors[party] for party in vac_data['party']], s=100)

# Least-squares line fitted to exactly the points drawn above.
# All five results stay bound to their original names in case later cells use them.
m, b, r, p, stderror = linregress(xs, ys)
ax.plot(xs, m * xs + b)

# Empty scatters exist only to give the legend one handle per classification.
for label, color in colors.items():
    ax.scatter([], [], color=color, s=100, label=label)
ax.legend(title="Party classification")

# A figure has to be readable on its own: label both axes and report the correlation.
ax.set_xlabel("Democratic advantage")
ax.set_ylabel("People vaccinated per hundred")
ax.set_title(f"State vaccination rate vs. party affiliation (r = {r:.2f})")

plt.show()

In [43]:
# Export the combined frame so the data can be reused for better visualizations elsewhere.
vac_data.to_csv(path_or_buf="vac_data.csv")