In [2]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

# Notebook-wide matplotlib defaults: (16, 8) figure size and font size 14.
plt.rcParams.update({
    "figure.figsize": (16, 8),
    "font.size": 14,
})
from datetime import datetime

In [3]:
#import vaccine distro
# State-level vaccine timeline CSV, read straight from the govex/COVID-19
# repository on GitHub (requires network access).
df = pd.read_csv(
    'https://raw.githubusercontent.com/govex/COVID-19/master/'
    'data_tables/vaccine_data/raw_data/vaccine_data_us_state_timeline.csv'
)

#import state populations
# Local CSV; the cells below read its 'State' and 'Threshold' columns.
state_pops = pd.read_csv('state_thresholds.csv')


In [4]:
#clean and transpose vaccine distro

# Parse the 'date' column so the pivot below is keyed by real timestamps.
df['date'] = df['date'].pipe(pd.to_datetime)

# Wide table: one row per Province_State, one column per date,
# cells holding 'doses_admin_total'.
doses_admin = pd.pivot(
    df,
    index='Province_State',
    columns='date',
    values='doses_admin_total',
)

# Flip it so each state is a column and each row is a date.
d_a = doses_admin.T
d_a.head()

Province_State,Alabama,Alaska,American Samoa,Arizona,Arkansas,Bureau of Prisons,California,Colorado,Connecticut,Delaware,...,Texas,Utah,Vermont,Veterans Health Administration,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-10,,,,,,,,,,,...,,,,,,,,,,
2020-12-11,,,,,,,,,,,...,,,,,,,,,,
2020-12-12,,,,,,,,,,,...,,,,,,,,,,
2020-12-13,,,,,,,,,,,...,,,,,,,,,,
2020-12-14,,,,,,,,,,,...,,,,,,,,,,


In [5]:
#get list of states to loop through
states = state_pops.loc[:, 'State']

#set up empty dataframes for the loop
# Two independent, empty frames with the result columns and an empty
# datetime index.
immunity, vacc = (
    pd.DataFrame(columns=['vaccinated', 'state'], index=pd.to_datetime([]))
    for _ in range(2)
)

In [6]:
#get date for herd immunity from states
#
# For every state: fit an autoregressive model (at most one lag) to the
# cumulative doses administered, predict through 2045-12-31, and record the
# FIRST date on which the rounded prediction exceeds that state's threshold.
# States whose prediction never exceeds the threshold are left out.

# One (date, predicted doses, state) tuple per state that crosses its threshold.
crossings = []

for state in states:
    d_state = d_a[state]

    # Re-index the state's series onto a daily calendar starting 2021-01-20.
    # Calendar days with no matching observation become NaN and are dropped.
    dates = pd.date_range('2021-01-20', periods=len(d_state), freq="D")
    endog = pd.Series(d_state, index=dates)
    endog.index = pd.DatetimeIndex(endog.index).to_period('D')
    endog = endog.dropna()

    # The deprecated `old_names` flag is intentionally not passed: it only
    # affects parameter labels, and only the predictions are used here.
    selection_res = ar_select_order(endog, maxlag=1, glob=True, seasonal=False)
    pandas_ar_res = selection_res.model.fit()
    pred = pandas_ar_res.predict(start='2021-01-20', end='2045-12-31').round(0)

    threshold = state_pops.loc[state_pops['State'] == state, 'Threshold'].iloc[0]

    # Keep only the first crossing. Matching on the predicted value instead
    # (pred == value) would also pick up every later date that rounds to the
    # same number. NaN predictions compare False and are ignored.
    above = pred[pred > threshold]
    if above.empty:
        continue
    crossings.append((above.index[0], above.iloc[0], state))

# Build the result frame once; DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
if crossings:
    crossing_dates, crossing_doses, crossing_states = zip(*crossings)
    immunity = pd.DataFrame(
        {'vaccinated': list(crossing_doses), 'state': list(crossing_states)},
        index=pd.Index(crossing_dates),
    )
else:
    # No state crossed: same empty shape the notebook starts from.
    immunity = pd.DataFrame(columns=['vaccinated', 'state'], index=pd.to_datetime([]))

In [7]:
# Show the result table: index is the crossing date, 'vaccinated' is the
# predicted dose count on that date, 'state' is the state name.
immunity

Unnamed: 0,vaccinated,state
2021-06-10,10953178.0,Arizona
2021-07-28,59484230.0,California
2021-04-27,8836256.0,Colorado
2021-12-27,5360548.0,Connecticut
2021-05-12,32246303.0,Florida
2021-05-23,2736482.0,Idaho
2021-05-03,19411867.0,Illinois
2021-08-03,10123764.0,Indiana
2021-04-24,4886653.0,Iowa
2021-05-26,4451083.0,Kansas


In [8]:
# Write the results to disk with the date index as the first column and no
# header row.
immunity.to_csv('predicted_immunity.csv', header=False)