In [44]:
import pandas as pd
from datetime import datetime
from datetime import timedelta

In [45]:
def datetime_to_str(dtime):
    """Format a date as ``MM-DD-YYYY`` with zero-padded fields.

    Only the ``month``, ``day`` and ``year`` attributes of ``dtime`` are read.
    """
    month, day, year = dtime.month, dtime.day, dtime.year
    pieces = [str(month).zfill(2), str(day).zfill(2), str(year).zfill(4)]
    return '-'.join(pieces)
# datetime_to_str(datetime.now())

def datetime_to_str2(dtime):
    """Format a date as the compact, zero-padded string ``YYYYMMDD``.

    Only the ``year``, ``month`` and ``day`` attributes of ``dtime`` are read.
    """
    pieces = (
        str(dtime.year).zfill(4),
        str(dtime.month).zfill(2),
        str(dtime.day).zfill(2),
    )
    return ''.join(pieces)
# datetime_to_str2(datetime.now())

In [46]:
# Number of earlier daily snapshots the time-series cells join onto the reference day.
ndays = 3
# Reference date used by the cells below: one day before this cell is executed.
now = datetime.now() + timedelta(days=-1)

# Short metric labels.
# NOTE(review): no cell visible in this notebook reads `cols`, and the calls
# below pass different header strings (e.g. 'Total Doses Delivered') --
# confirm whether this list is still needed.
cols = [
    'Total Distributed', 'Total Administered',
    'Distributed per 100K', 'Administered per 100K',
    'People with 1+ Doses', 'People with 1+ Doses per 100K',
    'People with 2 Doses', 'People with 2 Doses Per 100K',
]

def get_vaccination_data(dtime, col):
    """Load one metric from the daily vaccination CSV for a single date.

    Reads ``vaccinations/covid19_vaccinations_in_the_united_states-YYYYMMDD.csv``
    (path relative to the current working directory), keeping only the
    'State/Territory/Federal Entity' column and ``col``.

    No rows are filtered out: every entity present in the file is returned.

    Parameters
    ----------
    dtime : datetime
        Date of the snapshot; it selects the file and becomes the column label.
    col : str
        Header of the metric column to load, exactly as written in the CSV.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'State', with a single column labelled ``MM-DD-YYYY``.
        Missing values (in either loaded column) are replaced by 0.

    Raises
    ------
    Errors from ``pandas.read_csv`` propagate unchanged, e.g.
    ``FileNotFoundError`` when the day's file is absent or ``ValueError`` when
    ``col`` is not one of the file's columns.
    """
    entity_col = 'State/Territory/Federal Entity'
    file = 'covid19_vaccinations_in_the_united_states-{0}.csv'.format(datetime_to_str2(dtime))
    # A date-dependent rename (for snapshots from 2021-02-05 onward) used to
    # live here; it was disabled, so every date is handled the same way.
    return (
        pd.read_csv('vaccinations/{0}'.format(file), usecols=[entity_col, col])
        .fillna(0)
        .rename(columns={entity_col: 'State', col: datetime_to_str(dtime)})
        .set_index(['State'])
    )

def get_vaccination_timeseries(dtime, col, ndays=5):
    """Build a per-state table of one metric over consecutive daily snapshots.

    Parameters
    ----------
    dtime : datetime
        Most recent date of the series (its column comes last).
    col : str
        Header of the metric column, passed through to ``get_vaccination_data``.
    ndays : int, default 5
        How many days before ``dtime`` to include. Values of 0 or less yield
        only the ``dtime`` snapshot.

    Returns
    -------
    pandas.DataFrame
        One column per date, oldest first, ``ndays + 1`` columns in total.
        Snapshots are inner-joined on the index, so only entities present in
        every daily file are kept.
    """
    # Collect the snapshots oldest-first and concatenate once, instead of
    # re-concatenating (and re-copying) the growing frame on every iteration.
    frames = [
        get_vaccination_data(dtime - timedelta(days=back), col)
        for back in range(max(ndays, 0), -1, -1)
    ]
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, axis=1, join='inner')

In [47]:
# Single-day snapshot of 'Total Doses Delivered' for the reference date `now`, indexed by state.
df = get_vaccination_data(now, col='Total Doses Delivered')
df

Unnamed: 0_level_0,02-28-2021
State,Unnamed: 1_level_1
Alaska,342535
Alabama,1402280
Arkansas,892660
American Samoa,29450
Arizona,2148015
...,...
Vermont,206910
Washington,2245010
Wisconsin,1588665
West Virginia,605255


In [48]:
# Column total over every row of the frame loaded in the previous cell.
# No rows are filtered out, so entries such as American Samoa count toward it.
df.sum()

02-28-2021    101360065
dtype: int64

In [49]:
# Single-day snapshot of 'Total Doses Administered by State where Administered' for `now`, indexed by state.
df = get_vaccination_data(now, col='Total Doses Administered by State where Administered')
df

Unnamed: 0_level_0,02-28-2021
State,Unnamed: 1_level_1
Alaska,268406
Alabama,948393
Arkansas,661572
American Samoa,20338
Arizona,1849259
...,...
Vermont,168050
Washington,1800030
Wisconsin,1500923
West Virginia,534053


In [50]:
# Column total over every row of the frame loaded in the previous cell.
# No rows are filtered out, so entries such as American Samoa count toward it.
df.sum()

02-28-2021    80590002
dtype: int64

In [51]:
# 'Total Doses Delivered' per state for `now` and the `ndays` days before it,
# one column per date with the oldest date first.
df = get_vaccination_timeseries(now, 'Total Doses Delivered', ndays)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,332415,342535,342535,342535
Alabama,1380380,1402180,1402280,1402280
Arkansas,861230,892660,892660,892660
American Samoa,29450,29450,29450,29450
Arizona,2143135,2148015,2148015,2148015
...,...,...,...,...
Vermont,206910,206910,206910,206910
Washington,2166300,2245010,2245010,2245010
Wisconsin,1588265,1588665,1588665,1588665
West Virginia,571155,605255,605255,605255


In [52]:
# 'Total Doses Administered by State where Administered' per state for `now`
# and the `ndays` days before it, one column per date with the oldest first.
df = get_vaccination_timeseries(now, 'Total Doses Administered by State where Administered', ndays)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,254334,259538,264741,268406
Alabama,893859,931090,942502,948393
Arkansas,597990,622665,649311,661572
American Samoa,20336,20338,20338,20338
Arizona,1671294,1728239,1792447,1849259
...,...,...,...,...
Vermont,151538,156929,162558,168050
Washington,1609190,1666839,1729208,1800030
Wisconsin,1359943,1421435,1470570,1500923
West Virginia,501913,526059,532295,534053


In [53]:
# 'People with 1+ Doses by State of Residence' per state for `now` and the
# `ndays` days before it, one column per date with the oldest first.
df = get_vaccination_timeseries(now, 'People with 1+ Doses by State of Residence', ndays)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,161595,163739,166735,168069
Alabama,627063,643208,649745,653309
Arkansas,394906,405877,419502,425567
American Samoa,12351,12354,12355,12355
Arizona,1168054,1195110,1220706,1244879
...,...,...,...,...
Vermont,98746,101529,104583,107238
Washington,1081544,1105687,1134837,1166618
Wisconsin,909389,937388,958333,971107
West Virginia,302863,314571,317008,318168


In [54]:
# 'People with 2 Doses by State of Residence' per state for `now` and the
# `ndays` days before it, one column per date with the oldest first.
df = get_vaccination_timeseries(now, 'People with 2 Doses by State of Residence', ndays)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,92233,95331,97554,99916
Alabama,286095,307868,313290,316073
Arkansas,200532,214402,227450,233685
American Samoa,8079,8079,8079,8080
Arizona,462074,490688,525426,555881
...,...,...,...,...
Vermont,49372,51631,54139,56934
Washington,515850,549135,582467,621171
Wisconsin,439535,472886,500342,517730
West Virginia,189352,201077,205102,206179


In [55]:
# Day-over-day change in 'Total Doses Delivered' per state (difference between adjacent date columns).
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0, not a measured change.
df = get_vaccination_timeseries(now, 'Total Doses Delivered', ndays).diff(axis=1).fillna(0)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,0.0,10120.0,0.0,0.0
Alabama,0.0,21800.0,100.0,0.0
Arkansas,0.0,31430.0,0.0,0.0
American Samoa,0.0,0.0,0.0,0.0
Arizona,0.0,4880.0,0.0,0.0
...,...,...,...,...
Vermont,0.0,0.0,0.0,0.0
Washington,0.0,78710.0,0.0,0.0
Wisconsin,0.0,400.0,0.0,0.0
West Virginia,0.0,34100.0,0.0,0.0


In [56]:
# Day-over-day change in 'Total Doses Administered by State where Administered' per state
# (difference between adjacent date columns).
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0, not a measured change.
df = get_vaccination_timeseries(now, 'Total Doses Administered by State where Administered', ndays).diff(axis=1).fillna(0)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,0.0,5204.0,5203.0,3665.0
Alabama,0.0,37231.0,11412.0,5891.0
Arkansas,0.0,24675.0,26646.0,12261.0
American Samoa,0.0,2.0,0.0,0.0
Arizona,0.0,56945.0,64208.0,56812.0
...,...,...,...,...
Vermont,0.0,5391.0,5629.0,5492.0
Washington,0.0,57649.0,62369.0,70822.0
Wisconsin,0.0,61492.0,49135.0,30353.0
West Virginia,0.0,24146.0,6236.0,1758.0


In [57]:
# Day-over-day change in 'People with 1+ Doses by State of Residence' per state
# (difference between adjacent date columns).
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0, not a measured change.
df = get_vaccination_timeseries(now, 'People with 1+ Doses by State of Residence', ndays).diff(axis=1).fillna(0)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,0.0,2144.0,2996.0,1334.0
Alabama,0.0,16145.0,6537.0,3564.0
Arkansas,0.0,10971.0,13625.0,6065.0
American Samoa,0.0,3.0,1.0,0.0
Arizona,0.0,27056.0,25596.0,24173.0
...,...,...,...,...
Vermont,0.0,2783.0,3054.0,2655.0
Washington,0.0,24143.0,29150.0,31781.0
Wisconsin,0.0,27999.0,20945.0,12774.0
West Virginia,0.0,11708.0,2437.0,1160.0


In [58]:
# Day-over-day change in 'People with 2 Doses by State of Residence' per state
# (difference between adjacent date columns).
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0, not a measured change.
df = get_vaccination_timeseries(now, 'People with 2 Doses by State of Residence', ndays).diff(axis=1).fillna(0)
df

Unnamed: 0_level_0,02-25-2021,02-26-2021,02-27-2021,02-28-2021
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alaska,0.0,3098.0,2223.0,2362.0
Alabama,0.0,21773.0,5422.0,2783.0
Arkansas,0.0,13870.0,13048.0,6235.0
American Samoa,0.0,0.0,0.0,1.0
Arizona,0.0,28614.0,34738.0,30455.0
...,...,...,...,...
Vermont,0.0,2259.0,2508.0,2795.0
Washington,0.0,33285.0,33332.0,38704.0
Wisconsin,0.0,33351.0,27456.0,17388.0
West Virginia,0.0,11725.0,4025.0,1077.0


In [59]:
# Sum 'Total Doses Delivered' over all rows for each date, then take the day-over-day change of those totals.
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0.
df = get_vaccination_timeseries(now, 'Total Doses Delivered', ndays).sum(axis=0).diff().fillna(0)
df

02-25-2021          0.0
02-26-2021    2143220.0
02-27-2021        200.0
02-28-2021          0.0
dtype: float64

In [60]:
# Sum 'Total Doses Administered by State where Administered' over all rows for each date,
# then take the day-over-day change of those totals.
# The first date has no predecessor, so fillna(0) turns its NaN into a placeholder 0.
df = get_vaccination_timeseries(now, 'Total Doses Administered by State where Administered', ndays).sum(axis=0).diff().fillna(0)
df

02-25-2021          0.0
02-26-2021    2514956.0
02-27-2021    2537774.0
02-28-2021    1733826.0
dtype: float64

In [61]:
# Day-over-day change in 'Total Doses Delivered', shown for the 'Connecticut' row only.
# The first date's 0 is the fillna placeholder (no earlier day to compare against).
df = get_vaccination_timeseries(now, 'Total Doses Delivered', ndays).diff(axis=1).fillna(0)
df.loc['Connecticut']

02-25-2021       0.0
02-26-2021    7500.0
02-27-2021       0.0
02-28-2021       0.0
Name: Connecticut, dtype: float64

In [62]:
# Day-over-day change in 'Total Doses Administered by State where Administered',
# shown for the 'Connecticut' row only.
# The first date's 0 is the fillna placeholder (no earlier day to compare against).
df = get_vaccination_timeseries(now, 'Total Doses Administered by State where Administered', ndays).diff(axis=1).fillna(0)
df.loc['Connecticut']

02-25-2021        0.0
02-26-2021    26058.0
02-27-2021    24256.0
02-28-2021    26838.0
Name: Connecticut, dtype: float64

In [63]:
# Day-over-day change in 'People with 1+ Doses by State of Residence',
# shown for the 'Connecticut' row only.
# The first date's 0 is the fillna placeholder (no earlier day to compare against).
df = get_vaccination_timeseries(now, 'People with 1+ Doses by State of Residence', ndays).diff(axis=1).fillna(0)
df.loc['Connecticut']

02-25-2021        0.0
02-26-2021    21188.0
02-27-2021    19341.0
02-28-2021    21712.0
Name: Connecticut, dtype: float64

In [64]:
# Day-over-day change in 'People with 2 Doses by State of Residence',
# shown for the 'Connecticut' row only.
# The first date's 0 is the fillna placeholder (no earlier day to compare against).
df = get_vaccination_timeseries(now, 'People with 2 Doses by State of Residence', ndays).diff(axis=1).fillna(0)
df.loc['Connecticut']

02-25-2021       0.0
02-26-2021    4840.0
02-27-2021    4973.0
02-28-2021    4924.0
Name: Connecticut, dtype: float64