In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import widgets

### State GDPs per Capita

- gdp: Measured in millions of dollars

In [2]:
# Load the per-state GDP table from the local Data directory.
state_gdp = pd.read_csv('./Data/csvData.csv')

In [3]:
# Show column dtypes and non-null counts to confirm the file parsed cleanly.
state_gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   State         50 non-null     object 
 1   gdpPerCapita  50 non-null     float64
 2   gdp           50 non-null     int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 1.3+ KB


In [4]:
# Rank states from highest to lowest GDP per capita. `ascending=False` applies
# to both keys, so ties on gdpPerCapita are broken by State in reverse
# alphabetical order.
state_gdp.sort_values(['gdpPerCapita', 'State'], ascending=False)

Unnamed: 0,State,gdpPerCapita,gdp
0,New York,0.0882,1701399
1,Massachusetts,0.0833,575635
2,Connecticut,0.0787,279653
3,Delaware,0.0773,76537
4,California,0.0762,3018337
5,Alaska,0.0757,54851
6,Washington,0.074,576624
7,North Dakota,0.0723,55657
8,New Jersey,0.0715,634721
9,Illinois,0.07,879947


### U.S. Vaccinations

In [5]:
# Load the vaccination records; `date` is parsed to datetime at read time.
us_vacc = pd.read_csv('./Data/us_state_vaccinations.csv', parse_dates=['date'])

- location: name of the state or federal entity.
- date: date of the observation.
- total_vaccinations: total number of doses administered. This is counted as a single dose, and may not equal the total number of people vaccinated, depending on the specific dose regime (e.g. people receive multiple doses). If a person receives one dose of the vaccine, this metric goes up by 1. If they receive a second dose, it goes up by 1 again.
- total_vaccinations_per_hundred: total_vaccinations per 100 people in the total population of the state.
- daily_vaccinations_raw: daily change in the total number of doses administered. It is only calculated for consecutive days. This is a raw measure provided for data checks and transparency, but we strongly recommend that any analysis on daily vaccination rates be conducted using daily_vaccinations instead.
- daily_vaccinations: new doses administered per day (7-day smoothed). For countries that don't report data on a daily basis, we assume that doses changed equally on a daily basis over any periods in which no data was reported. This produces a complete series of daily figures, which is then averaged over a rolling 7-day window. An example of how we perform this calculation can be found here.
- daily_vaccinations_per_million: daily_vaccinations per 1,000,000 people in the total population of the state.
- people_vaccinated: total number of people who received at least one vaccine dose. If a person receives the first dose of a 2-dose vaccine, this metric goes up by 1. If they receive the second dose, the metric stays the same.
- people_vaccinated_per_hundred: people_vaccinated per 100 people in the total population of the state.
- people_fully_vaccinated: total number of people who received all doses prescribed by the vaccination protocol. If a person receives the first dose of a 2-dose vaccine, this metric stays the same. If they receive the second dose, the metric goes up by 1.
- people_fully_vaccinated_per_hundred: people_fully_vaccinated per 100 people in the total population of the state.
- total_distributed: cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System.
- total_distributed_per_hundred (appears as `distributed_per_hundred` in the loaded data): cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System per 100 people in the total population of the state.
- share_doses_used: share of vaccination doses administered among those recorded as shipped in CDC's Vaccine Tracking System.

In [6]:
# Preview the first five rows.
us_vacc.head()

Unnamed: 0,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used
0,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207
1,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222
2,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212
3,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226
4,2021-01-16,Alabama,,,,,,,,,7557.0,7498.0,1529.0,


In [7]:
# Column dtypes and non-null counts; the counts show which metrics have gaps.
us_vacc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4833 entries, 0 to 4832
Data columns (total 14 columns):
 #   Column                               Non-Null Count  Dtype         
---  ------                               --------------  -----         
 0   date                                 4833 non-null   datetime64[ns]
 1   location                             4833 non-null   object        
 2   total_vaccinations                   4563 non-null   float64       
 3   total_distributed                    4483 non-null   float64       
 4   people_vaccinated                    4471 non-null   float64       
 5   people_fully_vaccinated_per_hundred  4149 non-null   float64       
 6   total_vaccinations_per_hundred       4251 non-null   float64       
 7   people_fully_vaccinated              4389 non-null   float64       
 8   people_vaccinated_per_hundred        4226 non-null   float64       
 9   distributed_per_hundred              4238 non-null   float64       
 10  daily_vaccin

In [32]:
# List the distinct `location` values to see which entries are not U.S. states.
us_vacc['location'].unique()

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'Bureau of Prisons', 'California', 'Colorado', 'Connecticut',
       'Delaware', 'Dept of Defense', 'District of Columbia',
       'Federated States of Micronesia', 'Florida', 'Georgia', 'Guam',
       'Hawaii', 'Idaho', 'Illinois', 'Indian Health Svc', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Long Term Care', 'Louisiana',
       'Maine', 'Marshall Islands', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York State', 'North Carolina', 'North Dakota',
       'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Puerto Rico', 'Republic of Palau', 'Rhode Island',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
       'United States', 'Utah', 'Vermont', 'Veterans Health',
       'Virgin Islands', 'Virginia', 'Washington', 'West V

Notice that there are more locations in `us_vacc` than there are states in the GDP data (`state_gdp`).
- Want to exclude those locations and focus on the states
- Notice that there's a "New York State" in `us_vacc`. Curious what that data entails

In [9]:
# Daily New York State rows, limited to the columns compared against the
# allocation data later on, in chronological order.
# Built as a single .loc selection plus a method chain, so nothing is mutated
# in place on a slice of `us_vacc` (which can raise SettingWithCopyWarning)
# and re-running the cell always yields the same frame.
# Note: fillna(0) turns days with no reported value into 0, so a 0 in a
# cumulative column means "not reported" rather than a real count.
nys_total_vacc = (
    us_vacc.loc[us_vacc['location'] == 'New York State',
                ['date',
                 'total_vaccinations',
                 'total_distributed',
                 'daily_vaccinations',
                 'people_fully_vaccinated']]
    .sort_values(['date'])
    .fillna(0)
)
nys_total_vacc.head()

Unnamed: 0,date,total_vaccinations,total_distributed,daily_vaccinations,people_fully_vaccinated
2960,2021-01-12,579532.0,1622100.0,0.0,36422.0
2961,2021-01-13,632473.0,1796850.0,52941.0,44512.0
2962,2021-01-14,688576.0,1872625.0,54522.0,0.0
2963,2021-01-15,757466.0,1884325.0,59311.0,73523.0
2964,2021-01-16,0.0,0.0,59662.0,0.0


### Pfizer

In [10]:
# Load the Pfizer allocations by jurisdiction; the allocation week is parsed
# to datetime at read time.
pfizer = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Pfizer.csv',
                     parse_dates=['Week of Allocations'])

In [11]:
# Column dtypes and non-null counts for the Pfizer allocations.
pfizer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1008 entries, 0 to 1007
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Jurisdiction          1008 non-null   object        
 1   Week of Allocations   1008 non-null   datetime64[ns]
 2   1st Dose Allocations  1008 non-null   int64         
 3   2nd Dose Allocations  1008 non-null   int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 31.6+ KB


In [12]:
# Preview the first five Pfizer allocation rows.
pfizer.head()

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations,2nd Dose Allocations
0,Connecticut,2021-03-29,60840,60840
1,Maine,2021-03-29,23400,23400
2,Massachusetts,2021-03-29,117000,117000
3,New Hampshire,2021-03-29,23400,23400
4,Rhode Island,2021-03-29,18720,18720


In [64]:
# List the distinct jurisdictions so they can be compared with the
# `location` values in us_vacc.
pfizer['Jurisdiction'].unique()

array(['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire',
       'Rhode Island', 'Vermont', 'New Jersey', 'New York',
       'New York City', 'Puerto Rico', 'U.S. Virgin Islands', 'Delaware',
       'District of Columbia', 'Maryland', 'Pennsylvania', 'Philadelphia',
       'Virginia', 'West Virginia', 'Alabama', 'Florida', 'Georgia',
       'Kentucky', 'Mississippi', 'North Carolina', 'South Carolina',
       'Tennessee', 'Chicago', 'Illinois', 'Indiana', 'Michigan',
       'Minnesota', 'Ohio', 'Wisconsin', 'Arkansas', 'Louisiana',
       'New Mexico', 'Oklahoma', 'Texas', 'Iowa', 'Kansas', 'Missouri',
       'Nebraska', 'Colorado', 'Montana', 'North Dakota', 'South Dakota',
       'Utah', 'Wyoming', 'Arizona', 'California', 'Hawaii', 'Nevada',
       'American Samoa', 'Guam', 'Marshall Islands', 'Micronesia',
       'Mariana Islands', 'Palau', 'Alaska', 'Idaho', 'Oregon',
       'Washington', 'Federal Entities'], dtype=object)

Just like `us_vacc`, `pfizer` contains jurisdictions beyond the 50 states, such as U.S. territories, individual cities, and "Federal Entities".
- New York is represented as "New York" and "New York City"
- Data is aggregated by week and reported on Mondays

### Moderna

In [15]:
# Load the Moderna allocations by jurisdiction; the allocation week is parsed
# to datetime at read time.
moderna = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Moderna.csv', 
                      parse_dates=['Week of Allocations'])

In [16]:
# Column dtypes and non-null counts for the Moderna allocations.
moderna.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 945 entries, 0 to 944
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Jurisdiction          945 non-null    object        
 1   Week of Allocations   945 non-null    datetime64[ns]
 2   1st Dose Allocations  945 non-null    int64         
 3   2nd Dose Allocations  945 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 29.7+ KB


In [17]:
# List the distinct Moderna jurisdictions.
moderna['Jurisdiction'].unique()

array(['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire',
       'Rhode Island', 'Vermont', 'New Jersey', 'New York',
       'New York City', 'Puerto Rico', 'U.S. Virgin Islands', 'Delaware',
       'District of Columbia', 'Maryland', 'Pennsylvania', 'Philadelphia',
       'Virginia', 'West Virginia', 'Alabama', 'Florida', 'Georgia',
       'Kentucky', 'Mississippi', 'North Carolina', 'South Carolina',
       'Tennessee', 'Chicago', 'Illinois', 'Indiana', 'Michigan',
       'Minnesota', 'Ohio', 'Wisconsin', 'Arkansas', 'Louisiana',
       'New Mexico', 'Oklahoma', 'Texas', 'Iowa', 'Kansas', 'Missouri',
       'Nebraska', 'Colorado', 'Montana', 'North Dakota', 'South Dakota',
       'Utah', 'Wyoming', 'Arizona', 'California', 'Hawaii', 'Nevada',
       'American Samoa', 'Guam', 'Marshall Islands', 'Micronesia',
       'Mariana Islands', 'Palau', 'Alaska', 'Idaho', 'Oregon',
       'Washington', 'Federal Entities'], dtype=object)

### J & J

In [19]:
# Load the Janssen (J&J) allocations by jurisdiction; the allocation week is
# parsed to datetime at read time.
jj = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Janssen.csv',
                 parse_dates=['Week of Allocations'])

In [20]:
# Column dtypes and non-null counts for the Janssen allocations.
jj.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Jurisdiction          252 non-null    object        
 1   Week of Allocations   252 non-null    datetime64[ns]
 2   1st Dose Allocations  252 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 6.0+ KB


In [21]:
# List the distinct Janssen jurisdictions.
jj['Jurisdiction'].unique()

array(['Connecticut', 'Maine', 'Massachusetts', 'New Hampshire',
       'Rhode Island', 'Vermont', 'New Jersey', 'New York',
       'New York City', 'Puerto Rico', 'U.S. Virgin Islands', 'Delaware',
       'District of Columbia', 'Maryland', 'Pennsylvania', 'Philadelphia',
       'Virginia', 'West Virginia', 'Alabama', 'Florida', 'Georgia',
       'Kentucky', 'Mississippi', 'North Carolina', 'South Carolina',
       'Tennessee', 'Chicago', 'Illinois', 'Indiana', 'Michigan',
       'Minnesota', 'Ohio', 'Wisconsin', 'Arkansas', 'Louisiana',
       'New Mexico', 'Oklahoma', 'Texas', 'Iowa', 'Kansas', 'Missouri',
       'Nebraska', 'Colorado', 'Montana', 'North Dakota', 'South Dakota',
       'Utah', 'Wyoming', 'Arizona', 'California', 'Hawaii', 'Nevada',
       'American Samoa', 'Guam', 'Marshall Islands', 'Micronesia',
       'Mariana Islands', 'Palau', 'Alaska', 'Idaho', 'Oregon',
       'Washington', 'Federal Entities'], dtype=object)

### Location Check

First thing noticed is that the `location` data in `us_vacc` doesn't correspond 1-to-1 to the `Jurisdiction` data in the individual vaccine allocations

In [23]:
# Distinct jurisdictions appearing in any of the three allocation tables.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
jurisdiction_combined = pd.concat([pfizer['Jurisdiction'],
                                   moderna['Jurisdiction'],
                                   jj['Jurisdiction']]).unique()

In [30]:
# Display the combined list of allocation jurisdictions.
jurisdiction_combined

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'California', 'Chicago', 'Colorado', 'Connecticut', 'Delaware',
       'District of Columbia', 'Federal Entities', 'Florida', 'Georgia',
       'Guam', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Mariana Islands',
       'Marshall Islands', 'Maryland', 'Massachusetts', 'Michigan',
       'Micronesia', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'New York City', 'North Carolina', 'North Dakota',
       'Ohio', 'Oklahoma', 'Oregon', 'Palau', 'Pennsylvania',
       'Philadelphia', 'Puerto Rico', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'U.S. Virgin Islands',
       'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia',
       'Wisconsin', 'Wyoming'], dtype=object)

In [29]:
# Locations that appear in us_vacc but in none of the allocation tables
# (despite the variable name, these ARE present in us_vacc).
missing_from_allocations = ~us_vacc['location'].isin(jurisdiction_combined)
not_in_us_vacc = us_vacc.loc[missing_from_allocations, 'location'].unique().tolist()
sorted(not_in_us_vacc)

['Bureau of Prisons',
 'Dept of Defense',
 'Federated States of Micronesia',
 'Indian Health Svc',
 'Long Term Care',
 'New York State',
 'Northern Mariana Islands',
 'Republic of Palau',
 'United States',
 'Veterans Health',
 'Virgin Islands']

- Curious about "United States"
- Need to verify "New York State" coincides with "New York" in individual allocation data

In [74]:
# Allocation jurisdictions that have no matching `location` in us_vacc.
# The set is built once, instead of recomputing unique() for every element.
us_vacc_locations = set(us_vacc['location'].unique())
not_in_indiv = [i for i in jurisdiction_combined if i not in us_vacc_locations]
not_in_indiv

['New York',
 'New York City',
 'U.S. Virgin Islands',
 'Philadelphia',
 'Chicago',
 'Micronesia',
 'Mariana Islands',
 'Palau',
 'Federal Entities']

- New York split between "New York" and "New York City"
- Individual cities like "Philadelphia" and "Chicago"
- Curious about "Federal Entities" and whether it is comparable to "Bureau of Prisons", "Dept of Defense", and possibly other locations from `us_vacc`

#### "United States" in `us_vacc`

In [33]:
# Show only the rows labelled "United States".
us_vacc.loc[us_vacc['location'].eq('United States')]

Unnamed: 0,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used
4070,2020-12-20,United States,556208.0,,,,0.17,,,,,,,
4071,2020-12-21,United States,614117.0,,,,0.18,,,,5.790900e+04,57909.0,174.0,
4072,2020-12-22,United States,,,,,,,,,1.969540e+05,127432.0,384.0,
4073,2020-12-23,United States,1008025.0,,,,0.30,,,,1.969540e+05,150606.0,454.0,
4074,2020-12-24,United States,,,,,,,,,3.121867e+05,191001.0,575.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4162,2021-03-22,United States,126509736.0,156734555.0,82772416.0,13.53,38.11,44910946.0,24.93,47.21,2.028324e+06,2489697.0,7499.0,0.807
4163,2021-03-23,United States,128217029.0,164300795.0,83930495.0,13.72,38.62,45533962.0,25.28,49.49,1.707293e+06,2497025.0,7521.0,0.780
4164,2021-03-24,United States,130473853.0,169223125.0,85472166.0,13.97,39.30,46365515.0,25.74,50.97,2.256824e+06,2490889.0,7503.0,0.771
4165,2021-03-25,United States,133305295.0,173525335.0,87343622.0,14.28,40.15,47419832.0,26.31,52.27,2.831442e+06,2510755.0,7563.0,0.768


Appears to be cumulative vaccine data of the United States as a whole

#### New York
As stated above, New York appears to be labeled as "New York State" in the `us_vacc` dataframe, whereas the individual vaccine allocation data contains both "New York" and "New York City". I want to verify whether "New York State" in the `us_vacc` dataframe is the combination of "New York" and "New York City" from the individual allocation data.

In [38]:
# Pfizer allocations for the two New York jurisdictions, ordered by week.
is_ny_pfizer = pfizer['Jurisdiction'].isin(['New York', 'New York City'])
ny_pfizer = pfizer[is_ny_pfizer].sort_values(['Week of Allocations'])
ny_pfizer.head()

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations,2nd Dose Allocations
985,New York City,2020-12-14,73125,73125
984,New York,2020-12-14,96525,96525
922,New York City,2020-12-21,51675,51675
921,New York,2020-12-21,68250,68250
859,New York City,2020-12-28,66300,66300


In [41]:
# Moderna allocations for the two New York jurisdictions, ordered by week.
is_ny_moderna = moderna['Jurisdiction'].isin(['New York', 'New York City'])
ny_moderna = moderna[is_ny_moderna].sort_values(['Week of Allocations'])
ny_moderna.head()

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations,2nd Dose Allocations
922,New York City,2020-12-21,149400,149400
921,New York,2020-12-21,196800,196800
859,New York City,2020-12-28,51600,51600
858,New York,2020-12-28,68000,68000
796,New York City,2021-01-04,51400,51400


In [42]:
# Janssen allocations for the two New York jurisdictions, ordered by week.
is_ny_jj = jj['Jurisdiction'].isin(['New York', 'New York City'])
ny_jj = jj[is_ny_jj].sort_values(['Week of Allocations'])
ny_jj.head()

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations
196,New York,2021-03-01,93700
197,New York City,2021-03-01,71100
133,New York,2021-03-15,12900
134,New York City,2021-03-15,9800
70,New York,2021-03-22,12900


In [47]:
# Line up the three New York allocation tables on (jurisdiction, week).
# Outer joins keep weeks in which only some manufacturers shipped; the missing
# side shows up as NaN.
ny_join_keys = ['Jurisdiction', 'Week of Allocations']

pfizer_moderna = pd.merge(ny_pfizer, ny_moderna,
                          how = 'outer',
                          on = ny_join_keys,
                          suffixes = ['_pfizer', '_moderna'])

# The Janssen column does not collide with the already-suffixed Pfizer/Moderna
# columns, so no suffix is applied and it keeps the plain name
# '1st Dose Allocations'.
individual_joined = pd.merge(pfizer_moderna, ny_jj,
                             how = 'outer',
                             on = ny_join_keys,
                             suffixes = ['', '_jj'])

In [50]:
# Weeks without an allocation from a manufacturer came out of the outer joins
# as NaN; count them as 0 doses.
individual_joined = individual_joined.fillna(0)

In [52]:
# Consolidate New York and New York City numbers by Week of Allocation.
# numeric_only=True keeps the text column `Jurisdiction` out of the sum; on
# pandas >= 2.0 the default would concatenate the jurisdiction names into a
# string column instead of dropping it.
total_ny = individual_joined.groupby('Week of Allocations').sum(numeric_only=True)
total_ny

Unnamed: 0_level_0,1st Dose Allocations_pfizer,2nd Dose Allocations_pfizer,1st Dose Allocations_moderna,2nd Dose Allocations_moderna,1st Dose Allocations
Week of Allocations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-14,169650,169650,0.0,0.0,0.0
2020-12-21,119925,119925,346200.0,346200.0,0.0
2020-12-28,154050,154050,119600.0,119600.0,0.0
2021-01-04,119925,119925,119100.0,119100.0,0.0
2021-01-11,119925,119925,119600.0,119600.0,0.0
2021-01-18,124800,124800,125600.0,125600.0,0.0
2021-01-25,124800,124800,125600.0,125600.0,0.0
2021-02-01,123825,123825,165700.0,165700.0,0.0
2021-02-08,124800,124800,181200.0,181200.0,0.0
2021-02-15,124800,124800,195800.0,195800.0,0.0


In [53]:
# Weekly first-dose allocation across Pfizer, Moderna and Janssen
# (the unsuffixed '1st Dose Allocations' column is the Janssen one).
first_dose_cols = ['1st Dose Allocations_pfizer',
                   '1st Dose Allocations_moderna',
                   '1st Dose Allocations']
total_ny['1st_dose_sum'] = total_ny.loc[:, first_dose_cols].sum(axis = 1)

In [54]:
# Add column of 1st and 2nd dose allocations.
# The label slice is inclusive at both ends and relies on column order: it
# spans every column from the Pfizer first-dose column through the Janssen
# '1st Dose Allocations' column.
total_ny['total_dose_sum'] = total_ny.loc[:,'1st Dose Allocations_pfizer':'1st Dose Allocations'].sum(axis = 1)

In [57]:
# Running (cumulative) totals of the weekly allocations, to compare with the
# cumulative figures in us_vacc. The "_rolled" columns are cumulative sums,
# not rolling-window values.
for weekly_col in ('1st_dose_sum', 'total_dose_sum'):
    total_ny[weekly_col + '_rolled'] = total_ny[weekly_col].cumsum()

In [58]:
# Display the weekly allocations together with the derived sum columns.
total_ny

Unnamed: 0_level_0,1st Dose Allocations_pfizer,2nd Dose Allocations_pfizer,1st Dose Allocations_moderna,2nd Dose Allocations_moderna,1st Dose Allocations,1st_dose_sum,total_dose_sum,1st_dose_sum_rolled,total_dose_sum_rolled
Week of Allocations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-12-14,169650,169650,0.0,0.0,0.0,169650.0,339300.0,169650.0,339300.0
2020-12-21,119925,119925,346200.0,346200.0,0.0,466125.0,932250.0,635775.0,1271550.0
2020-12-28,154050,154050,119600.0,119600.0,0.0,273650.0,547300.0,909425.0,1818850.0
2021-01-04,119925,119925,119100.0,119100.0,0.0,239025.0,478050.0,1148450.0,2296900.0
2021-01-11,119925,119925,119600.0,119600.0,0.0,239525.0,479050.0,1387975.0,2775950.0
2021-01-18,124800,124800,125600.0,125600.0,0.0,250400.0,500800.0,1638375.0,3276750.0
2021-01-25,124800,124800,125600.0,125600.0,0.0,250400.0,500800.0,1888775.0,3777550.0
2021-02-01,123825,123825,165700.0,165700.0,0.0,289525.0,579050.0,2178300.0,4356600.0
2021-02-08,124800,124800,181200.0,181200.0,0.0,306000.0,612000.0,2484300.0,4968600.0
2021-02-15,124800,124800,195800.0,195800.0,0.0,320600.0,641200.0,2804900.0,5609800.0


In [62]:
# First 15 daily New York State rows, to set beside the allocation totals.
nys_total_vacc.head(15)

Unnamed: 0_level_0,total_vaccinations,total_distributed,daily_vaccinations,people_fully_vaccinated
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-12,579532.0,1622100.0,0.0,36422.0
2021-01-13,632473.0,1796850.0,52941.0,44512.0
2021-01-14,688576.0,1872625.0,54522.0,0.0
2021-01-15,757466.0,1884325.0,59311.0,73523.0
2021-01-16,0.0,0.0,59662.0,0.0
2021-01-17,0.0,0.0,59872.0,0.0
2021-01-18,0.0,0.0,60012.0,0.0
2021-01-19,1000319.0,1884325.0,60112.0,102187.0
2021-01-20,1046490.0,2057875.0,59145.0,106821.0
2021-01-21,1116441.0,2213975.0,61124.0,120801.0


In [60]:
# Index by date so the frame can be resampled by week.
# The guard makes the cell safe to re-run: once `date` is already the index,
# an unconditional set_index('date') would raise KeyError.
if 'date' in nys_total_vacc.columns:
    nys_total_vacc = nys_total_vacc.set_index('date')

In [61]:
# Collapse the daily rows into Monday-anchored weekly bins and keep each
# column's maximum within the bin. Needs the date index set in the cell above.
nys_total_vacc.resample('W-MON').max()

Unnamed: 0_level_0,total_vaccinations,total_distributed,daily_vaccinations,people_fully_vaccinated
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-18,757466.0,1884325.0,60012.0,73523.0
2021-01-25,1418313.0,2395950.0,69502.0,183915.0
2021-02-01,1927665.0,2932775.0,73438.0,344325.0
2021-02-08,2418074.0,3378300.0,73211.0,554262.0
2021-02-15,2925186.0,3969050.0,85676.0,833444.0
2021-02-22,3711664.0,4171175.0,104996.0,1205614.0
2021-03-01,4355544.0,5757205.0,94610.0,1473773.0
2021-03-08,5416301.0,7010045.0,151537.0,1752004.0
2021-03-15,6443462.0,8237955.0,178982.0,2106643.0
2021-03-22,7522949.0,9586045.0,154212.0,2469786.0


In [None]:
# Dropdown listing every Pfizer jurisdiction alphabetically, starting on Alabama.
pfizer_state = widgets.Dropdown(description = 'State: ',
                                value = 'Alabama',
                                options = sorted(pfizer['Jurisdiction'].unique().tolist()))

# Seed the traces with the dropdown's default jurisdiction only. The observer
# runs only when the selection changes, so plotting the whole table here would
# draw every jurisdiction as one line under a dropdown that reads 'Alabama'.
pfizer_initial = pfizer[pfizer['Jurisdiction'] == pfizer_state.value]

pfizer_trace1 = go.Scatter(x = pfizer_initial['Week of Allocations'],
                           y = pfizer_initial['1st Dose Allocations'],
                           name = 'Pfizer 1st Dose Allocations')
pfizer_trace2 = go.Scatter(x = pfizer_initial['Week of Allocations'],
                           y = pfizer_initial['2nd Dose Allocations'],
                           name = 'Pfizer 2nd Dose Allocations')

pfizer_fig = go.FigureWidget(data=[pfizer_trace1, pfizer_trace2],
                             layout = go.Layout(title=dict(text='Pfizer Dose Allocations')))

In [None]:
def validate():
    """Return True when the Pfizer dropdown's selection is a jurisdiction in `pfizer`."""
    known_jurisdictions = pfizer['Jurisdiction'].unique()
    return pfizer_state.value in known_jurisdictions
    
def response(change):
    """Redraw both Pfizer traces for the jurisdiction selected in `pfizer_state`.

    Used as the observer callback for the dropdown's ``value`` trait. ``change``
    is the notification payload; it is not read, because the selection is taken
    from ``pfizer_state.value``.

    If the selection is not a jurisdiction present in `pfizer`, the figure is
    left unchanged.
    """
    selected = pfizer_state.value

    # Membership is checked here rather than through the module-level
    # validate(): several cells in this notebook define a function with that
    # name, so the global lookup could resolve to a check on another dropdown.
    # Returning early also avoids using an unassigned frame below.
    if selected not in pfizer['Jurisdiction'].unique():
        return

    temp_pfizer_df = pfizer[pfizer['Jurisdiction'] == selected]

    x = temp_pfizer_df['Week of Allocations']
    y1 = temp_pfizer_df['1st Dose Allocations']
    y2 = temp_pfizer_df['2nd Dose Allocations']

    # Apply all four assignments inside one batch_update() context.
    with pfizer_fig.batch_update():
        pfizer_fig.data[0].x = x
        pfizer_fig.data[1].x = x
        pfizer_fig.data[0].y = y1
        pfizer_fig.data[1].y = y2

# Call response() whenever the dropdown's `value` trait changes.
pfizer_state.observe(response, names='value')

In [None]:
# Lay out the dropdown row above the Pfizer figure and display the result.
pfizer_container = widgets.HBox(children=[pfizer_state])
widgets.VBox(children=[pfizer_container, pfizer_fig])

In [None]:
# Dropdown listing every Moderna jurisdiction alphabetically, starting on Alabama.
moderna_state = widgets.Dropdown(description = 'State: ',
                                 value = 'Alabama',
                                 options = sorted(moderna['Jurisdiction'].unique().tolist()))

# Seed the traces with the dropdown's default jurisdiction only. The observer
# runs only when the selection changes, so plotting the whole table here would
# draw every jurisdiction as one line under a dropdown that reads 'Alabama'.
moderna_initial = moderna[moderna['Jurisdiction'] == moderna_state.value]

moderna_trace1 = go.Scatter(x = moderna_initial['Week of Allocations'],
                            y = moderna_initial['1st Dose Allocations'],
                            name = 'Moderna 1st Dose Allocations')
moderna_trace2 = go.Scatter(x = moderna_initial['Week of Allocations'],
                            y = moderna_initial['2nd Dose Allocations'],
                            name = 'Moderna 2nd Dose Allocations')

moderna_fig = go.FigureWidget(data=[moderna_trace1, moderna_trace2],
                              layout = go.Layout(title=dict(text='Moderna Dose Allocations')))

In [None]:
def validate():
    """Return True when the Moderna dropdown's selection is a jurisdiction in `moderna`."""
    known_jurisdictions = moderna['Jurisdiction'].unique()
    return moderna_state.value in known_jurisdictions
    
def response(change):
    """Redraw both Moderna traces for the jurisdiction selected in `moderna_state`.

    Used as the observer callback for the dropdown's ``value`` trait. ``change``
    is the notification payload; it is not read, because the selection is taken
    from ``moderna_state.value``.

    If the selection is not a jurisdiction present in `moderna`, the figure is
    left unchanged.
    """
    selected = moderna_state.value

    # Membership is checked here rather than through the module-level
    # validate(): several cells in this notebook define a function with that
    # name, so the global lookup could resolve to a check on another dropdown.
    # Returning early also avoids using an unassigned frame below.
    if selected not in moderna['Jurisdiction'].unique():
        return

    temp_moderna_df = moderna[moderna['Jurisdiction'] == selected]

    x = temp_moderna_df['Week of Allocations']
    y1 = temp_moderna_df['1st Dose Allocations']
    y2 = temp_moderna_df['2nd Dose Allocations']

    # Apply all four assignments inside one batch_update() context.
    with moderna_fig.batch_update():
        moderna_fig.data[0].x = x
        moderna_fig.data[1].x = x
        moderna_fig.data[0].y = y1
        moderna_fig.data[1].y = y2

# Call response() whenever the dropdown's `value` trait changes.
moderna_state.observe(response, names='value')

In [None]:
# Lay out the dropdown row above the Moderna figure and display the result.
moderna_container = widgets.HBox(children=[moderna_state])
widgets.VBox(children=[moderna_container, moderna_fig])

In [None]:
# Dropdown listing every Janssen jurisdiction alphabetically, starting on Alabama.
jj_state = widgets.Dropdown(description = 'State: ',
                            value = 'Alabama',
                            options = sorted(jj['Jurisdiction'].unique().tolist()))

# Seed the trace with the dropdown's default jurisdiction only. The observer
# runs only when the selection changes, so plotting the whole table here would
# draw every jurisdiction as one line under a dropdown that reads 'Alabama'.
jj_initial = jj[jj['Jurisdiction'] == jj_state.value]

jj_trace = go.Scatter(x = jj_initial['Week of Allocations'],
                      y = jj_initial['1st Dose Allocations'],
                      name = 'Janssen 1st Dose Allocations')

jj_fig = go.FigureWidget(data=[jj_trace],
                         layout = go.Layout(title=dict(text='Janssen Dose Allocations')))

In [None]:
def validate():
    """Return True when the Janssen dropdown's selection is a jurisdiction in `jj`."""
    known_jurisdictions = jj['Jurisdiction'].unique()
    return jj_state.value in known_jurisdictions
    
def response(change):
    """Redraw the Janssen trace for the jurisdiction selected in `jj_state`.

    Used as the observer callback for the dropdown's ``value`` trait. ``change``
    is the notification payload; it is not read, because the selection is taken
    from ``jj_state.value``.

    If the selection is not a jurisdiction present in `jj`, the figure is left
    unchanged.
    """
    # Read the Janssen dropdown; the previous code referenced an undefined
    # name `state` here and an undefined `temp_jj_f` for the y values.
    selected = jj_state.value

    # Membership is checked here rather than through the module-level
    # validate(): several cells in this notebook define a function with that
    # name, so the global lookup could resolve to a check on another dropdown.
    # Returning early also avoids using an unassigned frame below.
    if selected not in jj['Jurisdiction'].unique():
        return

    temp_jj_df = jj[jj['Jurisdiction'] == selected]

    x = temp_jj_df['Week of Allocations']
    y = temp_jj_df['1st Dose Allocations']

    # Apply both assignments inside one batch_update() context.
    with jj_fig.batch_update():
        jj_fig.data[0].x = x
        jj_fig.data[0].y = y

# Call response() whenever the dropdown's `value` trait changes.
jj_state.observe(response, names='value')

In [None]:
# Lay out the dropdown row above the Janssen figure and display the result.
jj_container = widgets.HBox(children=[jj_state])
widgets.VBox(children=[jj_container, jj_fig])

In [None]:
# Dropdown listing every us_vacc location alphabetically, starting on Alabama.
vacc_state = widgets.Dropdown(description = 'State: ',
                              value = 'Alabama',
                              options = sorted(us_vacc['location'].unique().tolist()))

# Seed the trace with the dropdown's default location only. The observer runs
# only when the selection changes, so plotting the whole table here would draw
# every location as one line under a dropdown that reads 'Alabama'.
vacc_initial = us_vacc[us_vacc['location'] == vacc_state.value]

vacc_trace = go.Scatter(x = vacc_initial['date'],
                        y = vacc_initial['total_vaccinations'])

vacc_fig = go.FigureWidget(data=[vacc_trace],
                           layout = go.Layout(title=dict(text='Total Vaccinations')))

In [None]:
def validate():
    """Return True when the vaccination dropdown's selection is a location in `us_vacc`."""
    known_locations = us_vacc['location'].unique()
    return vacc_state.value in known_locations
    
def response(change):
    """Redraw the total-vaccinations trace for the location selected in `vacc_state`.

    Used as the observer callback for the dropdown's ``value`` trait. ``change``
    is the notification payload; it is not read, because the selection is taken
    from ``vacc_state.value``.

    If the selection is not a location present in `us_vacc`, the figure is left
    unchanged.
    """
    selected = vacc_state.value

    # Membership is checked here rather than through the module-level
    # validate(): several cells in this notebook define a function with that
    # name, so this keeps the callback independent of which one is current.
    # Returning early also avoids using an unassigned frame below.
    if selected not in us_vacc['location'].unique():
        return

    temp_vacc_df = us_vacc[us_vacc['location'] == selected]

    x = temp_vacc_df['date']
    y = temp_vacc_df['total_vaccinations']

    # Apply both assignments inside one batch_update() context.
    with vacc_fig.batch_update():
        vacc_fig.data[0].x = x
        vacc_fig.data[0].y = y

# Call response() whenever the dropdown's `value` trait changes.
vacc_state.observe(response, names='value')

In [None]:
# Lay out the dropdown row above the vaccinations figure and display the result.
vacc_container = widgets.HBox(children=[vacc_state])
widgets.VBox(children=[vacc_container, vacc_fig])

In [None]:
# Columns carried from each table into the GDP / vaccination join.
us_vacc_cats = [
    'date',
    'location',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'distributed_per_hundred',
]
state_gdp_cats = [
    'State',
    'gdpPerCapita',
]

In [None]:
# us_vacc labels New York as 'New York State' while state_gdp uses 'New York'.
# Align the label on a copy before joining; otherwise the left join finds no
# match and New York ends up with NaN in every vaccination column.
# NOTE(review): assumes 'New York State' covers the whole state (including
# New York City) — confirm against the data source.
us_vacc_for_join = us_vacc[us_vacc_cats].replace({'location': {'New York State': 'New York'}})

# One output row per (state, date); states without vaccination rows are kept
# with NaN values because this is a left join from state_gdp.
joined = pd.merge(state_gdp[state_gdp_cats],
                  us_vacc_for_join,
                  how = 'left',
                  left_on = 'State',
                  right_on = 'location')

In [None]:
# Preview the joined GDP / vaccination rows.
joined.head()

In [None]:
# Row count of the join result.
len(joined)

### Trends in U.S. Vaccinations

In [None]:
# The file opens with three preamble lines before the real header row, so
# skip them; `Date` is parsed to datetime at read time.
us_trends = pd.read_csv(
    './Data/trends_in_number_of_covid19_vaccinations_in_the_us.csv',
    skiprows = 3,
    parse_dates = ['Date'],
)

In [None]:
# Preview the first ten rows.
us_trends.head(10)

In [None]:
# Column dtypes and non-null counts for the trends table.
us_trends.info()

In [None]:
# Rows for the 'US' program whose date type is 'Admin'.
is_us_program = us_trends['Program'].eq('US')
is_admin_date = us_trends['Date Type'].eq('Admin')
us_trends.loc[is_us_program & is_admin_date]

In [None]:
# Non-null value counts per column for each (Date Type, Program) combination.
us_trends.groupby(['Date Type','Program']).count()