In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets

import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

### State GDPs per Capita

- gdp: Measured in millions of dollars

In [2]:
state_gdp = pd.read_csv('./Data/state_gdp.csv')

In [3]:
state_gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   State         50 non-null     object 
 1   gdpPerCapita  50 non-null     float64
 2   gdp           50 non-null     int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 1.3+ KB


In [4]:
state_gdp.sort_values(['gdpPerCapita', 'State'], ascending=False)

Unnamed: 0,State,gdpPerCapita,gdp
0,New York,0.0882,1701399
1,Massachusetts,0.0833,575635
2,Connecticut,0.0787,279653
3,Delaware,0.0773,76537
4,California,0.0762,3018337
5,Alaska,0.0757,54851
6,Washington,0.074,576624
7,North Dakota,0.0723,55657
8,New Jersey,0.0715,634721
9,Illinois,0.07,879947


### U.S. Vaccinations

In [5]:
# Read in us_state_vaccinations
us_vacc = pd.read_csv('./Data/us_state_vaccinations.csv', parse_dates=['date'])

- location: name of the state or federal entity.
- date: date of the observation.
- total_vaccinations: total number of doses administered. This is counted as a single dose, and may not equal the total number of people vaccinated, depending on the specific dose regime (e.g. people receive multiple doses). If a person receives one dose of the vaccine, this metric goes up by 1. If they receive a second dose, it goes up by 1 again.
- total_vaccinations_per_hundred: total_vaccinations per 100 people in the total population of the state.
- daily_vaccinations_raw: daily change in the total number of doses administered. It is only calculated for consecutive days. This is a raw measure provided for data checks and transparency, but we strongly recommend that any analysis on daily vaccination rates be conducted using daily_vaccinations instead.
- daily_vaccinations: new doses administered per day (7-day smoothed). For countries that don't report data on a daily basis, we assume that doses changed equally on a daily basis over any periods in which no data was reported. This produces a complete series of daily figures, which is then averaged over a rolling 7-day window. An example of how we perform this calculation can be found here.
- daily_vaccinations_per_million: daily_vaccinations per 1,000,000 people in the total population of the state.
- people_vaccinated: total number of people who received at least one vaccine dose. If a person receives the first dose of a 2-dose vaccine, this metric goes up by 1. If they receive the second dose, the metric stays the same.
- people_vaccinated_per_hundred: people_vaccinated per 100 people in the total population of the state.
- people_fully_vaccinated: total number of people who received all doses prescribed by the vaccination protocol. If a person receives the first dose of a 2-dose vaccine, this metric stays the same. If they receive the second dose, the metric goes up by 1.
- people_fully_vaccinated_per_hundred: people_fully_vaccinated per 100 people in the total population of the state.
- total_distributed: cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System.
- distributed_per_hundred: cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System per 100 people in the total population of the state.
- share_doses_used: share of vaccination doses administered among those recorded as shipped in CDC's Vaccine Tracking System.

In [6]:
us_vacc.head()

Unnamed: 0,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used
0,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207
1,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222
2,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212
3,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226
4,2021-01-16,Alabama,,,,,,,,,7557.0,7498.0,1529.0,


In [7]:
us_vacc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5223 entries, 0 to 5222
Data columns (total 14 columns):
 #   Column                               Non-Null Count  Dtype         
---  ------                               --------------  -----         
 0   date                                 5223 non-null   datetime64[ns]
 1   location                             5223 non-null   object        
 2   total_vaccinations                   4953 non-null   float64       
 3   total_distributed                    4867 non-null   float64       
 4   people_vaccinated                    4855 non-null   float64       
 5   people_fully_vaccinated_per_hundred  4515 non-null   float64       
 6   total_vaccinations_per_hundred       4617 non-null   float64       
 7   people_fully_vaccinated              4773 non-null   float64       
 8   people_vaccinated_per_hundred        4592 non-null   float64       
 9   distributed_per_hundred              4604 non-null   float64       
 10  daily_vaccin

In [8]:
# Inspect list of unique location values
us_vacc['location'].unique()

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'Bureau of Prisons', 'California', 'Colorado', 'Connecticut',
       'Delaware', 'Dept of Defense', 'District of Columbia',
       'Federated States of Micronesia', 'Florida', 'Georgia', 'Guam',
       'Hawaii', 'Idaho', 'Illinois', 'Indian Health Svc', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Long Term Care', 'Louisiana',
       'Maine', 'Marshall Islands', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York State', 'North Carolina', 'North Dakota',
       'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Puerto Rico', 'Republic of Palau', 'Rhode Island',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
       'United States', 'Utah', 'Vermont', 'Veterans Health',
       'Virgin Islands', 'Virginia', 'Washington', 'West V

Notice that there are NaN values (investigate). Notice also that there are more locations in `us_vacc` than in the GDP data. Which locations should be used?
- Want to exclude those locations and focus on the states
- Notice that there's a "New York State" in `us_vacc`. Curious what that data entails

In [9]:
# Sort values by date
us_vacc.sort_values('date', inplace = True)

In [10]:
# Rename 'New York State' to 'New York' so it matches the state name used in state_gdp.
# The replacement is scoped to the 'location' column, the only text column that
# holds this label, instead of scanning every column of the frame.
# NOTE: us_vacc itself is updated, so from this cell on it no longer contains
# the label 'New York State'.
us_vacc['location'] = us_vacc['location'].replace('New York State', 'New York')

In [11]:
# Attach the vaccination rows to each state's GDP record.
# A left join keeps every row of state_gdp even when us_vacc has no match.
gdp_vacc = pd.merge(state_gdp, us_vacc,
                    how = 'left',
                    left_on = 'State',
                    right_on = 'location')

In [12]:
# Verify number of locations is the same as number of states (50).
# After a left merge an unmatched state shows up as a NaN location, and
# unique() counts NaN as a value, so first make sure nothing is missing and
# then count with nunique(), which ignores NaN.
assert gdp_vacc['location'].notna().all(), 'some states have no matching rows in us_vacc'
gdp_vacc['location'].nunique()

50

In [13]:
# Turn date column into strings for plotly
gdp_vacc['date'] = gdp_vacc['date'].astype(str)

In [14]:
# Fill NaN values one state at a time: forward fill first, then back fill the
# gaps at the start of each state's series. Doing both steps inside the group
# guarantees no value is borrowed from a neighbouring state.
filled_values = (gdp_vacc
                 .groupby('State', sort = False)
                 .transform(lambda column: column.ffill().bfill()))

# transform() returns only the non-key columns, so put 'State' back in front;
# both pieces share gdp_vacc's index, which makes the join a row-for-row match
gdp_vacc = gdp_vacc[['State']].join(filled_values)

In [15]:
# Lookup from a location's full name to its two-letter postal abbreviation.
# Both `state_abbr` and `us_state_abbrev` are bound to this same dict object.
# Besides the states it also lists DC and several territories.
state_abbr = us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [16]:
# Add a state_abbr column: each location name swapped for its abbreviation
# (names missing from the lookup are carried over unchanged)
state_codes = gdp_vacc['location'].replace(state_abbr)
gdp_vacc['state_abbr'] = state_codes

In [27]:
# Plot an animated heat map of vaccine doses distributed per hundred people by state.
# `featureidkey` is not passed: it only applies when a custom `geojson` is
# supplied, and here the built-in 'USA-states' geometry is matched on state_abbr.
dist_per_hund_hm = px.choropleth(gdp_vacc,
                    locations = 'state_abbr',
                    locationmode = 'USA-states',
                    color = 'distributed_per_hundred',
                    color_continuous_scale = 'Reds',
                    range_color = [0, 100],
                    animation_frame = 'date',
                    title = 'Distributions per Hundred Heatmap')

dist_per_hund_hm.update_layout(geo_scope = 'usa')
# Remove the animation buttons that px stores under layout.updatemenus
dist_per_hund_hm['layout'].pop('updatemenus')

# Display through the figure's own show(), as the later scatter cells do
dist_per_hund_hm.show()

In [31]:
# Animated scatter of doses distributed per hundred people against total state
# GDP (the `gdp` column, measured in millions of dollars); both axes are log scaled.
# The title names total GDP because that is what the y axis shows.
dist_per_hund_gdp = px.scatter(gdp_vacc,
                               x = 'distributed_per_hundred',
                               y = 'gdp',
                               log_x = True,
                               log_y = True,
                               animation_frame = 'date',
                               hover_name = 'location',
                               size_max = 50,
                               range_x = [5, 100],
                               title = 'Vaccines Distributed per Hundred by State GDP')

# Remove the animation buttons that px stores under layout.updatemenus
dist_per_hund_gdp['layout'].pop('updatemenus')

# Display through the figure's own show(), as the later scatter cells do
dist_per_hund_gdp.show()

In [34]:
# Plot an animated heat map of people_fully_vaccinated_per_hundred by state.
# `featureidkey` is not passed: it only applies when a custom `geojson` is
# supplied, and here the built-in 'USA-states' geometry is matched on state_abbr.
fully_vacc_per_hund_hm = px.choropleth(gdp_vacc,
                    locations = 'state_abbr',
                    locationmode = 'USA-states',
                    color = 'people_fully_vaccinated_per_hundred',
                    color_continuous_scale = 'Blues',
                    range_color = [0, 50],
                    animation_frame = 'date',
                    title = 'People Fully Vaccinated per Hundred Heatmap')

fully_vacc_per_hund_hm.update_layout(geo_scope = 'usa')

# Remove the animation buttons that px stores under layout.updatemenus
fully_vacc_per_hund_hm['layout'].pop('updatemenus')

# Show the figure built in this cell. The previous code displayed `fig`, a
# name that is only created further down the notebook.
fully_vacc_per_hund_hm.show()

In [None]:
# combine scatter plots with map plots

In [21]:
# Animated scatter of total doses administered per hundred people against GDP
# per capita. total_vaccinations counts doses, not people, so the title says so.
fig = px.scatter(gdp_vacc,
                 x = 'total_vaccinations_per_hundred',
                 y = 'gdpPerCapita',
                 animation_frame = 'date',
                 hover_name = 'location',
                 log_x = False,
                 size_max = 50,
                 range_x = [1, 100],
                 title = 'Total Vaccinations per Hundred by State by GDP per Capita')

# Remove the animation buttons that px stores under layout.updatemenus
fig['layout'].pop('updatemenus')

fig.show()

In [None]:
# Animated scatter of daily vaccinations per million people (log scaled x axis)
# against GDP per capita. The title now names the plotted metric instead of
# repeating the title of the previous chart.
fig = px.scatter(gdp_vacc,
                 x = 'daily_vaccinations_per_million',
                 y = 'gdpPerCapita',
                 animation_frame = 'date',
                 hover_name = 'location',
                 log_x = True,
                 size_max = 50,
                 range_x = [500, 20000],
                 title = 'Daily Vaccinations per Million by State by GDP per Capita')

# Remove the animation buttons that px stores under layout.updatemenus
fig['layout'].pop('updatemenus')

fig.show()

### Pfizer

In [None]:
pfizer = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Pfizer.csv',
                     parse_dates=['Week of Allocations'])

In [None]:
pfizer.sort_values(['Week of Allocations', 'Jurisdiction'], inplace = True)

In [None]:
pfizer.info()

In [None]:
pfizer.head()

In [None]:
# Check Jurisdiction data
pfizer['Jurisdiction'].unique()

Just like `us_vacc`, `pfizer` includes additional locations beyond the states, covering all U.S. territories.
- New York is represented as "New York" and "New York City"
- Data is aggregated by week and reported on Mondays

### Moderna

In [None]:
moderna = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Moderna.csv', 
                      parse_dates=['Week of Allocations'])

In [None]:
moderna.sort_values(['Week of Allocations', 'Jurisdiction'], inplace = True)

In [None]:
moderna.info()

In [None]:
moderna['Jurisdiction'].unique()

### J & J

In [None]:
jj = pd.read_csv('./Data/COVID-19_Vaccine_Distribution_Allocations_by_Jurisdiction_-_Janssen.csv',
                 parse_dates=['Week of Allocations'])

In [None]:
jj.sort_values(['Week of Allocations', 'Jurisdiction'], inplace = True)

In [None]:
jj.info()

In [None]:
# Print unique Jurisdictions
jj['Jurisdiction'].unique()

### Location Check

The first thing I noticed is that the `location` values in `us_vacc` don't correspond 1-to-1 to the `Jurisdiction` values in the individual vaccine allocation data.

In [None]:
# Distinct jurisdiction names across the three allocation files.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
jurisdiction_combined = pd.concat([pfizer['Jurisdiction'],
                                   moderna['Jurisdiction'],
                                   jj['Jurisdiction']]).unique()

In [None]:
jurisdiction_combined

In [None]:
# Locations that appear in us_vacc but in none of the allocation files
# (note that the list's name reads the other way round)
allocation_names = set(jurisdiction_combined)
not_in_us_vacc = [place for place in us_vacc['location'].unique() if place not in allocation_names]
sorted(not_in_us_vacc)

- Curious about "United States"
- Need to verify "New York State" coincides with "New York" in individual allocation data

In [None]:
# Jurisdictions that appear in the allocation files but not in us_vacc
# (note that the list's name reads the other way round).
# The us_vacc locations are collected once up front instead of being
# recomputed for every jurisdiction inside the comprehension.
us_vacc_locations = set(us_vacc['location'].unique())
not_in_indiv = [i for i in jurisdiction_combined if i not in us_vacc_locations]
not_in_indiv

- New York split between "New York" and "New York City"
- Individual cities like "Philadelphia" and "Chicago"
- Curious about "Federal Entities" and whether it is comparable to "Bureau of Prisons", "Dept of Defense", and possibly other locations from `us_vacc`

#### "United States" in `us_vacc`

In [None]:
us_vacc[us_vacc['location'] == 'United States']

Appears to be cumulative vaccine data of the United States as a whole

#### California

I want to compare the numbers for a state other than New York, Pennsylvania, or Illinois, because those states also have separate entries for New York City, Philadelphia, and Chicago.

In [None]:
# Join the Pfizer, Moderna, and JJ allocations on jurisdiction and week
allocation_keys = ['Jurisdiction', 'Week of Allocations']

# Outer joins keep a jurisdiction/week pair even if only one file has it
pfizer_moderna = pd.merge(pfizer, moderna,
                          how = 'outer',
                          on = allocation_keys,
                          suffixes = ('_pfizer', '_moderna'))

individual_joined = pd.merge(pfizer_moderna, jj,
                             how = 'outer',
                             on = allocation_keys,
                             suffixes = ('', '_jj'))

In [None]:
# Treat a missing allocation as 0 doses, then order the rows by allocation week
individual_joined = (individual_joined
                     .fillna(0)
                     .sort_values('Week of Allocations'))

In [None]:
# Row-wise total of the first-dose allocations from all three manufacturers
first_dose_columns = ['1st Dose Allocations_pfizer',
                      '1st Dose Allocations_moderna',
                      '1st Dose Allocations']
individual_joined['1st_dose_sum'] = individual_joined.loc[:, first_dose_columns].sum(axis = 'columns')

In [None]:
# Add column for sum of 1st and 2nd dose allocations.
# The columns are named explicitly rather than taken as a label slice, so the
# total does not depend on the order the columns happen to be in.
# The unsuffixed '1st Dose Allocations' is the Janssen column after the merge.
dose_columns = ['1st Dose Allocations_pfizer',
                '2nd Dose Allocations_pfizer',
                '1st Dose Allocations_moderna',
                '2nd Dose Allocations_moderna',
                '1st Dose Allocations']
individual_joined['total_dose_sum'] = individual_joined[dose_columns].sum(axis = 1)

In [None]:
# Groupby cumulative sum by location and date

In [None]:
# California rows of the combined allocation data, taken as an independent copy
# so that columns can be added to it without writing into a slice of individual_joined
cal_indiv = individual_joined[individual_joined['Jurisdiction'] == 'California'].copy()

In [None]:
# Calculate cumulative sum of allocated vaccines to compare to us_vacc.
# assign() builds a new frame, which avoids setting columns on a filtered slice
# (the pattern that triggers pandas' SettingWithCopyWarning).
cal_running_totals = cal_indiv[['1st_dose_sum', 'total_dose_sum']].cumsum()
cal_indiv = cal_indiv.assign(**{
    '1st_dose_sum_rolled': cal_running_totals['1st_dose_sum'],
    'total_dose_sum_rolled': cal_running_totals['total_dose_sum'],
})

In [None]:
cal_indiv[['Week of Allocations', '1st_dose_sum_rolled', 'total_dose_sum_rolled']]

In [None]:
# Filter California only from us_vacc
cal_vacc = us_vacc[us_vacc['location'] == 'California'].set_index('date')

In [None]:
cal_vacc.resample('W-MON').max()

Appears `total_dose_sum_rolled` is most representative of `total_distributed` from `us_vacc`.
- `total_dose_sum_rolled` > `total_distributed`
    - `total_dose_sum_rolled`: Amount a jurisdiction can order, as stated in the data documentation
    - `total_distributed`: Final amount distributed to a location

#### New York
As stated above, New York appears to be labeled as "New York State" in the `us_vacc` dataframe, whereas there appear to be both "New York" and "New York City" in the individual vaccine allocation data. I want to verify whether "New York State" in the `us_vacc` dataframe is a combination of "New York" and "New York City" from the individual allocation data.

In [None]:
# Filter New York out of us_vacc.
# An earlier cell renames 'New York State' to 'New York' inside us_vacc, so a
# filter on the old label alone comes back empty on a top-to-bottom run;
# accept either spelling.
ny_state_labels = ['New York State', 'New York']

# Index by date and resample weekly (weeks ending on Monday), keeping each
# week's maximum. Chaining builds a new frame rather than modifying a slice.
nys_vacc = (us_vacc[us_vacc['location'].isin(ny_state_labels)]
            .set_index('date')
            .resample('W-MON')
            .max())

In [None]:
# Filter New York from individual vaccine data
ny_indiv = individual_joined[(individual_joined['Jurisdiction'] == 'New York')].sort_values(['Week of Allocations'])

In [None]:
# Keep only the New York and New York City rows of the individual vaccine data
ny_or_nyc = individual_joined['Jurisdiction'].isin(['New York', 'New York City'])
ny_nyc_indiv = individual_joined[ny_or_nyc].sort_values('Week of Allocations')

In [None]:
# Consolidate New York and New York City numbers by Week of Allocation.
# The previous code grouped all of individual_joined, which summed every
# jurisdiction; the subset is selected here so the cell is correct by itself.
ny_or_nyc = individual_joined['Jurisdiction'].isin(['New York', 'New York City'])

# numeric_only keeps the text 'Jurisdiction' column out of the sums
ny_nyc_indiv = (individual_joined[ny_or_nyc]
                .groupby('Week of Allocations')
                .sum(numeric_only = True))

In [None]:
# Calculate cumulative sum of allocated vaccines to compare to us_vacc
ny_indiv[['1st_dose_sum_rolled', 'total_dose_sum_rolled']] = ny_indiv[['1st_dose_sum', 'total_dose_sum']].cumsum()
ny_nyc_indiv[['1st_dose_sum_rolled', 'total_dose_sum_rolled']] = ny_nyc_indiv[['1st_dose_sum', 'total_dose_sum']].cumsum()

In [None]:
ny_indiv[['Week of Allocations', 'total_dose_sum_rolled']]

In [None]:
ny_nyc_indiv['total_dose_sum_rolled']

In [None]:
nys_vacc['total_distributed']

`New York State` appears to encapsulate New York City, which means New York City can be disregarded in the individual vaccine data. I will assume Chicago and Philadelphia are likewise covered by their respective state numbers.

In [None]:
# Dropdown used to pick which jurisdiction's Pfizer allocations are plotted
pfizer_state = widgets.Dropdown(description = 'State: ',
                                value = 'Alabama',
                                options = sorted(pfizer['Jurisdiction'].unique().tolist()))

# Seed the traces with the dropdown's starting jurisdiction only. Plotting the
# whole frame would draw every jurisdiction as one line while the dropdown
# reads 'Alabama'.
pfizer_initial = pfizer[pfizer['Jurisdiction'] == pfizer_state.value]

pfizer_trace1 = go.Scatter(x = pfizer_initial['Week of Allocations'],
                           y = pfizer_initial['1st Dose Allocations'],
                           name = 'Pfizer 1st Dose Allocations')
pfizer_trace2 = go.Scatter(x = pfizer_initial['Week of Allocations'],
                           y = pfizer_initial['2nd Dose Allocations'],
                           name = 'Pfizer 2nd Dose Allocations')

pfizer_fig = go.FigureWidget(data=[pfizer_trace1, pfizer_trace2],
                             layout = go.Layout(title=dict(text='Pfizer Dose Allocations')))

In [None]:
def validate():
    """Return True when the Pfizer dropdown's current value is a jurisdiction in `pfizer`.

    Note: other cells in this notebook define a function with this same name.
    """
    selection_matches = (pfizer['Jurisdiction'] == pfizer_state.value).any()
    return bool(selection_matches)
    
def response(change):
    """Redraw both Pfizer traces for the jurisdiction selected in `pfizer_state`.

    Parameters
    ----------
    change : object
        Change notification supplied by the widget. It is not inspected; the
        current selection is read from `pfizer_state.value`.

    Returns
    -------
    None
        `pfizer_fig` is updated in place. If the selection matches no row of
        `pfizer`, the figure is left untouched.
    """
    # The membership test is done inline instead of through the module-level
    # validate(): that name is re-defined by later cells, so a lookup at
    # callback time would not reliably check the Pfizer dropdown.
    temp_pfizer_df = pfizer[pfizer['Jurisdiction'] == pfizer_state.value]
    if temp_pfizer_df.empty:
        # Previously this path fell through and raised UnboundLocalError
        return

    x = temp_pfizer_df['Week of Allocations']
    y1 = temp_pfizer_df['1st Dose Allocations']
    y2 = temp_pfizer_df['2nd Dose Allocations']

    # Apply all four assignments as a single figure update
    with pfizer_fig.batch_update():
        pfizer_fig.data[0].x = x
        pfizer_fig.data[1].x = x
        pfizer_fig.data[0].y = y1
        pfizer_fig.data[1].y = y2

pfizer_state.observe(response, names='value')

In [None]:
pfizer_container = widgets.HBox([pfizer_state])
widgets.VBox([pfizer_container,
             pfizer_fig])

In [None]:
# Dropdown used to pick which jurisdiction's Moderna allocations are plotted
moderna_state = widgets.Dropdown(description = 'State: ',
                         value = 'Alabama',
                         options = sorted(moderna['Jurisdiction'].unique().tolist()))

# Seed the traces with the dropdown's starting jurisdiction only. Plotting the
# whole frame would draw every jurisdiction as one line while the dropdown
# reads 'Alabama'.
moderna_initial = moderna[moderna['Jurisdiction'] == moderna_state.value]

moderna_trace1 = go.Scatter(x = moderna_initial['Week of Allocations'],
                            y = moderna_initial['1st Dose Allocations'],
                            name = 'Moderna 1st Dose Allocations')
moderna_trace2 = go.Scatter(x = moderna_initial['Week of Allocations'],
                            y = moderna_initial['2nd Dose Allocations'],
                            name = 'Moderna 2nd Dose Allocations')

moderna_fig = go.FigureWidget(data=[moderna_trace1, moderna_trace2],
                              layout = go.Layout(title=dict(text='Moderna Dose Allocations')))

In [None]:
def validate():
    """Return True when the Moderna dropdown's current value is a jurisdiction in `moderna`.

    Note: other cells in this notebook define a function with this same name.
    """
    selection_matches = (moderna['Jurisdiction'] == moderna_state.value).any()
    return bool(selection_matches)
    
def response(change):
    """Redraw both Moderna traces for the jurisdiction selected in `moderna_state`.

    Parameters
    ----------
    change : object
        Change notification supplied by the widget. It is not inspected; the
        current selection is read from `moderna_state.value`.

    Returns
    -------
    None
        `moderna_fig` is updated in place. If the selection matches no row of
        `moderna`, the figure is left untouched.
    """
    # The membership test is done inline instead of through the module-level
    # validate(): that name is re-defined by other cells, so a lookup at
    # callback time would not reliably check the Moderna dropdown.
    temp_moderna_df = moderna[moderna['Jurisdiction'] == moderna_state.value]
    if temp_moderna_df.empty:
        # Previously this path fell through and raised UnboundLocalError
        return

    x = temp_moderna_df['Week of Allocations']
    y1 = temp_moderna_df['1st Dose Allocations']
    y2 = temp_moderna_df['2nd Dose Allocations']

    # Apply all four assignments as a single figure update
    with moderna_fig.batch_update():
        moderna_fig.data[0].x = x
        moderna_fig.data[1].x = x
        moderna_fig.data[0].y = y1
        moderna_fig.data[1].y = y2

moderna_state.observe(response, names='value')

In [None]:
moderna_container = widgets.HBox([moderna_state])
widgets.VBox([moderna_container,
             moderna_fig])

In [None]:
# Dropdown used to pick which jurisdiction's Janssen allocations are plotted
jj_state = widgets.Dropdown(description = 'State: ',
                            value = 'Alabama',
                            options = sorted(jj['Jurisdiction'].unique().tolist()))

# Seed the trace with the dropdown's starting jurisdiction only. Plotting the
# whole frame would draw every jurisdiction as one line while the dropdown
# reads 'Alabama'.
jj_initial = jj[jj['Jurisdiction'] == jj_state.value]

jj_trace = go.Scatter(x = jj_initial['Week of Allocations'],
                      y = jj_initial['1st Dose Allocations'],
                      name = 'Janssen 1st Dose Allocations')

jj_fig = go.FigureWidget(data=[jj_trace],
                         layout = go.Layout(title=dict(text='Janssen Dose Allocations')))

In [None]:
def validate():
    """Return True when the Janssen dropdown's current value is a jurisdiction in `jj`.

    Note: other cells in this notebook define a function with this same name.
    """
    selection_matches = (jj['Jurisdiction'] == jj_state.value).any()
    return bool(selection_matches)
    
def response(change):
    """Redraw the Janssen trace for the jurisdiction selected in `jj_state`.

    Parameters
    ----------
    change : object
        Change notification supplied by the widget. It is not inspected; the
        current selection is read from `jj_state.value`.

    Returns
    -------
    None
        `jj_fig` is updated in place. If the selection matches no row of `jj`,
        the figure is left untouched.
    """
    # Read the selection from jj_state (the old code referenced an undefined
    # `state`). The membership test is done inline instead of through the
    # module-level validate(), a name that other cells re-define.
    temp_jj_df = jj[jj['Jurisdiction'] == jj_state.value]
    if temp_jj_df.empty:
        # Previously this path fell through and raised UnboundLocalError
        return

    x = temp_jj_df['Week of Allocations']
    # The old code read from `temp_jj_f`, a misspelling of temp_jj_df
    y = temp_jj_df['1st Dose Allocations']

    # Apply both assignments as a single figure update
    with jj_fig.batch_update():
        jj_fig.data[0].x = x
        jj_fig.data[0].y = y

jj_state.observe(response, names='value')

In [None]:
jj_container = widgets.HBox([jj_state])
widgets.VBox([jj_container,
             jj_fig])

In [None]:
# Dropdown used to pick which location's total vaccinations are plotted
vacc_state = widgets.Dropdown(description = 'State: ',
                         value = 'Alabama',
                         options = sorted(us_vacc['location'].unique().tolist()))

# Seed the trace with the dropdown's starting location only. Plotting the
# whole frame would draw every location as one line while the dropdown
# reads 'Alabama'.
vacc_initial = us_vacc[us_vacc['location'] == vacc_state.value]

vacc_trace = go.Scatter(x = vacc_initial['date'],
                        y = vacc_initial['total_vaccinations'])

vacc_fig = go.FigureWidget(data=[vacc_trace],
                     layout = go.Layout(title=dict(text='Total Vaccinations')))

In [None]:
def validate():
    """Return True when the vaccination dropdown's current value is a location in `us_vacc`.

    Note: other cells in this notebook define a function with this same name.
    """
    selection_matches = (us_vacc['location'] == vacc_state.value).any()
    return bool(selection_matches)
    
def response(change):
    """Redraw the total-vaccinations trace for the location selected in `vacc_state`.

    Parameters
    ----------
    change : object
        Change notification supplied by the widget. It is not inspected; the
        current selection is read from `vacc_state.value`.

    Returns
    -------
    None
        `vacc_fig` is updated in place. If the selection matches no row of
        `us_vacc`, the figure is left untouched.
    """
    # The membership test is done inline instead of through the module-level
    # validate(), a name that several cells in this notebook define.
    temp_vacc_df = us_vacc[us_vacc['location'] == vacc_state.value]
    if temp_vacc_df.empty:
        # Previously this path fell through and raised UnboundLocalError
        return

    x = temp_vacc_df['date']
    y = temp_vacc_df['total_vaccinations']

    # Apply both assignments as a single figure update
    with vacc_fig.batch_update():
        vacc_fig.data[0].x = x
        vacc_fig.data[0].y = y

vacc_state.observe(response, names='value')

In [None]:
vacc_container = widgets.HBox([vacc_state])
widgets.VBox([vacc_container,
             vacc_fig])

In [None]:
us_vacc_cats = ['date','location', 'total_vaccinations_per_hundred','people_vaccinated_per_hundred', 'distributed_per_hundred']
state_gdp_cats = ['State', 'gdpPerCapita']

In [None]:
# Left-join the selected vaccination columns onto the selected GDP columns,
# matching the GDP frame's State against the vaccination frame's location
gdp_subset = state_gdp[state_gdp_cats]
vacc_subset = us_vacc[us_vacc_cats]
joined = gdp_subset.merge(vacc_subset,
                          how = 'left',
                          left_on = 'State',
                          right_on = 'location')

In [None]:
joined.head()

In [None]:
len(joined)

### Trends in U.S. Vaccinations

In [None]:
# First 3 rows are header
us_trends = pd.read_csv('./Data/trends_in_number_of_covid19_vaccinations_in_the_us.csv', skiprows = 3, parse_dates=['Date'])

In [None]:
us_trends.head(10)

In [None]:
us_trends.info()

In [None]:
us_trends[(us_trends['Program'] == 'US') & (us_trends['Date Type'] == 'Admin')]

In [None]:
us_trends.groupby(['Date Type','Program']).count()