# Vaccine Hesitancy for COVID-19: County and local estimates

In [1]:
# Load the lab_black IPython extension (presumably auto-formats cells with Black — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import glob
import os

In [3]:
# Chart styling: register and activate the LA Times Altair theme.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Table display: show up to 50 columns and 1000 rows when rendering frames.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Lift Altair's row cap so the full county table can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the data from the CDC portal

In [4]:
# Source dataset (CDC): https://data.cdc.gov/Vaccinations/Vaccine-Hesitancy-for-COVID-19-County-and-local-es/q9mh-h2tw

In [5]:
# CSV download endpoint for CDC dataset q9mh-h2tw.
url = (
    "https://data.cdc.gov/api/views/q9mh-h2tw/rows.csv"
    "?accessType=DOWNLOAD"
)

In [6]:
# Download the county-level hesitancy table straight from the CDC endpoint.
df = pd.read_csv(filepath_or_buffer=url)

### Clean up the headers

In [7]:
# Normalise headers: trim, lowercase, then swap spaces, slashes and commas
# for underscores and remove colons (applied in that order).
df_headers = df.columns.str.strip().str.lower()
for df_old, df_new in ((" ", "_"), (":", ""), ("/", "_"), (",", "_")):
    df_headers = df_headers.str.replace(df_old, df_new, regex=False)
df.columns = df_headers

In [8]:
# Discard the three boundary/point columns from the CDC table.
df = df.drop(columns=["county_boundary", "state_boundary", "geographical_point"])

In [9]:
# Store FIPS codes as 5-character, zero-padded strings.
fips_as_text = df["fips_code"].astype(str)
df["fips_code"] = fips_as_text.str.zfill(5)

In [10]:
# Preview the first five cleaned rows.
df.head(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,cvac_level_of_concern_for_vaccination_rollout,cvac_level_of_concern,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code
0,1131,"Wilcox County, Alabama",ALABAMA,0.23,0.11,0.93,Very High Concern,0.94,Very High Concern,0.228,0.0053,0.0009,0.0003,0.6938,0.0,0.2684,AL
1,1129,"Washington County, Alabama",ALABAMA,0.23,0.11,0.73,High Concern,0.82,Very High Concern,0.192,0.0146,0.0731,0.0025,0.2354,0.0,0.6495,AL
2,1133,"Winston County, Alabama",ALABAMA,0.22,0.11,0.7,High Concern,0.8,High Concern,0.085,0.0315,0.0034,0.0016,0.0073,0.0005,0.937,AL
3,1127,"Walker County, Alabama",ALABAMA,0.23,0.11,0.75,High Concern,0.68,High Concern,0.158,0.0249,0.0015,0.0049,0.0617,0.0,0.8895,AL
4,2013,"Aleutians East Borough, Alaska",ALASKA,0.26,0.12,0.58,Moderate Concern,0.87,Very High Concern,0.195,0.0901,0.4588,0.1968,0.0322,0.01,0.1321,AK


### Merge with election results

In [11]:
# Load 2020 county election results; county_fips is read as a string so it
# can be joined against the string fips_code column.
election_results_path = "../elections/data/election_results_2020.json"
df_election = pd.read_json(election_results_path, dtype={"county_fips": str})

In [12]:
# Preview the first five election rows.
df_election.head(5)

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184,Trump
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.22409,0.537623,Trump
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631,Trump
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.57728,Trump
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022,Trump


In [13]:
# Left-join election results onto the hesitancy table by county FIPS, so every
# hesitancy row is kept even without an election match. The election table's
# duplicate county name column (suffixed _y) is dropped.
merge = df.merge(
    df_election, how="left", left_on="fips_code", right_on="county_fips"
).drop(columns=["county_name_y"])

In [14]:
# Inspect column dtypes after the join.
merge.dtypes

fips_code                                                 object
county_name_x                                             object
state                                                     object
estimated_hesitant                                       float64
estimated_strongly_hesitant                              float64
social_vulnerability_index_(svi)                         float64
svi_category                                              object
cvac_level_of_concern_for_vaccination_rollout            float64
cvac_level_of_concern                                     object
percent_adults_fully_vaccinated_against_covid-19         float64
percent_hispanic                                         float64
percent_non-hispanic_american_indian_alaska_native       float64
percent_non-hispanic_asian                               float64
percent_non-hispanic_black                               float64
percent_non-hispanic_native_hawaiian_pacific_islander    float64
percent_non-hispanic_whit

In [15]:
# Restore the plain column name for the hesitancy table's county label.
merge = merge.rename(columns={"county_name_x": "county_name"})

In [16]:
# Split "County, State" labels into a county `name` and a `state_name`.
# The split is done from the right and capped at one cut (n=1), so a label
# containing an extra ", " still yields exactly two columns instead of
# breaking the two-column assignment. Note this overwrites the `state_name`
# column that came from the election table.
merge[["name", "state_name"]] = merge["county_name"].str.rsplit(
    ", ", n=1, expand=True
)

In [17]:
# Express the hesitancy share as a percentage, rounded to two decimals.
merge["estimated_hesitant_pct"] = (merge["estimated_hesitant"] * 100).round(2)

In [18]:
# Preview the first five merged rows.
merge.head(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,cvac_level_of_concern_for_vaccination_rollout,cvac_level_of_concern,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code,state_name,county_fips,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner,name,estimated_hesitant_pct
0,1131,"Wilcox County, Alabama",ALABAMA,0.23,0.11,0.93,Very High Concern,0.94,Very High Concern,0.228,0.0053,0.0009,0.0003,0.6938,0.0,0.2684,AL,Alabama,1131.0,1833.0,4048.0,5903.0,-2215.0,0.31052,0.685753,0.375233,Biden,Wilcox County,23.0
1,1129,"Washington County, Alabama",ALABAMA,0.23,0.11,0.73,High Concern,0.82,Very High Concern,0.192,0.0146,0.0731,0.0025,0.2354,0.0,0.6495,AL,Alabama,1129.0,6564.0,2258.0,8876.0,4306.0,0.739522,0.254394,0.485128,Trump,Washington County,23.0
2,1133,"Winston County, Alabama",ALABAMA,0.22,0.11,0.7,High Concern,0.8,High Concern,0.085,0.0315,0.0034,0.0016,0.0073,0.0005,0.937,AL,Alabama,1133.0,10195.0,974.0,11284.0,9221.0,0.903492,0.086317,0.817175,Trump,Winston County,22.0
3,1127,"Walker County, Alabama",ALABAMA,0.23,0.11,0.75,High Concern,0.68,High Concern,0.158,0.0249,0.0015,0.0049,0.0617,0.0,0.8895,AL,Alabama,1127.0,26002.0,4834.0,31170.0,21168.0,0.8342,0.155085,0.679115,Trump,Walker County,23.0
4,2013,"Aleutians East Borough, Alaska",ALASKA,0.26,0.12,0.58,Moderate Concern,0.87,Very High Concern,0.195,0.0901,0.4588,0.1968,0.0322,0.01,0.1321,AK,Alaska,,,,,,,,,,Aleutians East Borough,26.0


In [19]:
# Narrow the merged table to the columns used for the correlation check.
corr_columns = ["fips_code", "county_name", "estimated_hesitant", "per_gop", "winner"]
merge_corr = merge[corr_columns]

In [20]:
# Pearson correlation between estimated hesitancy and GOP vote share,
# restricted to counties where the winner is "Trump".
# The two numeric columns are selected explicitly: calling .corr() on a frame
# that still holds string columns (fips_code, county_name, winner) raises on
# pandas >= 2.0, where non-numeric columns are no longer dropped silently.
trump_counties = merge_corr[merge_corr["winner"] == "Trump"]
trump_counties[["estimated_hesitant", "per_gop"]].corr(method="pearson")

Unnamed: 0,estimated_hesitant,per_gop
estimated_hesitant,1.0,0.252561
per_gop,0.252561,1.0


In [21]:
# National scatter: GOP vote share (x) against estimated hesitancy (y),
# one circle per county, coloured by 2020 winner.
usa_x = alt.X("per_gop", title="% Trump", axis=alt.Axis(tickCount=4, format="%"))
usa_y = alt.Y(
    "estimated_hesitant", title="% hesitant", axis=alt.Axis(tickCount=4, format="%")
)
usa_color = alt.Color(
    "winner", scale=alt.Scale(domain=["Trump", "Biden"], range=["red", "blue"])
)

alt.Chart(merge).mark_circle(size=40).encode(
    x=usa_x, y=usa_y, color=usa_color
).properties(
    width=800,
    height=500,
    title="Relationship between Trump vote and vaccine hesitancy",
)

### Just California

In [22]:
# California rows only, ordered from most to least strongly hesitant.
# An explicit .copy() makes ca_merge an independent frame, so the later
# column assignment on it does not trigger SettingWithCopyWarning and cannot
# be confused with a write into `merge`.
ca_merge = (
    merge[merge["state"] == "CALIFORNIA"]
    .sort_values("estimated_strongly_hesitant", ascending=False)
    .copy()
)

In [23]:
# Remove the literal ", California" suffix from the county labels.
short_county_names = ca_merge["county_name"].str.replace(
    ", California", "", regex=False
)
ca_merge["county_name"] = short_county_names

In [24]:
# California scatter: GOP vote share (x) against estimated hesitancy (y),
# coloured by 2020 winner and sized by total votes cast.
ca_x = alt.X("per_gop", title="% Trump", axis=alt.Axis(tickCount=6, format="%"))
ca_y = alt.Y(
    "estimated_hesitant", title="% hesitant", axis=alt.Axis(tickCount=4, format="%")
)
ca_color = alt.Color(
    "winner", scale=alt.Scale(domain=["Trump", "Biden"], range=["red", "blue"])
)
ca_size = alt.Size("total_votes")

alt.Chart(ca_merge).mark_circle(size=50).encode(
    x=ca_x, y=ca_y, color=ca_color, size=ca_size
).properties(
    width=800,
    height=500,
    title="Relationship between Trump vote and vaccine hesitancy",
)

### Least hesitant in CA

In [25]:
# Five California counties with the lowest estimated hesitancy.
ca_merge.sort_values(by="estimated_hesitant").head(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,cvac_level_of_concern_for_vaccination_rollout,cvac_level_of_concern,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code,state_name,county_fips,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner,name,estimated_hesitant_pct
546,6075,San Francisco County,CALIFORNIA,0.07,0.03,0.39,Low Concern,0.52,Moderate Concern,0.237,0.1524,0.0019,0.3407,0.05,0.0034,0.4051,CA,California,6075,56417.0,378156.0,443458.0,-321739.0,0.127221,0.852744,0.725523,Biden,San Francisco County,7.0
534,6041,Marin County,CALIFORNIA,0.08,0.03,0.27,Low Concern,0.31,Low Concern,0.333,0.1598,0.0017,0.0581,0.0213,0.001,0.7115,CA,California,6041,24612.0,128288.0,155801.0,-103676.0,0.157971,0.823409,0.665439,Biden,Marin County,8.0
566,6081,San Mateo County,CALIFORNIA,0.08,0.03,0.26,Low Concern,0.4,Moderate Concern,0.264,0.244,0.0015,0.2831,0.0218,0.0132,0.3924,CA,California,6081,75563.0,291410.0,374138.0,-215847.0,0.201966,0.778884,0.576918,Biden,San Mateo County,8.0
647,6085,Santa Clara County,CALIFORNIA,0.08,0.03,0.36,Low Concern,0.42,Moderate Concern,0.216,0.2547,0.0017,0.3628,0.0235,0.0032,0.3154,CA,California,6085,214612.0,617967.0,850522.0,-403355.0,0.25233,0.726574,0.474244,Biden,Santa Clara County,8.0
623,6013,Contra Costa County,CALIFORNIA,0.09,0.04,0.41,Moderate Concern,0.47,Moderate Concern,0.261,0.2559,0.002,0.1648,0.084,0.0046,0.4383,CA,California,6013,152877.0,416386.0,581230.0,-263509.0,0.263023,0.716388,0.453364,Biden,Contra Costa County,9.0


### Most hesitant in CA?

In [26]:
# Five California counties with the highest estimated hesitancy
# (tail of the ascending sort, so the largest value is listed last).
ca_merge.sort_values(by="estimated_hesitant").tail(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,cvac_level_of_concern_for_vaccination_rollout,cvac_level_of_concern,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code,state_name,county_fips,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner,name,estimated_hesitant_pct
652,6063,Plumas County,CALIFORNIA,0.15,0.06,0.27,Low Concern,0.54,Moderate Concern,,0.0895,0.0144,0.0114,0.0076,0.0046,0.8314,CA,California,6063,6445.0,4561.0,11256.0,1884.0,0.572584,0.405206,0.167377,Trump,Plumas County,15.0
540,6049,Modoc County,CALIFORNIA,0.15,0.06,0.75,High Concern,0.78,High Concern,,0.1449,0.0418,0.0111,0.0153,0.001,0.7769,CA,California,6049,3109.0,1150.0,4338.0,1959.0,0.71669,0.265099,0.451591,Trump,Modoc County,15.0
657,6093,Siskiyou County,CALIFORNIA,0.15,0.06,0.72,High Concern,0.68,High Concern,0.215,0.1258,0.032,0.016,0.0157,0.0029,0.761,CA,California,6093,13290.0,9593.0,23450.0,3697.0,0.566738,0.409083,0.157655,Trump,Siskiyou County,15.0
588,6035,Lassen County,CALIFORNIA,0.15,0.06,0.6,Moderate Concern,0.7,High Concern,0.131,0.1896,0.0274,0.0139,0.0848,0.008,0.6543,CA,California,6035,8970.0,2799.0,11985.0,6171.0,0.748436,0.233542,0.514894,Trump,Lassen County,15.0
527,6031,Kings County,CALIFORNIA,0.16,0.06,0.97,Very High Concern,0.75,High Concern,0.108,0.5449,0.0085,0.0369,0.0583,0.0016,0.3221,CA,California,6031,24072.0,18699.0,43858.0,5373.0,0.548862,0.426353,0.122509,Trump,Kings County,16.0


In [27]:
# Independent copy of the California table for the Datawrapper export.
ca_merge_datawrapper = ca_merge.copy(deep=True)

In [28]:
# Percentage form of the hesitancy share for the Datawrapper export.
# Rounded to two decimals (matching how the column is built on `merge`), so
# binary floating-point artifacts such as 0.07 * 100 == 7.000000000000001
# do not end up in the exported CSV.
ca_merge_datawrapper["estimated_hesitant_pct"] = (
    ca_merge_datawrapper["estimated_hesitant"] * 100
).round(2)

---

In [29]:
# Load the simplified US county geometries.
counties_path = "input/counties/usa_counties_clean_simplified.json"
counties = gpd.read_file(counties_path)

In [30]:
# Normalise headers the same way as the CDC table: trim, lowercase, then swap
# spaces, slashes and commas for underscores and remove colons.
county_headers = counties.columns.str.strip().str.lower()
for county_old, county_new in ((" ", "_"), (":", ""), ("/", "_"), (",", "_")):
    county_headers = county_headers.str.replace(county_old, county_new, regex=False)
counties.columns = county_headers

In [31]:
# Preview the first five county geometries.
counties.head(5)

Unnamed: 0,statefp,countyfp,countyns,geoid,name,shape_area,geometry
0,15,3,365281,15003,Honolulu,0.138087,"POLYGON ((-157.91418 21.63521, -157.98643 21.6..."
1,41,3,1155126,41003,Benton,0.198723,"POLYGON ((-123.14903 44.72022, -123.17499 44.7..."
2,41,11,1135848,41011,Coos,0.471548,"POLYGON ((-123.81155 42.78884, -123.81150 42.7..."
3,6,45,277287,6045,Mendocino,0.935848,"POLYGON ((-123.54446 40.00192, -123.59440 40.0..."
4,41,41,1135856,41041,Lincoln,0.294925,"POLYGON ((-123.72466 45.04443, -123.74415 45.0..."


In [32]:
# Attach the hesitancy/election columns to the county geometries
# (inner join: geoid on the geometry side, fips_code on the data side).
merge_geo = counties.merge(
    merge, how="inner", left_on="geoid", right_on="fips_code"
)

In [33]:
# Row count after the geometry join.
merge_geo.shape[0]

3142

In [34]:
# Quick visual check that the joined geometries render.
merge_geo.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x125193150>

In [35]:
# Write the joined county geometries + hesitancy data as GeoJSON.
# The output directory is created first so the export also works on a fresh
# checkout where output/ does not exist yet.
os.makedirs("output", exist_ok=True)
merge_geo.to_file("output/vaccine_hesitancy_geo.geojson", driver="GeoJSON")

---

### Just the states

In [36]:
# State-level estimates from the HHS/ASPE workbook; State FIPS is read as
# text so it can be zero-padded later.
state_estimates_url = "https://aspe.hhs.gov/system/files/pdf/265341/Predicted-Vaccine-Hesitancy-by-State-PUMA-County.xlsx"
states = pd.read_excel(
    state_estimates_url,
    sheet_name="state_hesitancy_estimates",
    dtype={"State FIPS": "str"},
)

In [37]:
# Map the workbook's verbose headers to short column names.
state_column_names = {
    "State FIPS": "fips",
    "State Name": "state",
    "% Estimated Hesitant - March 3 - March 15, 2021": "hesitant",
    "% Estimated Strongly Hesitant - March 3 - March 15, 2021": "strongly_hesitant",
}
states = states.rename(columns=state_column_names)

In [38]:
# Scale both state-level estimates by 100 and round to one decimal.
for state_pct_col in ("hesitant", "strongly_hesitant"):
    states[state_pct_col] = (states[state_pct_col] * 100).round(1)

In [39]:
# Zero-pad state FIPS codes to two characters.
state_fips_text = states["fips"].str
states["fips"] = state_fips_text.zfill(2)

In [40]:
# Preview the first five state rows.
states.head(5)

Unnamed: 0,fips,state,hesitant,strongly_hesitant
0,1,Alabama,21.5,10.3
1,2,Alaska,20.4,9.0
2,4,Arizona,17.4,10.2
3,5,Arkansas,21.2,12.6
4,6,California,10.8,4.3


---

## Exports 

In [41]:
# Make sure the export directory exists before the first CSV is written;
# to_csv does not create missing parent directories.
os.makedirs("output/hesitancy", exist_ok=True)

# Seven California counties with the lowest estimated hesitancy.
ca_merge.sort_values("estimated_hesitant", ascending=True).head(7).to_csv(
    "output/hesitancy/seven_least_hesitant.csv", index=False
)

In [42]:
# Seven California counties with the highest estimated hesitancy
# (written in ascending order, so the most hesitant county is the last row).
ca_ascending_for_top = ca_merge.sort_values(by="estimated_hesitant")
ca_ascending_for_top.tail(7).to_csv(
    "output/hesitancy/seven_most_hesitant.csv", index=False
)

In [43]:
# Every California county, least hesitant first.
ca_all_ascending = ca_merge.sort_values(by="estimated_hesitant")
ca_all_ascending.to_csv("output/hesitancy/ca_hesitant_all_counties.csv", index=False)

In [44]:
# Slim four-column California table for Datawrapper, least hesitant first.
datawrapper_columns = ["fips_code", "county_name", "estimated_hesitant_pct", "winner"]
(
    ca_merge_datawrapper[datawrapper_columns]
    .sort_values(by="estimated_hesitant_pct")
    .to_csv("output/hesitancy/ca_hesitant_datawrapper.csv", index=False)
)

In [45]:
# State-level estimates, least hesitant first.
states_ascending = states.sort_values(by="hesitant")
states_ascending.to_csv("output/hesitancy/states_hesitantcy.csv", index=False)

In [46]:
# Slim four-column table of every US county, least hesitant first.
usa_export_columns = ["fips_code", "name", "estimated_hesitant_pct", "winner"]
(
    merge[usa_export_columns]
    .sort_values(by="estimated_hesitant_pct")
    .to_csv("output/hesitancy/usa_counties.csv", index=False)
)