## Load the data

In [1]:
# Load the libraries
suppressWarnings(suppressMessages(library(tidyverse)))
library(tableone)
library(leaflet)
library(noncensus) 
suppressWarnings(suppressMessages(library(sp)))
suppressWarnings(suppressMessages(library(rgdal)))
suppressWarnings(suppressMessages(library(geojsonio)))

# Load the data
# load() restores every object saved in the .RData file into the current
# environment (silently overwriting objects with the same names).
# NOTE(review): the path is relative to the user's home directory, so this
# cell only runs on a machine with the same folder layout.
load("~/Documents/holding_gwu_dissertation/data/study1_data.RData")

In [31]:
# Restore the objects stored in vcf_data.RData into the current environment.
# Presumably this is what provides the `vcf_data` data frame used by the cells
# below -- TODO confirm the object name(s) saved in the file.
load("~/Documents/holding_gwu_dissertation/data/vcf_data.RData")

In [38]:
# How many participants fall into each combination of prior-vaccination flag
# and study arm?
vcf_data %>%
  group_by(vaccinated_prior, group) %>%
  count()

# Inspect the self-reported vaccination answers of Gist-arm participants who
# are flagged as vaccinated before the study, plus two participants (p_72 and
# p_173) that are listed explicitly by id.
vcf_data %>%
  filter(
    vaccinated_prior == TRUE | participant_id %in% c("p_72", "p_173"),
    group == "Gist"
  ) %>%
  select(
    participant_id, vaccinated_prior, mturk_code,
    vax_y_n_2, doses_2, doses_4, doses_6
  ) %>%
  arrange(vaccinated_prior)

vaccinated_prior,group,n
<lgl>,<fct>,<int>
False,Control,215
False,Gist,263
True,Control,14
True,Gist,15


participant_id,vaccinated_prior,mturk_code,vax_y_n_2,doses_2,doses_4,doses_6
<chr>,<lgl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
p_72,False,948253,Was already vaccinated prior to joining the study,,,
p_173,False,703253,Was already vaccinated prior to joining the study,,1.0,2.0
p_1,True,645517,Yes,1.0,3.0,2.0
p_9,True,899088,Was already vaccinated prior to joining the study,,,
p_67,True,523981,Was already vaccinated prior to joining the study,,,
p_82,True,202908,,,3.0,
p_86,True,851383,Was already vaccinated prior to joining the study,,,2.0
p_171,True,241071,Was already vaccinated prior to joining the study,,,
p_304,True,311050,Yes,2.0,,
p_306,True,775304,Was already vaccinated prior to joining the study,,,3.0


## Transform variables

In [2]:
# Keep only participants who were not vaccinated before the study and derive
# the collapsed / relabelled variables used by the tables below.
#
# Recoding conventions used throughout:
#   * replace(x, x %in% c(...), v) leaves missing values untouched;
#   * replace(x, x != "A", v) also leaves missing values untouched, because an
#     NA in the logical index selects nothing on assignment.
vcf_data <- vcf_data %>%
  filter(vaccinated_prior == FALSE) %>%
  mutate(
    # Age bands (rows that match no band, e.g. a missing age, stay NA)
    age_cat = case_when(
      age < 30 ~ "less than 30",
      age >= 30 & age <= 49 ~ "30-49",
      age >= 50 ~ "greater than 50"
    ),

    # Gender: Female vs. everything else, with "Not Female" as reference level
    gender = factor(
      replace(gender, gender != "Female", "Not Female"),
      levels = c("Not Female", "Female")
    ),

    # Race: fold the small categories into "Other/Unknown"
    race = replace(
      race,
      race %in% c(
        "American Indian/Alaska Native",
        "Native Hawaiian/Pacific Islander",
        "Other (please describe):",
        "Prefer not to answer"
      ),
      "Other/Unknown"
    ),

    # Education: three levels, stored in a new column so the raw answer is kept
    education2 = replace(
      education,
      education %in% c(
        "High school graduate",
        "Less than high school (for example, middle or elementary school)",
        "Some high school",
        "GED"
      ),
      "High school or less"
    ),
    # NOTE(review): the unbalanced "(for example, AA" label is reproduced
    # verbatim -- presumably it matches how the answer is stored in the raw
    # data; confirm before "fixing" it.
    education2 = replace(
      education2,
      education2 %in% c(
        "Some college - no degree",
        "Associate's or technical degree (for example, AA"
      ),
      "Associate's degree/some college"
    ),
    education2 = replace(
      education2,
      education2 %in% c(
        "Graduate degree (for example, MA or PhD)",
        "Bachelor's degree (for example, BA, BS, or AB)"
      ),
      "Bachelor's/graduate degree"
    ),

    # Marital status: Married vs. everything else, "Unmarried" as reference
    marital_status = factor(
      replace(marital_status, marital_status != "Married", "Unmarried"),
      levels = c("Unmarried", "Married")
    ),

    # Employment: full- or part-time work vs. every other answer
    employment2 = replace(
      employment,
      employment %in% c(
        "Working full time (40 hours a week or more)",
        "Working part time (less than 40 hours a week)"
      ),
      "Working"
    ),
    employment2 = replace(
      employment2,
      employment2 != "Working",
      "Unemployed/other/prefer not to answer/retired"
    ),

    # Income: three brackets
    income2 = replace(
      income,
      income %in% c("Less than $15,000", "$15,000 - $19,999"),
      "less than $20,000"
    ),
    income2 = replace(
      income2,
      income2 %in% c(
        "$20,000 - $24,999",
        "$25,000 - $34,999",
        "$35,000 - $49,999"
      ),
      "$20,000 - $49,999"
    ),
    income2 = replace(
      income2,
      income2 %in% c(
        "$50,000 - $74,999",
        "$75,000 - $99,999",
        "$100,000 and above"
      ),
      "$50,000 and above"
    ),

    # Political affiliation: merge free-text "other" and refusals
    political_affiliation = replace(
      political_affiliation,
      political_affiliation %in% c(
        "Other (please describe):",
        "Prefer not to answer"
      ),
      "Other"
    ),

    # Political views: drop the slightly/very distinction
    political_view = replace(
      political_view,
      political_view %in% c("Slightly conservative", "Very conservative"),
      "Conservative"
    ),
    political_view = replace(
      political_view,
      political_view %in% c("Slightly liberal", "Very liberal"),
      "Liberal"
    ),

    # Tested positive: any answer other than "Yes" counts as "No"
    tested_positive = replace(tested_positive, tested_positive != "Yes", "No"),

    # Study arm: rename "Gist" to "Treatment" and list Treatment first
    group = as.character(group),
    group = factor(
      replace(group, group == "Gist", "Treatment"),
      levels = c("Treatment", "Control")
    ),

    # Distrust flags: TRUE only for the answer "Not at all"; every other
    # answer, including a missing one, is FALSE
    fed_gov_distrust = fed_gov_trust %in% "Not at all",
    local_gov_distrust = local_gov_trust %in% "Not at all",
    cdc_distrust = cdc_trust %in% "Not at all",
    fda_distrust = fda_trust %in% "Not at all"
  )


## Sample characteristics: demographics, COVID-19 experience, and trust

In [3]:
# Subset of columns reported in the descriptive tables. `temp` is kept as a
# named object because the stratified table is built from it as well.
temp <- dplyr::select(
  vcf_data,
  # demographics
  age, age_cat, gender, race, education2,
  marital_status, employment2, income2,
  # politics
  political_affiliation, political_view,
  # COVID-19 experience
  tested_positive, know_someone_diagnosed,
  know_someone_hospitalized, know_someone_died,
  # distrust indicators
  fed_gov_distrust, local_gov_distrust,
  cdc_distrust, fda_distrust,
  # study arm
  group
)

# Descriptive summary of every selected column for the whole sample.
CreateTableOne(data = temp)

                                     
                                      Overall      
  n                                     478        
  age (mean (SD))                     36.87 (9.71) 
  age_cat (%)                                      
     30-49                              325 (68.0) 
     greater than 50                     49 (10.3) 
     less than 30                       104 (21.8) 
  gender = Female (%)                   355 (74.3) 
  race (%)                                         
     Asian                                9 ( 1.9) 
     Black or African American           65 (13.6) 
     Other/Unknown                       22 ( 4.6) 
     White                              382 (79.9) 
  education2 (%)                                   
     Associate's degree/some college    234 (49.0) 
     Bachelor's/graduate degree         140 (29.3) 
     High school or less                104 (21.8) 
  marital_status = Married (%)          198 (41.4) 
  employment2 = Working (%

In [4]:
# Same descriptive table, split by study arm (tableone adds a p-value column
# comparing the arms). The argument is spelled out as `strata`: the previous
# `strat =` only worked through R's partial argument matching, which is
# fragile and flagged by linters.
CreateTableOne(strata = "group", data = temp)

                                     Stratified by group
                                      Treatment     Control        p      test
  n                                     263           215                     
  age (mean (SD))                     36.65 (10.07) 37.15 (9.28)    0.574     
  age_cat (%)                                                       0.320     
     30-49                              173 (65.8)    152 ( 70.7)             
     greater than 50                     26 ( 9.9)     23 ( 10.7)             
     less than 30                        64 (24.3)     40 ( 18.6)             
  gender = Female (%)                   192 (73.0)    163 ( 75.8)   0.553     
  race (%)                                                          0.312     
     Asian                                6 ( 2.3)      3 (  1.4)             
     Black or African American           36 (13.7)     29 ( 13.5)             
     Other/Unknown                       16 ( 6.1)      6 (  2.8)         

## Sample characteristics: reasons not to get vaccinated

In [5]:
# ---- Reasons not to get vaccinated -----------------------------------------
# `reasons_for_not_vax` holds all reasons a participant selected in a single
# string, with the individual reasons separated by ".,". The code below
#   1. expands that string into one row per participant x reason,
#   2. tallies the reasons overall and per study arm,
#   3. keeps the five most frequent reasons and folds the rest into "Other",
#   4. re-tallies and prints the final table.

# Count how often each reason occurs in `df` and express it as a share of
# `denom` (number of participants). Returns a tibble with the columns
# reasons, <prefix>_n, <prefix>_perc and <prefix>_n_perc ("n (share)").
tally_reasons <- function(df, denom, prefix) {
  out <- df %>%
    group_by(reasons) %>%
    summarise(
      n_sel = n(),
      perc = n_sel / denom,
      n_perc = paste0(n_sel, " (", round(perc, 4), ")")
    )
  names(out) <- c("reasons", paste0(prefix, c("_n", "_perc", "_n_perc")))
  out
}

# Build the overall / treatment / control tallies for `df` and the combined
# display table. Returns list(total, trt, ctrl, summary).
reason_tables <- function(df, n_all, n_trt, n_ctrl) {
  total <- tally_reasons(df, n_all, "tot")
  trt <- tally_reasons(filter(df, group == "Treatment"), n_trt, "trt")
  ctrl <- tally_reasons(filter(df, group == "Control"), n_ctrl, "ctrl")

  out <- total %>%
    left_join(trt, by = "reasons") %>%
    left_join(ctrl, by = "reasons") %>%
    mutate(
      # A reason nobody in an arm selected has no row in that arm's tally;
      # treat it as 0 instead of letting NA propagate into the difference.
      trt_perc = coalesce(trt_perc, 0),
      ctrl_perc = coalesce(ctrl_perc, 0),
      trt_n_perc = coalesce(trt_n_perc, "0 (0)"),
      ctrl_n_perc = coalesce(ctrl_n_perc, "0 (0)"),
      abs_diff = round(abs(trt_perc - ctrl_perc), 5)
    ) %>%
    arrange(desc(tot_n)) %>%
    select(reasons, tot_n_perc, trt_n_perc, ctrl_n_perc, abs_diff)

  # NOTE(review): the numbers in the headers are the total count of selected
  # reasons, not the number of participants that the shares are based on.
  colnames(out) <- c(
    "Reasons not to vaccinate",
    paste0("Total (", sum(total$tot_n), ")"),
    paste0("Treatment (", sum(trt$trt_n), ")"),
    paste0("Control (", sum(ctrl$ctrl_n), ")"),
    "Difference"
  )

  list(total = total, trt = trt, ctrl = ctrl, summary = out)
}

# 1. One row per participant x reason. seq_len() is safe for an empty data
#    frame, and lapply() avoids growing a list inside a loop.
reason_df <- lapply(seq_len(nrow(vcf_data)), function(i) {
  tibble(
    participant_id = vcf_data$participant_id[i],
    group = vcf_data$group[i],
    reasons = vcf_data$reasons_for_not_vax[i] %>%
      str_split("[.],") %>%
      .[[1]] %>%
      str_trim() %>%
      # Strip only the trailing period left on the last reason; an unanchored
      # "[.]" would remove the first period wherever it occurs.
      str_replace("[.]$", "")
  )
})
reason_df <- bind_rows(reason_df)

# Denominators: participants overall and per arm.
n_all <- nrow(vcf_data)
n_trt <- sum(vcf_data$group == "Treatment", na.rm = TRUE)
n_ctrl <- sum(vcf_data$group == "Control", na.rm = TRUE)

# 2. First pass: rank every individual reason by overall frequency.
first_pass <- reason_tables(reason_df, n_all, n_trt, n_ctrl)

# 3. Group less common reasons into a single category of other.
#    head() returns at most five reasons and never pads with NA.
top_reasons <- head(first_pass$summary[["Reasons not to vaccinate"]], 5)
reason_df$reasons[!(reason_df$reasons %in% top_reasons)] <- "Other"

# A participant who selected several minor reasons now has several identical
# "Other" rows; keep one so that each participant counts once per category.
reason_df <- reason_df %>% distinct()

# 4. Second pass on the collapsed categories. The results are stored under the
#    original object names (total, trt, ctrl, summary).
final_pass <- reason_tables(reason_df, n_all, n_trt, n_ctrl)
total <- final_pass$total
trt <- final_pass$trt
ctrl <- final_pass$ctrl
summary <- final_pass$summary

summary

Reasons not to vaccinate,Total (1727),Treatment (958),Control (769),Difference
<chr>,<chr>,<chr>,<chr>,<dbl>
Other,358 (0.749),199 (0.7567),159 (0.7395),0.01712
I am not sure the vaccine is safe,353 (0.7385),200 (0.7605),153 (0.7116),0.04883
I am not sure the vaccine is effective,323 (0.6757),184 (0.6996),139 (0.6465),0.05311
I don't trust the government or pharmaceutical companies,264 (0.5523),146 (0.5551),118 (0.5488),0.0063
I am exercising my right to choose,237 (0.4958),127 (0.4829),110 (0.5116),0.02874
I don't want to put something foreign into my body,192 (0.4017),102 (0.3878),90 (0.4186),0.03077
