# Sample Characteristics

In [1]:
suppressWarnings(suppressMessages(library(tidyverse)))
library(tableone)

# Restore the saved study workspace. The rest of the notebook reads the
# `vcf_data` object from it (presumably one row per participant -- confirm
# against the file that wrote the .RData).
load("~/Documents/holding_gwu_dissertation/data/study3_1_data.RData")

# Report how many rows the analysis starts from.
paste0("Initial sample: ", nrow(vcf_data))

## Reasons not to vaccinate

In [2]:
# Long table of stated reasons: one row per (participant, reason).
# `reasons_for_not_vax` stores all of a participant's reasons in one string,
# separated by ".,". Splitting the whole column at once replaces the
# row-by-row loop, which also misbehaved on empty data because `1:nrow()`
# counts down to 0.
reason_df <- tibble(
  mturk_code = vcf_data$mturk_code,
  group = vcf_data$group,
  reasons = str_split(vcf_data$reasons_for_not_vax, "[.],")
) %>%
  unnest(reasons) %>%
  mutate(
    # Strip the padding left by the split, then remove the first period still
    # present (presumably the trailing "." of the last reason in each string).
    reasons = str_replace(str_trim(reasons), "[.]", "")
  )

# Wide indicator table: one column per distinct reason, "Yes" when the
# participant gave that reason.
# NOTE: `spread()` is kept on purpose. The next cell renames the reason columns
# by position (R1, R2, ...), so the column order produced here must not change.
reason_df_wide <- reason_df %>%
  mutate(Yes = "Yes") %>%
  spread(key = "reasons", value = "Yes")

# A reason the participant did not give shows up as NA after spreading.
reason_df_wide[is.na(reason_df_wide)] <- "No"

# The participant identifier is not a table variable.
reason_df_wide <- reason_df_wide %>% dplyr::select(-mturk_code)
CreateTableOne(data = reason_df_wide)

                                                                                                                                                     
                                                                                                                                                      Overall    
  n                                                                                                                                                   397        
  group = Control (%)                                                                                                                                 177 (44.6) 
  I am exercising my right to choose = Yes (%)                                                                                                        201 (50.6) 
  I am not sure the vaccine is effective = Yes (%)                                                                                                    274 (69.0) 
  I am not sure the vaccine is safe = Ye

In [3]:
# Swap the long reason texts for short codes (R1, R2, ...) so the stratified
# table stays readable. Every column other than `group` is treated as a reason
# indicator, and codes follow the current column order. Selecting by name
# avoids assuming `group` is the first column, and `seq_len()` avoids the
# `1:0` trap when there are no reason columns.
is_reason_col <- colnames(reason_df_wide) != "group"
colnames(reason_df_wide)[is_reason_col] <-
  paste0("R", seq_len(sum(is_reason_col)))

# `strata` is spelled out in full; `strat =` only worked through partial
# argument matching.
CreateTableOne(strata = "group", data = reason_df_wide)

                     Stratified by group
                      Treatment   Control      p      test
  n                   220         177                     
  group = Control (%)   0 ( 0.0)  177 (100.0)  <0.001     
  R1 = Yes (%)        106 (48.2)   95 ( 53.7)   0.324     
  R2 = Yes (%)        155 (70.5)  119 ( 67.2)   0.561     
  R3 = Yes (%)        170 (77.3)  129 ( 72.9)   0.373     
  R4 = Yes (%)          4 ( 1.8)    1 (  0.6)   0.509     
  R5 = Yes (%)         88 (40.0)   59 ( 33.3)   0.207     
  R6 = Yes (%)         37 (16.8)   24 ( 13.6)   0.450     
  R7 = Yes (%)         14 ( 6.4)    8 (  4.5)   0.564     
  R8 = Yes (%)         56 (25.5)   41 ( 23.2)   0.681     
  R9 = Yes (%)        120 (54.5)   99 ( 55.9)   0.861     
  R10 = Yes (%)        84 (38.2)   74 ( 41.8)   0.528     
  R11 = Yes (%)        78 (35.5)   60 ( 33.9)   0.828     
  R12 = Yes (%)        12 ( 5.5)    7 (  4.0)   0.646     
  R13 = Yes (%)        25 (11.4)   25 ( 14.1)   0.502     
  R14 = Yes (%)

## COVID-19 Experience

In [4]:
# Study arm plus the COVID-19 experience items.
temp <- vcf_data %>%
  dplyr::select(group, tested_positive, know_someone_diagnosed,
                know_someone_hospitalized, know_someone_died)

# Collapse every answer other than "Yes" into "No". `which()` drops the NA
# comparisons, so missing answers stay missing (same result as before, now
# explicit).
temp$tested_positive[which(temp$tested_positive != "Yes")] <- "No"

# Overall table, then the same variables split by study arm. `strata` is
# spelled out in full; `strat =` only worked through partial argument matching.
CreateTableOne(data = temp)
CreateTableOne(strata = "group", data = temp)

                                     
                                      Overall    
  n                                   397        
  group = Control (%)                 177 (44.6) 
  tested_positive = Yes (%)           109 (27.6) 
  know_someone_diagnosed = Yes (%)    356 (89.7) 
  know_someone_hospitalized = Yes (%) 183 (46.1) 
  know_someone_died = Yes (%)         129 (32.5) 

                                     Stratified by group
                                      Treatment   Control      p      test
  n                                   220         177                     
  group = Control (%)                   0 ( 0.0)  177 (100.0)  <0.001     
  tested_positive = Yes (%)            60 (27.4)   49 ( 27.8)   1.000     
  know_someone_diagnosed = Yes (%)    191 (86.8)  165 ( 93.2)   0.055     
  know_someone_hospitalized = Yes (%) 113 (51.4)   70 ( 39.5)   0.025     
  know_someone_died = Yes (%)          80 (36.4)   49 ( 27.7)   0.084     

## Demographics

In [5]:
# Demographic variables plus the study arm, in the order they should appear
# in the table.
temp <- dplyr::select(
  vcf_data,
  age, white, gender,
  marital_status, employment2, education2, income3,
  political_view, political_affiliation,
  group
)

# Overall (unstratified) demographics table.
CreateTableOne(data = temp)

# Youngest and oldest age in the sample.
min(vcf_data$age)
max(vcf_data$age)

                               
                                Overall      
  n                               397        
  age (mean (SD))               37.09 (9.82) 
  white = white (%)               325 (81.9) 
  gender = Female (%)             289 (72.8) 
  marital_status = Married (%)    162 (40.8) 
  employment2 = Working (%)       282 (71.0) 
  education2 = some college (%)   314 (79.1) 
  income3 (%)                                
     Less than $34,999            154 (39.2) 
     $35,000 - $74,999            165 (42.0) 
     $75,000 and above             74 (18.8) 
  political_view (%)                         
     Conservative                 175 (45.6) 
     Moderate                     125 (32.6) 
     Liberal                       84 (21.9) 
  political_affiliation (%)                  
     Republican                   152 (40.4) 
     Independent                  146 (38.8) 
     Democrat                      78 (20.7) 
  group = Control (%)             177 (44.6) 

In [6]:
# Demographics split by study arm. `strata` is spelled out in full; `strat =`
# only worked through partial argument matching.
CreateTableOne(strata = "group", data = temp)

                               Stratified by group
                                Treatment     Control        p      test
  n                               220           177                     
  age (mean (SD))               36.94 (10.29) 37.27 (9.23)    0.744     
  white = white (%)               177 (80.5)    148 ( 83.6)   0.496     
  gender = Female (%)             158 (71.8)    131 ( 74.0)   0.708     
  marital_status = Married (%)     93 (42.3)     69 ( 39.0)   0.575     
  employment2 = Working (%)       157 (71.4)    125 ( 70.6)   0.960     
  education2 = some college (%)   168 (76.4)    146 ( 82.5)   0.172     
  income3 (%)                                                 0.750     
     Less than $34,999             81 (37.5)     73 ( 41.2)             
     $35,000 - $74,999             93 (43.1)     72 ( 40.7)             
     $75,000 and above             42 (19.4)     32 ( 18.1)             
  political_view (%)                                          0.863     


## Trust in COVID-19 Information Sources

In [7]:
# Trust items and the labels shown for their "0" / "1" / "2" codes.
trust_vars <- c("fed_gov_trust", "local_gov_trust",
                "cdc_trust", "fda_trust", "doctor_trust")
trust_labels <- c("0" = "Not at all", "1" = "A little", "2" = "A great deal")

# Convert one trust item to character and swap known codes for their labels.
# Values that are not a known code (including NA) are left as they are.
relabel_trust <- function(x) {
  x <- as.character(x)
  known <- x %in% names(trust_labels)
  x[known] <- unname(trust_labels[x[known]])
  x
}

temp <- vcf_data %>%
  dplyr::select(fed_gov_trust, local_gov_trust,
                cdc_trust, fda_trust, doctor_trust,
                group)

# One lookup applied to every trust column replaces the per-column,
# per-level assignments.
temp[trust_vars] <- lapply(temp[trust_vars], relabel_trust)

CreateTableOne(data = temp)

                     
                      Overall    
  n                   397        
  fed_gov_trust (%)              
     A great deal      34 ( 8.8) 
     A little         142 (36.9) 
     Not at all       209 (54.3) 
  local_gov_trust (%)            
     A great deal      35 ( 9.0) 
     A little         162 (41.9) 
     Not at all       190 (49.1) 
  cdc_trust (%)                  
     A great deal     108 (28.3) 
     A little         139 (36.4) 
     Not at all       135 (35.3) 
  fda_trust (%)                  
     A great deal      64 (16.8) 
     A little         164 (43.2) 
     Not at all       152 (40.0) 
  doctor_trust (%)               
     A great deal     224 (59.3) 
     A little         128 (33.9) 
     Not at all        26 ( 6.9) 
  group = Control (%) 177 (44.6) 

In [8]:
# Trust in information sources split by study arm. `strata` is spelled out in
# full; `strat =` only worked through partial argument matching.
CreateTableOne(strata = "group", data = temp)

                     Stratified by group
                      Treatment   Control      p      test
  n                   220         177                     
  fed_gov_trust (%)                             0.529     
     A great deal      21 ( 9.9)   13 (  7.6)             
     A little          74 (34.7)   68 ( 39.5)             
     Not at all       118 (55.4)   91 ( 52.9)             
  local_gov_trust (%)                           0.153     
     A great deal      24 (11.2)   11 (  6.4)             
     A little          83 (38.6)   79 ( 45.9)             
     Not at all       108 (50.2)   82 ( 47.7)             
  cdc_trust (%)                                 0.406     
     A great deal      66 (30.8)   42 ( 25.0)             
     A little          77 (36.0)   62 ( 36.9)             
     Not at all        71 (33.2)   64 ( 38.1)             
  fda_trust (%)                                 0.011     
     A great deal      46 (21.8)   18 ( 10.7)             
     A little  