## Data conditioning

### Load the data

In [1]:
# Load the libraries
suppressWarnings(suppressMessages(library(tidyverse)))

# Read the saved workspace (the code below expects it to define `vcf_data`)
# and report how many rows it starts with
load("~/Documents/vcf/data/vcf_data.RData")
print(sprintf("Initial Sample: %d", nrow(vcf_data)))

# Wave label: trial 3 is "Wave 2"; any other (or missing) trial value is "Wave 1"
vcf_data$wave <- factor(
  ifelse(vcf_data$trial %in% 3, "Wave 2", "Wave 1"),
  levels = c("Wave 1", "Wave 2")
)

# Group label: rename the "Gist" arm to "Treatment" and make it the first level.
# Any value other than "Treatment"/"Control" becomes NA in the factor.
vcf_data$group <- factor(
  replace(
    as.character(vcf_data$group),
    vcf_data$group %in% "Gist",
    "Treatment"
  ),
  levels = c("Treatment", "Control")
)

# Political view collapsed to Moderate / Liberal / Conservative, plus an
# explicit "No answer" level for respondents who preferred not to answer
vcf_data$political_view2 <- vcf_data$political_view
vcf_data$political_view2[vcf_data$political_view %in%
                           c("Slightly liberal",
                             "Very liberal")] <- "Liberal"
vcf_data$political_view2[vcf_data$political_view %in%
                           c("Slightly conservative",
                             "Very conservative")] <- "Conservative"
vcf_data$political_view2[vcf_data$political_view %in%
                           "Prefer not to answer"] <- "No answer"
# "Moderate" is the first (reference) level; any label not listed becomes NA
vcf_data$political_view2 <- factor(
  vcf_data$political_view2,
  levels = c("Moderate", "Liberal", "Conservative", "No answer")
)

# Optional exclusion of respondents without a political view (disabled)
#vcf_data <- vcf_data %>% filter(political_view2 != "No answer")

# Age categories
# Half-open bins [-Inf, 30), [30, 50), [50, Inf] so that a non-integer age
# between 49 and 50 falls in "30-49" instead of silently staying NA.
# Labels are unchanged; note that "greater than 50" includes age 50 itself.
# Missing ages stay NA. The result is kept as a character column.
vcf_data$age_cat <- as.character(cut(
  vcf_data$age,
  breaks = c(-Inf, 30, 50, Inf),
  labels = c("less than 30", "30-49", "greater than 50"),
  right = FALSE,
  include.lowest = TRUE
))

# Gender: two-level indicator with "Not Female" as the first (reference) level;
# a missing gender stays NA
vcf_data$gender <- factor(
  ifelse(vcf_data$gender == "Female", "Female", "Not Female"),
  levels = c("Not Female", "Female")
)

# Race: three two-level indicators derived from `race`.
# In each one the "not_*" label is the first (reference) level and a missing
# race gives NA.
vcf_data$afr_am <- factor(
  ifelse(vcf_data$race == "Black or African American", "afr_am", "not_afr_am"),
  levels = c("not_afr_am", "afr_am")
)
vcf_data$white <- factor(
  ifelse(vcf_data$race == "White", "white", "not_white"),
  levels = c("not_white", "white")
)
# "other" = any race that is neither White nor Black or African American
vcf_data$other <- factor(
  ifelse(
    vcf_data$race == "White" | vcf_data$race == "Black or African American",
    "not_other",
    "other"
  ),
  levels = c("not_other", "other")
)


# Education: collapse the detailed responses into two groups.
# Responses not listed below are carried over unchanged.
vcf_data$education2 <- vcf_data$education
vcf_data$education2[vcf_data$education %in% c(
  "Less than high school (for example, middle or elementary school)",
  "Some high school",
  "GED",
  "High school graduate"
)] <- "High school or less"
# NOTE(review): the associate's label below has no closing parenthesis -
# confirm it matches the stored response text exactly.
# NOTE(review): bachelor's and graduate degrees are also folded into
# "some college" - confirm this grouping is intended.
vcf_data$education2[vcf_data$education %in% c(
  "Some college - no degree",
  "Associate's or technical degree (for example, AA",
  "Bachelor's degree (for example, BA, BS, or AB)",
  "Graduate degree (for example, MA or PhD)"
)] <- "some college"

# Marital status: "Married" versus everything else, with "Unmarried" as the
# first (reference) level; a missing status stays NA
vcf_data$marital_status <- factor(
  ifelse(vcf_data$marital_status == "Married", "Married", "Unmarried"),
  levels = c("Unmarried", "Married")
)

# Employment: full-time and part-time work both count as "Working";
# every other non-missing response becomes "Not Working"
vcf_data$employment2 <- vcf_data$employment
vcf_data$employment2[vcf_data$employment2 %in% c(
  "Working full time (40 hours a week or more)",
  "Working part time (less than 40 hours a week)"
)] <- "Working"
# Including NA in the lookup leaves missing responses as NA
vcf_data$employment2[!(vcf_data$employment2 %in% c("Working", NA))] <- "Not Working"

# Income: split at $50,000. Responses not listed below are carried over unchanged.
# NOTE(review): "Prefer not to answer" is counted with the lower income group -
# confirm this is intended.
vcf_data$income2 <- vcf_data$income
vcf_data$income2[vcf_data$income %in% c(
  "Less than $15,000",
  "$15,000 - $19,999",
  "$20,000 - $24,999",
  "$25,000 - $34,999",
  "$35,000 - $49,999",
  "Prefer not to answer"
)] <- "less than $50,000"
vcf_data$income2[vcf_data$income %in% c(
  "$50,000 - $74,999",
  "$75,000 - $99,999",
  "$100,000 and above"
)] <- "$50,000 and above"

# Political affiliation: fold the write-in and no-answer options into "Other"
vcf_data$political_affiliation <- replace(
  vcf_data$political_affiliation,
  vcf_data$political_affiliation %in% c("Other (please describe):",
                                        "Prefer not to answer"),
  "Other"
)


[1] "Initial Sample: 507"


### Impute vaccinated onto intentions scale

In [2]:
# Append a "Vaccinated" level after the existing levels of each intention
# factor. As the last level it gets the highest code under as.numeric()
# (8 per the original note, i.e. assuming a 7-point scale - TODO confirm).
for (intention_col in c("base_intention", "intention_2",
                        "intention_4", "intention_6")) {
  vcf_data[[intention_col]] <- factor(
    vcf_data[[intention_col]],
    levels = c(levels(vcf_data[[intention_col]]), "Vaccinated")
  )
}

# At each follow-up (weeks 2, 4, 6), respondents whose vax_y_n_* answer is
# "Yes" have their intention overwritten with "Vaccinated"
for (week in c("2", "4", "6")) {
  vcf_data[[paste0("intention_", week)]][
    which(vcf_data[[paste0("vax_y_n_", week)]] == "Yes")
  ] <- "Vaccinated"
}

### Convert outcomes to numeric values

In [3]:
# Create a numeric copy (suffix "_n") of each baseline and 6-week outcome.
# For factor columns as.numeric() returns the level position, so the level
# order decides the score.
for (outcome in c(
  # Baseline
  "base_intention", "base_encourage",
  "base_sp_complacency", "base_sp_confidence",
  # 6 week
  "intention_6", "encourage_6",
  "sp_complacency_6", "sp_confidence_6"
)) {
  vcf_data[[paste0(outcome, "_n")]] <- as.numeric(vcf_data[[outcome]])
}

### Dichotomize variables

In [4]:
# Conservative vs. nonconservative ("nonconserv" is the first level);
# a missing political_view2 stays NA
vcf_data$conservative <- factor(
  ifelse(vcf_data$political_view2 == "Conservative", "conserv", "nonconserv"),
  levels = c("nonconserv", "conserv")
)

# Republican vs. nonrepublican; a missing affiliation stays NA
# NOTE(review): here "rep" is the first level, the opposite ordering to
# `conservative` above - confirm the reference level is intended.
vcf_data$republican <- factor(
  ifelse(vcf_data$political_affiliation == "Republican", "rep", "nonrep"),
  levels = c("rep", "nonrep")
)

# Transform trust
# Convert a vector of trust responses to numeric scores:
#   "Not at all" -> 0, "A little" -> 1, "A great deal" -> 2,
#   "Not applicable" / "Don't know" -> NA.
# Any other value is passed to as.numeric(), so numeric-looking text keeps its
# value and unrecognised labels become NA without a warning.
#
# vals: character or factor vector of responses.
# Returns a numeric vector of the same length as `vals`.
transform_trust <- function(vals) {
  vals <- as.character(vals)
  vals[vals %in% c("Not applicable", "Don't know")] <- NA

  scores <- c("Not at all" = "0", "A little" = "1", "A great deal" = "2")
  known <- vals %in% names(scores)
  vals[known] <- scores[vals[known]]

  # Wrap the coercion directly: piping into suppressWarnings() only silences
  # the warning when the pipe evaluates lazily (magrittr >= 2.0).
  suppressWarnings(as.numeric(vals))
}

# Overwrite each trust item in place with its numeric score
for (trust_col in c("fed_gov_trust", "local_gov_trust", "cdc_trust",
                    "fda_trust", "doctor_trust")) {
  vcf_data[[trust_col]] <- transform_trust(vcf_data[[trust_col]])
}

# Collapse a numeric trust score into a two-level factor:
# 0 -> "distrust", any other non-missing value -> "trust", NA stays NA.
# "trust" is the first level.
dichotomize_trust <- function(vals) {
  factor(
    ifelse(vals == 0, "distrust", "trust"),
    levels = c("trust", "distrust")
  )
}

# Add a dichotomized copy (suffix "_di") of each numeric trust item
for (trust_col in c("fed_gov_trust", "local_gov_trust", "cdc_trust",
                    "fda_trust", "doctor_trust")) {
  vcf_data[[paste0(trust_col, "_di")]] <- dichotomize_trust(vcf_data[[trust_col]])
}

### Operationalize trust measures

In [5]:
# Public health institution trust: "distrusting" only when BOTH the FDA and the
# CDC are rated "distrust"; everyone else - including respondents with a
# missing FDA or CDC answer - is labelled "trusting"
vcf_data$phi_trust_cat <- factor(
  ifelse(
    (vcf_data$fda_trust_di == "distrust" &
       vcf_data$cdc_trust_di == "distrust") %in% TRUE,
    "distrusting",
    "trusting"
  ),
  levels = c("distrusting", "trusting")
)
vcf_data$phi_trust_cat %>% table()

# Governmental institution trust: same rule applied to the federal and local
# government items
vcf_data$gov_trust_cat <- factor(
  ifelse(
    (vcf_data$fed_gov_trust_di == "distrust" &
       vcf_data$local_gov_trust_di == "distrust") %in% TRUE,
    "distrusting",
    "trusting"
  ),
  levels = c("distrusting", "trusting")
)
vcf_data$gov_trust_cat %>% table()

.
distrusting    trusting 
        149         358 

.
distrusting    trusting 
        220         287 

## Filter records to exclude from analysis

### Remove people who didn't follow up at 6 weeks

In [6]:
# Row count before any exclusions are applied
sprintf("Initial sample: %d", nrow(vcf_data))

In [7]:
# Split on the 6-week confidence item: a missing value is treated as
# "did not follow up". The share is relative to the full starting sample.
subset <- filter(vcf_data, !is.na(sp_confidence_6))
failed_to_follow_up <- filter(vcf_data, is.na(sp_confidence_6))
paste0(
  "Dropped for follow-up: ", nrow(failed_to_follow_up), " (",
  round(nrow(failed_to_follow_up) / nrow(vcf_data), 3), ")"
)
paste0("Analysis Sample: ", nrow(subset))

### Remove people who weren't paying attention

In [8]:
# Attention check at 6 weeks: the only passing answer is "Somewhat Agree".
# A missing answer is counted as a failure so that the reported number of
# drops matches the rows actually removed below (filter() discards rows whose
# condition evaluates to NA).
not_paying_attention <- subset %>%
    filter(is.na(attention2_6) | attention2_6 != "Somewhat Agree",
           !(mturk_code %in% failed_to_follow_up$mturk_code))

# Rebuild the analysis sample: followed up AND passed the attention check
subset <- vcf_data %>% filter(!is.na(sp_confidence_6), attention2_6 == "Somewhat Agree")
paste0("Dropped for attention-up: ", nrow(not_paying_attention), " (", 
       round(nrow(not_paying_attention)/nrow(vcf_data),3), ")")
paste0("Analysis Sample: ", nrow(subset))

### Remove people who explicitly stated they were vaccinated prior to the study

In [9]:
# Respondents to exclude: flagged vaccinated_prior AND answered at week 2 that
# they were already vaccinated before joining, plus one respondent singled out
# by ID (their 6-week free-text answer is displayed below).
# Inside filter() `vaccinated_prior` refers to the data column, not to the
# data frame being created here.
vaccinated_prior <- filter(
    subset,
    mturk_code == "521706" |
        (vaccinated_prior == TRUE &
             vax_y_n_2 == "Was already vaccinated prior to joining the study")
)
subset <- subset[is.na(match(subset$mturk_code, vaccinated_prior$mturk_code)), ]

paste0(
    "Dropped for explicitly stating they were vaccinated prior to the study: ",
    nrow(vaccinated_prior), " (",
    round(nrow(vaccinated_prior) / nrow(vcf_data), 3), ")"
)
paste0("Analysis Sample: ", nrow(subset))

# Show the evidence for each exclusion
vaccinated_prior %>%
    filter(mturk_code != "521706") %>%
    dplyr::select(base_intention, doses_2, doses_4, doses_6, vax_y_n_2)
vaccinated_prior %>%
    filter(mturk_code == "521706") %>%
    dplyr::select(base_intention, intention_6, addressed_concerns_text_6)

base_intention,doses_2,doses_4,doses_6,vax_y_n_2
<fct>,<dbl>,<dbl>,<dbl>,<chr>
Not at all likely,,,,Was already vaccinated prior to joining the study
5,,3.0,3.0,Was already vaccinated prior to joining the study
3,,,,Was already vaccinated prior to joining the study
Not at all likely,,,2.0,Was already vaccinated prior to joining the study
Not at all likely,,,,Was already vaccinated prior to joining the study
Extremely likely,,,2.0,Was already vaccinated prior to joining the study
Extremely likely,,,3.0,Was already vaccinated prior to joining the study
Not at all likely,,3.0,3.0,Was already vaccinated prior to joining the study
4,,,,Was already vaccinated prior to joining the study
Extremely likely,,3.0,3.0,Was already vaccinated prior to joining the study


base_intention,intention_6,addressed_concerns_text_6
<fct>,<fct>,<chr>
Not at all likely,Not at all likely,I am fully vaccinated and boosted. I have no concerns about vaccines.


### Remove people who gave inconsistent responses on vaccination questions

In [10]:
# Respondents flagged vaccinated_prior, with a 6-week intention recorded, who
# report two or more doses at any follow-up. Listed with the highest dose
# counts first.
infeasible <- subset %>%
    dplyr::select(mturk_code, group, doses_2, doses_4, doses_6,
                  intention_2, intention_4, intention_6, vaccinated_prior) %>%
    filter(
        vaccinated_prior == TRUE,
        !is.na(intention_6),
        doses_2 >= 2 | doses_4 >= 2 | doses_6 >= 2
    ) %>%
    arrange(desc(doses_2), desc(doses_4), desc(doses_6))

# Remove them from the analysis sample and report the count
subset <- filter(subset, !(mturk_code %in% infeasible$mturk_code))
paste0(
    "Dropped for infeasible dose counts: ", nrow(infeasible), " (",
    round(nrow(infeasible) / nrow(vcf_data), 3), ")"
)
paste0("Analysis Sample: ", nrow(subset))
infeasible

mturk_code,group,doses_2,doses_4,doses_6,intention_2,intention_4,intention_6,vaccinated_prior
<chr>,<fct>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<fct>,<lgl>
983103,Control,3,3.0,3.0,Vaccinated,Vaccinated,Vaccinated,True
830458,Control,3,1.0,3.0,Vaccinated,Vaccinated,Vaccinated,True
616612,Control,2,2.0,2.0,Vaccinated,Vaccinated,Vaccinated,True
699737,Control,2,2.0,2.0,Vaccinated,Vaccinated,Vaccinated,True
953099,Treatment,2,2.0,2.0,Vaccinated,Vaccinated,Vaccinated,True
311050,Treatment,2,,,Vaccinated,5,6,True
645517,Treatment,1,3.0,2.0,Vaccinated,Vaccinated,Vaccinated,True


### Remove people who didn't answer all trust questions

In [11]:
# Keep only respondents with no missing value on the four institutional trust
# items (federal/local government, CDC, FDA); doctor_trust is not checked here.
# mturk_code is part of the check, so a missing ID also counts as incomplete.
complete_case_sample <- subset[c("fed_gov_trust", "local_gov_trust",
                                 "cdc_trust", "fda_trust", "mturk_code")]
has_all_answers <- complete.cases(complete_case_sample)
complete_cases <- complete_case_sample[has_all_answers, ]
incomplete <- subset[is.na(match(subset$mturk_code, complete_cases$mturk_code)), ]

# The follow-up and attention conditions are re-applied together with the
# complete-case restriction
subset <- filter(
    subset,
    mturk_code %in% complete_cases$mturk_code,
    attention2_6 == "Somewhat Agree",
    !is.na(sp_confidence_6)
)
paste0(
    "Dropped for incompleteness: ", nrow(incomplete), " (",
    round(nrow(incomplete) / nrow(vcf_data), 3), ")"
)
paste0("Analysis Sample: ", nrow(subset))

## Check for Differential Attrition

In [12]:
# Flag every starting respondent who is absent from the final analysis sample
vcf_data$drop <- !(vcf_data$mturk_code %in% subset$mturk_code)

# Per-arm sample size, number dropped and share dropped
tbl <- vcf_data %>%
    group_by(group) %>%
    summarise(
        n = n(),
        drop_cnt = sum(drop),
        drop_perc = round(drop_cnt / n, 3)
    )

tbl
# Two-sample test of equal drop proportions, Treatment vs. Control
prop.test(x = c(tbl$drop_cnt[tbl$group == "Treatment"], tbl$drop_cnt[tbl$group == "Control"]), 
          n = c(tbl$n[tbl$group == "Treatment"], tbl$n[tbl$group == "Control"]))

group,n,drop_cnt,drop_perc
<fct>,<int>,<int>,<dbl>
Treatment,278,72,0.259
Control,229,64,0.279



	2-sample test for equality of proportions with continuity correction

data:  c(tbl$drop_cnt[tbl$group == "Treatment"], tbl$drop_cnt[tbl$group == "Control"]) out of c(tbl$n[tbl$group == "Treatment"], tbl$n[tbl$group == "Control"])
X-squared = 0.17418, df = 1, p-value = 0.6764
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.10211745  0.06115109
sample estimates:
   prop 1    prop 2 
0.2589928 0.2794760 


In [13]:
# Record the pre-exclusion sample size before vcf_data is overwritten, so the
# retention rate is computed from the data rather than a hard-coded 507.
# Note: re-running this cell on its own would report a rate of 1.
n_initial <- nrow(vcf_data)

# From here on vcf_data is the analysis sample
vcf_data <- subset
nrow(vcf_data)
nrow(vcf_data) / n_initial
vcf_data %>% group_by(group) %>% count()

group,n
<fct>,<int>
Treatment,206
Control,165


In [14]:
# Write the analysis sample to disk under the object name `vcf_data`
save(list = "vcf_data", file = "~/Documents/vcf/data/moderation.RData")