# Setup

## Imports

In [35]:
# Import Libraries
library(tidyverse)
library(data.table)
library(broom)
library(dtplyr)
library(lme4)
library(lmerTest)
library(ggbeeswarm)
library(cowplot)
library(tibble)

# Load helper functions
# Project-local helpers, given as a path relative to the working directory.
source("r_scripts/helper_functions.R")
# NOTE(review): this runs code downloaded from the `master` branch of a remote
# repository every time the notebook is executed, so what gets sourced can
# change between runs.
source("https://raw.githubusercontent.com/hauselin/Rcode/master/cleanQuestionnaire.R")

# Session-wide options:
#   repr.plot.width / repr.plot.height - universal plot size
#   scipen                             - avoids scientific notation
options(
  repr.plot.width = 7,
  repr.plot.height = 3.5,
  scipen = 999
)

## Load and manipulate data

In [36]:
# Read in data
# (`stringsAsFactors` is not an as_tibble() parameter, so it is not passed.)
df <- as_tibble(read_csv("data/questionnaires.csv"))

# Select fMRI study subjects, identified through the second column.
# NOTE(review): in the regex "FRMRI*" the `*` applies only to the final "I",
# so any value containing "FRMR" is kept - confirm that is the intended filter.
fmri_sub <- grepl("FRMRI*", df[[2]])
df <- df[fmri_sub, ]

# Rename the leading demographic columns by position.
demo_names <- c(
  "demo_time",
  "subject",
  "demo_gender",
  "demo_race",
  "demo_age",
  "demo_weight",
  "demo_height",
  "demo_yrs_canada",
  "demo_diet",
  "demo_diet_detail",
  "demo_familiar",
  "demo_previous_food",
  "demo_previous_food_time"
)
names(df)[seq_along(demo_names)] <- demo_names

# Just want id number
df$subject <- readr::parse_number(df$subject)

# Name the columns at `positions` <prefix>1, <prefix>2, ... in the order the
# positions are listed (the i-th listed position receives suffix i).
rename_by_position <- function(data, positions, prefix) {
  colnames(data)[positions] <- paste0(prefix, seq_along(positions))
  data
}

# condition effects (positions are deliberately not in ascending order)
df <- rename_by_position(
  df, c(14, 16, 17, 15, 18, 19, 20, 114, 115, 116, 117), "ce_"
)

# strategy questions
df <- rename_by_position(df, c(21:28, 118:125), "s_")

# Three Factor Eating Questionnaire
df <- rename_by_position(df, 29:46, "tfeq_")

# rapid food screener: fruits and veg, then fat
df <- rename_by_position(df, 47:56, "rfs_fv_")
df <- rename_by_position(df, 57:73, "rfs_f_")

# BIS 11
df <- rename_by_position(df, 74:103, "bis_")

# Perceived Stress Scale
df <- rename_by_position(df, 104:113, "pss_")

# Reorder the columns by sorted column name.
df <- df[, order(colnames(df))]

Parsed with column specification:
cols(
  .default = col_character(),
  `What time did you eat the food you indicated in the last question?` = col_time(format = ""),
  `How difficult was it to make choices during trials when you were asked to RESPOND NATURALLY?` = col_double(),
  `How difficult was it to make choices during trials when you were asked to DECREASE YOUR DESIRE?` = col_double(),
  `How much did you think about the health of the foods when choosing NATURALLY?` = col_double(),
  `How much did you try to decrease your desire for the foods when choosing NATURALLY?` = col_double(),
  `How much did you think about the health of the foods when trying to DECREASE DESIRE?` = col_double(),
  `How much did you try to reduce your craving for the foods when trying to DECREASE DESIRE?` = col_double(),
  `How successful do you think you were in controlling your responses to the food when you were asked to DECREASE DESI

In [37]:
# Subject ids that were kept; any id not listed here is treated as "bad".
good_subjects <- c(
  101:104, 106:112, 114, 116, 118:121, 123:126, 129:137, 139:140,
  142:148, 150, 152:158, 162:164
)
# Keep only the rows whose subject id is on the kept list.
df_good <- dplyr::filter(df, subject %in% good_subjects)

In [38]:
# Make a copy of the dataframe
# The recoding steps that follow are applied to this copy, so df_good itself
# keeps the raw questionnaire responses.
# NOTE(review): data.frame() defaults to check.names = TRUE, which rewrites
# non-syntactic column names - confirm that is acceptable for any columns that
# were not renamed above.
df_good_copy <- data.frame(df_good)

In [39]:
# Create vectors containing columns for each scale
all_names <- names(df_good_copy)

# A column belongs to the first scale whose tag it contains, checked in the
# order bis_, pss_, rfs_, tfeq_. The tags are matched as literal substrings
# (fixed = TRUE), not as regular expressions.
has_bis <- grepl("bis_", all_names, fixed = TRUE)
has_pss <- !has_bis & grepl("pss_", all_names, fixed = TRUE)
has_rfs <- !(has_bis | has_pss) & grepl("rfs_", all_names, fixed = TRUE)
has_tfeq <- !(has_bis | has_pss | has_rfs) &
  grepl("tfeq_", all_names, fixed = TRUE)

# Each vector is NULL when no column matches its scale.
bis_cols <- if (any(has_bis)) all_names[has_bis] else NULL
pss_cols <- if (any(has_pss)) all_names[has_pss] else NULL
rfs_cols <- if (any(has_rfs)) all_names[has_rfs] else NULL
tfeq_cols <- if (any(has_tfeq)) all_names[has_tfeq] else NULL

In [40]:
# Recode the text responses of each questionnaire into numeric scores.
#
# Each recode_*() helper maps the response labels of one questionnaire to
# numbers; any response that is not listed becomes NaN (`.default = NaN`).
# The helpers are passed to mutate_at() as plain functions instead of through
# funs(), which is deprecated since dplyr 0.8.0.
# NOTE(review): for a column that was already parsed as numeric, recode()
# presumably coerces these labels to numbers, which would explain a
# "NAs introduced by coercion" warning - confirm.

# Recode values in bis columns
recode_bis <- function(x) {
  recode(x,
    "Rarely" = 1,
    "Occasionally" = 2,
    "Often" = 3,
    "Almost always/Always" = 4,
    .default = NaN
  )
}
df_good_copy <- mutate_at(df_good_copy, bis_cols, recode_bis)

# Recode values in pss columns
recode_pss <- function(x) {
  recode(x,
    "Never" = 0,
    "Almost Never" = 1,
    "Sometimes" = 2,
    "Fairly Often" = 3,
    "Very Often" = 4,
    .default = NaN
  )
}
df_good_copy <- mutate_at(df_good_copy, pss_cols, recode_pss)

# Recode values in rfs columns
recode_rfs <- function(x) {
  recode(x,
    "0: Less than once/week" = 0,
    "1: Once/week" = 1,
    "2: 2-3 times/week" = 2,
    "3: 4-6 times/week" = 3,
    "4: One time/day" = 4,
    "5: 2+ times/day" = 5,
    .default = NaN
  )
}
df_good_copy <- mutate_at(df_good_copy, rfs_cols, recode_rfs)

# Recode values in tfeq columns
# One mapping covers every response format used by the tfeq items; the
# responses "1" to "8" are collapsed pairwise onto 1 to 4.
recode_tfeq <- function(x) {
  recode(x,
    "Definitely false" = 1,
    "Mostly false" = 2,
    "Mostly true" = 3,
    "Definitely true" = 4,
    "Only at meal times" = 1,
    "Sometimes between meals" = 2,
    "Often between meals" = 3,
    "Almost always" = 4,
    "Almost never" = 1,
    "Seldom" = 2,
    "Usually" = 3,
    "Unlikely" = 1,
    "Slightly likely" = 2,
    "Moderately likely" = 3,
    "Very likely" = 4,
    "Never" = 1,
    "Rarely" = 2,
    "Sometimes" = 3,
    "At least once a week" = 4,
    "1" = 1, "2" = 1,
    "3" = 2, "4" = 2,
    "5" = 3, "6" = 3,
    "7" = 4, "8" = 4,
    .default = NaN
  )
}
df_good_copy <- mutate_at(df_good_copy, tfeq_cols, recode_tfeq)

“NAs introduced by coercion”

In [41]:
# Pull data from each questionnaire
#
# extract_scale() returns a data.table holding `subject` followed by the
# columns named in `item_cols`, with every column coerced to numeric.
# It uses base subsetting and data.table::as.data.table() in place of the
# tbl_df()/tbl_dt() wrappers: tbl_df() is deprecated and tbl_dt() is no longer
# provided by current dtplyr. The result is a plain data.table, which is what
# the `DT[i, ..cols]` style indexing used on these tables relies on.
extract_scale <- function(data, item_cols) {
  scale_df <- data[, c("subject", item_cols), drop = FALSE]
  scale_df[] <- lapply(scale_df, as.numeric)
  as.data.table(scale_df)
}

bis_data <- extract_scale(df_good_copy, bis_cols)
pss_data <- extract_scale(df_good_copy, pss_cols)
rfs_data <- extract_scale(df_good_copy, rfs_cols)
tfeq_data <- extract_scale(df_good_copy, tfeq_cols)

scales <- list()
scalesWideRaw <- list()

In [42]:
# Set indices for subscales
#
# Column positions are looked up from the item names instead of being typed in
# by hand, so they stay correct if the column order of a scale table changes.
# item_positions() returns the ascending positions in `data` of the columns
# named <prefix><item>, and stops if any of them is missing.
item_positions <- function(data, prefix, items) {
  wanted <- paste0(prefix, items)
  found <- match(wanted, names(data))
  if (anyNA(found)) {
    stop(
      "Missing questionnaire column(s): ",
      paste(wanted[is.na(found)], collapse = ", "),
      call. = FALSE
    )
  }
  sort(found)
}

# bis
# NonPlanning/Self-Control
bis_cols_NS <- item_positions(bis_data, "bis_", c(1, 7, 8, 12, 13, 14))
# Motor/Motor
bis_cols_MM <- item_positions(bis_data, "bis_", c(2, 3, 4, 17, 19, 22, 25))
# Attentional/Attention
bis_cols_AA <- item_positions(bis_data, "bis_", c(5, 9, 11, 20, 28))
# Attentional/Cognitive Instability
bis_cols_AC <- item_positions(bis_data, "bis_", c(6, 24, 26))
# NonPlanning/Cognitive Complexity
bis_cols_NC <- item_positions(bis_data, "bis_", c(10, 15, 18, 27, 29))
# Motor/Perseverance
bis_cols_MP <- item_positions(bis_data, "bis_", c(16, 21, 23, 30))

# rfs
# Fruits and Vegetables
rfs_cols_FV <- item_positions(rfs_data, "rfs_fv_", 1:10)
# Fat
rfs_cols_F <- item_positions(rfs_data, "rfs_f_", 1:17)

# tfeq
# Uncontrolled Eating Scale
tfeq_cols_UE <- item_positions(
  tfeq_data, "tfeq_", c(1, 4, 5, 7, 8, 9, 13, 14, 17)
)
# Cognitive Restraint Scale
tfeq_cols_CR <- item_positions(tfeq_data, "tfeq_", c(2, 11, 12, 15, 16, 18))
# Emotional Eating Scale
tfeq_cols_EE <- item_positions(tfeq_data, "tfeq_", c(3, 6, 10))

In [43]:
# Exclude subject data for scale if > 1 NA in at least one subscale
# If no subscales present, exclude if > 1 NA in that scale

# Returns the row numbers of `dt` that have more than `max_na` missing values
# within any single group of column positions in `col_groups`, or NULL when no
# row qualifies. The check is vectorised over rows, so an empty table simply
# yields NULL.
rows_exceeding_na <- function(dt, col_groups, max_na = 1) {
  na_flags <- is.na(as.data.frame(dt))
  over_limit <- rep(FALSE, nrow(na_flags))
  for (cols in col_groups) {
    group_na <- rowSums(na_flags[, cols, drop = FALSE])
    over_limit <- over_limit | (group_na > max_na)
  }
  if (any(over_limit)) unname(which(over_limit)) else NULL
}

# bis data
bis_exclude <- rows_exceeding_na(
  bis_data,
  list(
    bis_cols_NS, bis_cols_MM, bis_cols_AA,
    bis_cols_AC, bis_cols_NC, bis_cols_MP
  )
)
if (!is.null(bis_exclude)) {
  bis_data <- bis_data[-bis_exclude]
}

# pss data (no subscales: every column of the table is counted together)
pss_exclude <- rows_exceeding_na(pss_data, list(seq_len(ncol(pss_data))))
if (!is.null(pss_exclude)) {
  pss_data <- pss_data[-pss_exclude]
}

# rfs data
rfs_exclude <- rows_exceeding_na(rfs_data, list(rfs_cols_FV, rfs_cols_F))
if (!is.null(rfs_exclude)) {
  rfs_data <- rfs_data[-rfs_exclude]
}

# tfeq data
tfeq_exclude <- rows_exceeding_na(
  tfeq_data,
  list(tfeq_cols_UE, tfeq_cols_CR, tfeq_cols_EE)
)
if (!is.null(tfeq_exclude)) {
  tfeq_data <- tfeq_data[-tfeq_exclude]
}

In [44]:
# Reverse-score items through hardcoded calculation
# NOTE: each assignment overwrites the stored scores with their reverse, so
# running this cell twice on the same tables restores the original scoring.

# bis items scored on scale of 1 to 4; 5 - 'score' gives reverse
# positions correspond to bis_1,7,8,9,10,12,13,15,20,29,30
bis_reverse <- c(2, 3, 5, 6, 8, 14, 23, 25, 29, 30, 31)
bis_reversed_scores <- 5 - bis_data[, bis_reverse, with = FALSE]
bis_data[, bis_reverse] <- bis_reversed_scores

# pss items scored on scale of 0 to 4; 4 - 'score' gives reverse
# positions correspond to pss_4,5,7,8
pss_reverse <- c(6, 7, 9, 10)
pss_reversed_scores <- 4 - pss_data[, pss_reverse, with = FALSE]
pss_data[, pss_reverse] <- pss_reversed_scores

In [45]:
# Calculate mean of subject scores per subscale
#
# NOTE(review): the id column is passed as a one-column table (`[, 1]`), so in
# the resulting data frames it may keep its original column name rather than
# `ID` - check before referring to it by name.

# Per-row mean over the columns at positions `cols`, ignoring missing values.
subscale_row_means <- function(dt, cols) {
  rowMeans(dt[, cols, with = FALSE], na.rm = TRUE)
}

# initialize bis subscale means: NonPlanning/Self-Control, Motor/Motor,
# Attentional/Attention, Attentional/Cognitive Instability,
# NonPlanning/Cognitive Complexity, Motor/Perseverance
bis_mean_NS <- bis_mean_MM <- bis_mean_AA <- NULL
bis_mean_AC <- bis_mean_NC <- bis_mean_MP <- NULL

# create df of subject means for each bis subscale
bis_subject_means <- data.frame(
  ID = bis_data[, 1],
  bis_mean_NS = subscale_row_means(bis_data, bis_cols_NS),
  bis_mean_MM = subscale_row_means(bis_data, bis_cols_MM),
  bis_mean_AA = subscale_row_means(bis_data, bis_cols_AA),
  bis_mean_AC = subscale_row_means(bis_data, bis_cols_AC),
  bis_mean_NC = subscale_row_means(bis_data, bis_cols_NC),
  bis_mean_MP = subscale_row_means(bis_data, bis_cols_MP)
)

# initialize pss scale mean
pss_mean <- NULL

# create df of subject means for pss (no subscales: all columns but the first)
pss_subject_means <- data.frame(
  ID = pss_data[, 1],
  pss_mean = rowMeans(pss_data[, -1], na.rm = TRUE)
)


# initialize rfs subscale means: Fruits and Vegetables, Fat
rfs_mean_FV <- rfs_mean_F <- NULL

# create df of subject means for each rfs subscale
rfs_subject_means <- data.frame(
  ID = rfs_data[, 1],
  rfs_mean_FV = subscale_row_means(rfs_data, rfs_cols_FV),
  rfs_mean_F = subscale_row_means(rfs_data, rfs_cols_F)
)

# initialize tfeq subscale means: Uncontrolled Eating, Cognitive Restraint,
# Emotional Eating
tfeq_mean_UE <- tfeq_mean_CR <- tfeq_mean_EE <- NULL

# create df of subject means for each tfeq subscale
tfeq_subject_means <- data.frame(
  ID = tfeq_data[, 1],
  tfeq_mean_UE = subscale_row_means(tfeq_data, tfeq_cols_UE),
  tfeq_mean_CR = subscale_row_means(tfeq_data, tfeq_cols_CR),
  tfeq_mean_EE = subscale_row_means(tfeq_data, tfeq_cols_EE)
)

In [46]:
# Adjust subject data with only 1 NA in at least one subscale
# Replace each remaining NA with that subject's own mean for the subscale the
# item belongs to (for pss: the subject's mean over the whole scale).

# Fills missing cells of `dt` and returns the updated table.
#   dt          - scale table (one row per subject)
#   col_groups  - list of column-position vectors, one per subscale
#   group_means - list of per-row mean vectors, parallel to `col_groups` and
#                 aligned with the rows of `dt`
# Columns that are in none of the groups are left untouched. A column listed
# in several groups is filled from the first group that contains it.
# Empty tables and tables without missing cells are returned unchanged.
fill_na_with_subject_mean <- function(dt, col_groups, group_means) {
  for (g in seq_along(col_groups)) {
    for (j_col in col_groups[[g]]) {
      for (i_row in which(is.na(dt[[j_col]]))) {
        dt[i_row, j_col] <- group_means[[g]][i_row]
      }
    }
  }
  dt
}

# bis data
bis_data <- fill_na_with_subject_mean(
  bis_data,
  list(
    bis_cols_NS, bis_cols_MM, bis_cols_AA,
    bis_cols_AC, bis_cols_NC, bis_cols_MP
  ),
  list(
    bis_subject_means$bis_mean_NS,
    bis_subject_means$bis_mean_MM,
    bis_subject_means$bis_mean_AA,
    bis_subject_means$bis_mean_AC,
    bis_subject_means$bis_mean_NC,
    bis_subject_means$bis_mean_MP
  )
)

# pss data (no subscales: any missing cell in the row gets the scale mean)
pss_data <- fill_na_with_subject_mean(
  pss_data,
  list(seq_len(ncol(pss_data))),
  list(pss_subject_means$pss_mean)
)

# rfs data
rfs_data <- fill_na_with_subject_mean(
  rfs_data,
  list(rfs_cols_FV, rfs_cols_F),
  list(rfs_subject_means$rfs_mean_FV, rfs_subject_means$rfs_mean_F)
)

# tfeq data
tfeq_data <- fill_na_with_subject_mean(
  tfeq_data,
  list(tfeq_cols_UE, tfeq_cols_CR, tfeq_cols_EE),
  list(
    tfeq_subject_means$tfeq_mean_UE,
    tfeq_subject_means$tfeq_mean_CR,
    tfeq_subject_means$tfeq_mean_EE
  )
)

## Questionnaire Key

---
[Coding Google Sheet](https://docs.google.com/spreadsheets/d/1M_YAg-ALHN3SOx1DGVZ9EWd8rX8-QJuatQehSt9CCms/edit?usp=sharing)

---

# Demographics

In [7]:
# Gender counts among the kept subjects.
table(df_good$demo_gender)

# Age summary (mean, SD, min, max), ignoring missing ages.
# The age column is converted to numeric once and reused; `TRUE` is spelled
# out because `T` is an ordinary variable that can be reassigned.
demo_age_num <- as.numeric(df_good$demo_age)
tibble(
  age = mean(demo_age_num, na.rm = TRUE),
  ageSD = sd(demo_age_num, na.rm = TRUE),
  ageMin = min(demo_age_num, na.rm = TRUE),
  ageMax = max(demo_age_num, na.rm = TRUE)
)


Female   Male 
    34     16 

age,ageSD,ageMin,ageMax
<dbl>,<dbl>,<dbl>,<dbl>
23.1,4.258245,18,38
