# Setup

## Imports

In [10]:
# Import Libraries
library(tidyverse)
library(data.table)
library(broom)
library(dtplyr)
library(lme4)
library(lmerTest)
library(ggbeeswarm)
library(cowplot)
library(tibble)

# Helper code: the project-local script first, then an external
# questionnaire-cleaning script.
# NOTE(review): the second script is fetched from the `master` branch at run
# time, so its contents are not pinned to a fixed revision.
source("r_scripts/helper_functions.R")
source("https://raw.githubusercontent.com/hauselin/Rcode/master/cleanQuestionnaire.R")

# Session-wide options, set in one call:
#   repr.plot.width / repr.plot.height -- universal plot size
#   scipen = 999                       -- avoids scientific notation
options(
  repr.plot.width = 7,
  repr.plot.height = 3.5,
  scipen = 999
)

## Load and manipulate data

In [11]:
# Read the raw questionnaire export. read_csv() already returns a tibble and
# never creates factors, so no as_tibble(..., stringsAsFactors = FALSE)
# wrapper is needed.
df <- read_csv("data/questionnaires.csv")

# Keep only fMRI study subjects, identified by the subject ID in column 2.
# NOTE(review): in the regex "FRMRI*" the trailing `*` makes the final "I"
# optional (it is not a wildcard), so this matches any ID containing "FRMR".
# The pattern is kept unchanged; confirm it against the real ID format.
fmri_sub <- grepl("FRMRI*", df[[2]])
df <- df[fmri_sub, ]

# Rename the 13 demographic columns (positions 1-13 of the raw export).
names(df)[1:13] <- c(
  "demo_time",
  "subject",
  "demo_gender",
  "demo_race",
  "demo_age",
  "demo_weight",
  "demo_height",
  "demo_yrs_canada",
  "demo_diet",
  "demo_diet_detail",
  "demo_familiar",
  "demo_previous_food",
  "demo_previous_food_time"
)

# Just want the id number: parse the numeric part out of the subject ID.
df <- df %>%
  mutate(subject = readr::parse_number(subject))

# Column positions of every questionnaire item, keyed by the name prefix the
# items receive. Items are numbered in the order their positions are listed,
# so order matters (e.g. position 15 becomes ce_4, not ce_2).
item_positions <- list(
  ce_     = c(14, 16, 17, 15, 18, 19, 20, 114, 115, 116, 117), # condition effects
  s_      = c(21:28, 118:125), # strategy questions
  tfeq_   = 29:46,             # Three Factor Eating Questionnaire
  rfs_fv_ = 47:56,             # rapid food screener: fruits and veg
  rfs_f_  = 57:73,             # rapid food screener: fat
  bis_    = 74:103,            # BIS 11
  pss_    = 104:113            # Perceived Stress Scale
)

# Apply <prefix><index> names to each group of positions.
for (prefix in names(item_positions)) {
  cols <- item_positions[[prefix]]
  colnames(df)[cols] <- paste0(prefix, seq_along(cols))
}

# Reorder columns alphabetically by name.
df <- df[, order(colnames(df))]

Parsed with column specification:
cols(
  .default = col_character(),
  `What time did you eat the food you indicated in the last question?` = [34mcol_time(format = "")[39m,
  `How difficult was it to make choices during trials when you were asked to RESPOND NATURALLY?` = [32mcol_double()[39m,
  `How difficult was it to make choices during trials when you were asked to DECREASE YOUR DESIRE?` = [32mcol_double()[39m,
  `How much did you think about the health of the foods when choosing NATURALLY?` = [32mcol_double()[39m,
  `How much did you try to decrease your desire for the foods when choosing NATURALLY?` = [32mcol_double()[39m,
  `How much did you think about the health of the foods when trying to DECREASE DESIRE?` = [32mcol_double()[39m,
  `How much did you try to reduce your craving for the foods when trying to DECREASE DESIRE?` = [32mcol_double()[39m,
  `How successful do you think you were in controlling your responses to the food when you were asked to DECREASE DESI

In [12]:
# Subject numbers retained for analysis; every other subject is dropped.
good_subjects <- c(
  101:104, 106:112, 114, 116, 118:121, 123:126, 129:137,
  139:140, 142:148, 150, 152:158, 162:164
)

# Keep only the rows whose subject number is in the retained set.
df_good <- dplyr::filter(df, subject %in% good_subjects)

In [13]:
# Working copy of the retained subjects, rebuilt through data.frame() so the
# recoding steps operate on a separate object from df_good.
df_good_copy <- df_good %>%
  data.frame()

In [14]:
# Collect the item column names belonging to each scale.
#
# Columns are identified by their name prefix. The prefixes are mutually
# exclusive, so each scale can be selected independently with a vectorised
# startsWith() instead of growing vectors inside a loop. Column order within
# each vector follows the column order of df_good_copy.
item_names <- names(df_good_copy)

bis_cols  <- item_names[startsWith(item_names, "bis_")]   # BIS 11
pss_cols  <- item_names[startsWith(item_names, "pss_")]   # Perceived Stress Scale
rfs_cols  <- item_names[startsWith(item_names, "rfs_")]   # rapid food screener (fv + fat)
tfeq_cols <- item_names[startsWith(item_names, "tfeq_")]  # Three Factor Eating Questionnaire

In [15]:
# Convert the text responses of each scale to numeric scores.
#
# Each mutate_at() call rewrites one scale's columns in place. Formula lambdas
# (~ recode(.x, ...)) replace the deprecated funs() helper; they are accepted
# by mutate_at() from dplyr 0.8.0 onwards. Any response not listed in a map
# falls through to `.default` and becomes NaN.
df_good_copy <- df_good_copy %>%
  # BIS items: 1-4 frequency scale
  mutate_at(bis_cols, ~ recode(.x,
    "Rarely" = 1,
    "Occasionally" = 2,
    "Often" = 3,
    "Almost always/Always" = 4,
    .default = NaN
  )) %>%
  # PSS items: 0-4 frequency scale
  mutate_at(pss_cols, ~ recode(.x,
    "Never" = 0,
    "Almost Never" = 1,
    "Sometimes" = 2,
    "Fairly Often" = 3,
    "Very Often" = 4,
    .default = NaN
  )) %>%
  # RFS items: the leading digit of each label is the score (0-5)
  mutate_at(rfs_cols, ~ recode(.x,
    "0: Less than once/week" = 0,
    "1: Once/week" = 1,
    "2: 2-3 times/week" = 2,
    "3: 4-6 times/week" = 3,
    "4: One time/day" = 4,
    "5: 2+ times/day" = 5,
    .default = NaN
  )) %>%
  # TFEQ items: several response formats share one map, all scored 1-4.
  # The numeric labels "1".."8" are collapsed pairwise onto 1-4
  # (presumably an 8-point item -- confirm against the questionnaire key).
  mutate_at(tfeq_cols, ~ recode(.x,
    "Definitely false" = 1,
    "Mostly false" = 2,
    "Mostly true" = 3,
    "Definitely true" = 4,
    "Only at meal times" = 1,
    "Sometimes between meals" = 2,
    "Often between meals" = 3,
    "Almost always" = 4,
    "Almost never" = 1,
    "Seldom" = 2,
    "Usually" = 3,
    "Unlikely" = 1,
    "Slightly likely" = 2,
    "Moderately likely" = 3,
    "Very likely" = 4,
    "Never" = 1,
    "Rarely" = 2,
    "Sometimes" = 3,
    "At least once a week" = 4,
    "1" = 1, "2" = 1,
    "3" = 2, "4" = 2,
    "5" = 3, "6" = 3,
    "7" = 4, "8" = 4,
    .default = NaN
  ))

“funs() is soft deprecated as of dplyr 0.8.0
please use list() instead

# Before:
funs(name = f(.)

# After: 
list(name = ~f(.))
“NAs introduced by coercion”

## Questionnaire Key

---
[Coding Google Sheet](https://docs.google.com/spreadsheets/d/1M_YAg-ALHN3SOx1DGVZ9EWd8rX8-QJuatQehSt9CCms/edit?usp=sharing)

---

# Demographics

In [7]:
# Gender counts among the retained subjects.
table(df_good$demo_gender)

# Age summary (mean, SD, min, max). demo_age is converted to numeric once;
# values that cannot be converted become NA and are excluded via na.rm = TRUE.
df_good %>%
  transmute(age_years = as.numeric(demo_age)) %>%
  summarise(
    age = mean(age_years, na.rm = TRUE),
    ageSD = sd(age_years, na.rm = TRUE),
    ageMin = min(age_years, na.rm = TRUE),
    ageMax = max(age_years, na.rm = TRUE)
  )


Female   Male 
    34     16 

age,ageSD,ageMin,ageMax
<dbl>,<dbl>,<dbl>,<dbl>
23.1,4.258245,18,38
