stiat/qualtrics/minno.qualtrics.test.stiat.process.rmd

```{r}
# Folder that holds the raw Qualtrics export; replace it with your own location.
dir <- 'C:\\Users\\yoav\\Documents\\bigfiles\\qualtrics.iat\\gal.elinor\\raw'
```

```{r}
# Copied from Elad Zlotnick (from here: https://github.com/eladzlot/minnojs-qiat/blob/master/qiat.R)
# Parse csv generated by minnoJS
#
# Every non-empty cell of the `data` column is read as a CSV string with
# read.csv(); the resulting per-respondent tables are stacked with rbind() and
# get a leading `id` column holding the respondent's value from the `id` column.
#
# @param df a data frame
# @param id the column name (string) holding the ids
# @param data the column name (string) holding the data (we assume the data holds an array of unnested objects)
#
# @example
# qiat.parse.quoted(df, 'ResponseId', 'Q_47')
# qiat.parse(df, ResponseId, Q_47)
#
# @returns data frame with parsed data, rows with NA or '' are omitted.
#   Cells that cannot be parsed as csv (a message is emitted for each) and cells
#   that parse to zero rows are omitted as well. An empty data.frame() is
#   returned when nothing could be parsed.
qiat.parse.quoted <- function(df, id, data) {
  # Fail early with a clear message instead of an obscure subsetting error.
  missingCols <- setdiff(c(id, data), names(df))
  if (length(missingCols) > 0) {
    stop("column(s) not found in df: ", paste(missingCols, collapse = ", "),
         call. = FALSE)
  }

  # as.character() lets a factor column (e.g. read with stringsAsFactors = TRUE)
  # be parsed too; `[[` works for plain data frames and tibbles alike.
  payload <- as.character(df[[data]])
  keep <- !is.na(payload) & payload != ''
  ids <- df[[id]][keep]
  payload <- payload[keep]

  # parse data -> list of data frames (NULL marks a cell that failed to parse)
  csvList <- lapply(payload, function(str) tryCatch(
    read.csv(text = str, stringsAsFactors = FALSE),
    error = function(err) {
      message('woa there is a malformed csv here')
      NULL
    }
  ))

  # Type-stable mask: failed parses and zero-row tables are both dropped.
  # (sapply(csvList, nrow) returns a list as soon as one parse failed, which
  # made the original `> 0` comparison break.)
  usable <- vapply(
    csvList,
    function(page) is.data.frame(page) && nrow(page) > 0,
    logical(1)
  )

  # add id to each data DF
  dataPages <- mapply(
    function(pageId, page) cbind(id = pageId, page),
    ids[usable],
    csvList[usable],
    SIMPLIFY = FALSE
  )

  if (length(dataPages) == 0) {
    return(data.frame())
  }

  # concat pages
  do.call(rbind, dataPages)
}

# Convenience wrapper around qiat.parse.quoted() that takes the id and data
# columns as bare (unquoted) names, e.g. qiat.parse(df, ResponseId, Q_47).
qiat.parse <- function(df, id, data) {
  # Capture the expressions the caller typed and turn them into strings.
  idName <- deparse(substitute(id))
  dataName <- deparse(substitute(data))
  qiat.parse.quoted(df, idName, dataName)
}
```

```{r}
###Read your file here.

#We're reading the example file from the directory set in `dir` above.
#file.path() builds the path with a separator that is valid on every platform
#(the previous hard-coded '\\' separator only produced a usable path on Windows).
df <- read.csv(file.path(dir, 'exampleSTIAT_June+29,+2020_21.00.csv'))
```

```{r}

#Q2 is the question that displayed the IAT, and all the IAT data was saved under Q2.
#A response whose Q2 does not contain the text 'block' holds no IAT data: the IAT was
#not completed, or the row comes from an earlier stage before your IAT was set up correctly.
#Keep only the rows whose Q2 contains 'block'.
df <- df[grep('block', df$Q2), ]
```


```{r}
library(kutils)
#The escaping in the data sometimes uses "" which we need to change to " for the parsing.
#Note: Q2 is the column that saved the IAT data for each participant in this data set.
#It might have a different name in your data.
#The unescaped text is stored in a new column, IAT.
df[["IAT"]] <- mgsub(df$Q2, pattern = '""', replacement = '"')
```

```{r}
#Use Elad's parsing function: ids come from ResponseId, the csv text from the IAT column.
df2 <- qiat.parse.quoted(df, 'ResponseId', 'IAT')
```

```{r}
#Sanity check: the number of parsed rows, then the names of the variables.
dim(df2)[1]
colnames(df2)
#If successful, the names should be:  "id"    "block" "trial" "cond"  "type"  "cat"   "stim"  "resp"  "err"   "rt"    "d"     "fb"    "bOrd" 
```


```{r}
#Keep only the critical blocks, 2 to 5 (the first block was practice)
iat.raw <- df2[df2$block %in% 2:5, ]
#Sanity check: need to include only "incompatible" and "compatible"
table(iat.raw$cond, exclude = NULL)
#Cross-tabulate condition by block: make sure the 'compatible' trials are indeed
#what you expected them to be.
table(iat.raw$cond, iat.raw$block, exclude = NULL)
```

```{r}
#The ST-IAT can have one of two orders:
# (a) one pairing condition is used in blocks 2 and 3 and the other in blocks 4 and 5;
# (b) one pairing is used in blocks 2 and 4 and the other in blocks 3 and 5.
#So, we create our own "part" variable that numbers the blocks of each pairing
#condition within each participant (1 for the earlier block, 2 for the later one),
#so that we can use the IAT package's cleanIAT function easily.
library(data.table)
#One row per participant x condition x block
t.part <- unique(iat.raw[c('id', 'cond', 'block')])
#Number the blocks in ascending block order within each id/cond pair
t.part <- setDF(
  setDT(t.part)[order(id, cond, block), part := seq_len(.N), by = .(id, cond)]
)
#Attach the part number to every trial
iat.raw <- merge(iat.raw, t.part, by = c('id', 'cond', 'block'))
```


```{r}
#See what conditions exist
table(iat.raw$cond, exclude = NULL)
#Set the blockName for the IAT scoring algorithm, based on pairing condition ('cond')
#and on the part (1 or 2) within that condition. Everything starts as NA and only
#the four recognised cond/part combinations receive a name.
iat.raw$blockName <- rep(NA_character_, nrow(iat.raw))
iat.raw$blockName[which(
  iat.raw$cond == 'Unpleasant,Pleasant/Black people' & iat.raw$part == 1
)] <- "B3"
iat.raw$blockName[which(
  iat.raw$cond == 'Unpleasant,Pleasant/Black people' & iat.raw$part == 2
)] <- "B4"
iat.raw$blockName[which(
  iat.raw$cond == 'Black people/Unpleasant,Pleasant' & iat.raw$part == 1
)] <- "B6"
iat.raw$blockName[which(
  iat.raw$cond == 'Black people/Unpleasant,Pleasant' & iat.raw$part == 2
)] <- "B7"
#Make sure you only have B3,B4,B6, and B7 and no NAs.
table(iat.raw$blockName, exclude = NULL)
#Not supposed to have any NA at this point. So, if you see <NA> here, investigate why
#(e.g., perhaps some of your data has different cond values than expected, if you changed the cond after running a few participants)
table(iat.raw$cond[is.na(iat.raw$blockName)], exclude = NULL)
#Drop the trials that did not get a block name
iat.raw <- iat.raw[!is.na(iat.raw$blockName), ]
```

```{r}
#Verify that latency (rt) and error (err) are both numbers (probably, integer)
class(iat.raw[["rt"]])
class(iat.raw[["err"]])
```

```{r}
##Sanity check. For each participant, we expect a certain number of trials in each block.
##We will indicate whether the participant has the expected number of trials within each of the critical blocks.
library(doBy)
#Count the trials per participant and block (long format: one row per id x blockName)
nTrials.long <- summaryBy(err ~ id + blockName, data = iat.raw, FUN = length)
library(reshape2)
#One row per participant, one column of counts per block
nTrials <- dcast(nTrials.long, id ~ blockName, value.var = 'err.length')
#You can change those numbers if your IAT had different numbers of trials
library(dplyr)
nTrials <- mutate(
  nTrials,
  ntrials.ok = case_when(
    B3 == 48 & B4 == 48 & B6 == 48 & B7 == 48 ~ TRUE,
    TRUE ~ FALSE
  )
)
#If not all are TRUE, then some participants are missing data
table(nTrials$ntrials.ok, exclude = NULL)
```

```{r}
library(IAT)
#Scoring settings:
# v_error=2 means recode error latency to m+600; v_error=1 means the standard
#   (onset of stimuli until the correct response is pressed).
# v_extreme=2 (current standard) = delete trial latencies < 400ms.
# v_std=1 (current standard) = block SD is performed including error trials.
iatscore <- cleanIAT(
  iat.raw,
  block_name = "blockName",
  trial_blocks = c("B3", "B4", "B6", "B7"),
  session_id = "id",
  trial_latency = "rt",
  trial_error = "err",
  v_error = 2,
  v_extreme = 2,
  v_std = 1
)

#How many participants were excluded for problematic performance?
table(iatscore$SUBEXCL, exclude = NULL)
#Summary of the IAT scores of those who were not excluded (SUBEXCL equal to 0).
summary(iatscore$IAT[iatscore$SUBEXCL %in% 0])
```

```{r}
##Simple graph: one box plot of the IAT scores with the individual scores as dots
##and the mean marked by a grey triangle.
library(ggplot2)
#Temporary constant column so that all participants fall into a single box
iatscore$dummy <- ''
box_plot <- ggplot(iatscore, aes(x = dummy, y = IAT))
box_plot +
  geom_boxplot() +
  geom_dotplot(binaxis = 'y',
               dotsize = 0.4,
               stackdir = 'center') +
  theme_classic() +
  #`fun` replaces the `fun.y` argument, which is deprecated since ggplot2 3.3.0
  stat_summary(geom = "point", fun = "mean", col = "black", size = 3, shape = 24, fill = "grey")
#Remove the temporary column again
iatscore$dummy <- NULL
```

```{r}
#Get the bOrd variable: keep the parsed rows in which bOrd is present and not empty
block.order1 <- df2[which(!is.na(df2$bOrd) & nchar(df2$bOrd) > 0), c('id', 'bOrd')]
#`exclude` takes the values to drop from the table, not a flag; NULL (as in the
#other tables in this file) drops nothing and also shows NA if there is any.
table(block.order1$bOrd, exclude = NULL)

#Also get the block order condition from the raw data:
#the pairing condition each participant had in block 2
block.order2 <- df2[which(df2$block == 2), c('id', 'cond')]

#One row per distinct id/cond combination
block.order2 <- unique(block.order2)
names(block.order2) <- c('id', 'block2Cond')

#Should be TRUE if all is fine (each participant has a single condition in block 2).
length(unique(block.order2$id)) == nrow(block.order2)

#Should be TRUE if all is fine. If not, perhaps some of your participants did not save all their trials.
nrow(block.order1) == nrow(block.order2)

block.order <- merge(block.order1, block.order2, by = 'id')
#Make sure we got the same block order condition using both methods
table(block.order$bOrd, block.order$block2Cond, exclude = NULL)
```