/
minno.qualtrics.test.stiat.process.rmd
202 lines (171 loc) · 7.28 KB
/
minno.qualtrics.test.stiat.process.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
```{r}
# Folder that holds the raw data file; edit this path to match your own machine.
dir <- 'C:\\Users\\yoav\\Documents\\bigfiles\\qualtrics.iat\\gal.elinor\\raw'
```
```{r}
# Copied from Elad Zlotnick (from here: https://github.com/eladzlot/minnojs-qiat/blob/master/qiat.R)
# Parse the per-response csv text generated by minnoJS into one long data frame.
#
# @param df a data frame with one row per response
# @param id the column name (a string) holding the ids
# @param data the column name (a string) holding the data; each cell is read
#   with read.csv(text = ...), i.e. a header line followed by one line per record
#
# @example
# qiat.parse.quoted(df, 'ResponseId', 'Q_47')
# qiat.parse(df, ResponseId, Q_47)
#
# @returns data frame with the parsed data of all responses stacked, preceded by
#   an "id" column. Responses whose data cell is NA or '' are omitted, and so are
#   cells that cannot be read as csv (a message is emitted for each) or that
#   parse to zero rows. An empty data.frame() is returned when nothing is left.
qiat.parse.quoted <- function(df, id, data){
  # as.character() lets a factor column (the default of read.csv before R 4.0)
  # be handled the same way as a character column
  cells <- as.character(df[[data]])
  filteredDF <- df[!is.na(cells) & cells != '', , drop = FALSE]

  # parse data -> list of data frames; a cell that fails to parse becomes NULL
  csvList <- lapply(as.character(filteredDF[[data]]), function(str) tryCatch(
    read.csv(text = str, stringsAsFactors = FALSE),
    error = function(err){
      message('woa there is a malformed csv here')
      NULL
    }
  ))

  # Keep only pages that parsed into a data frame with at least one row.
  # vapply() always yields a logical vector, so a failed page can no longer
  # turn the mask into a list and abort the whole parse.
  mask <- vapply(
    csvList,
    function(page) is.data.frame(page) && nrow(page) > 0,
    logical(1)
  )
  pages <- csvList[mask]
  if (!length(pages)) { return(data.frame()) }

  # add id to each data DF
  dataPages <- mapply(
    function(id, df) cbind(id, df),
    filteredDF[[id]][mask],
    pages,
    SIMPLIFY = FALSE
  )

  # concat pages
  do.call(rbind, dataPages)
}
# Unquoted-name front end for qiat.parse.quoted(): `id` and `data` are given as
# bare column names, e.g. qiat.parse(df, ResponseId, Q_47).
qiat.parse <- function(df, id, data) {
  # capture the expressions the caller typed and turn them into strings
  id_name <- deparse(substitute(id))
  data_name <- deparse(substitute(data))
  qiat.parse.quoted(df, id_name, data_name)
}
```
```{r}
### Read your file here.
# Reads the example ST-IAT export from the folder set in `dir`.
# file.path() joins folder and file name with a separator that works on every
# platform, and stringsAsFactors = FALSE keeps text columns as character on
# every R version (matching what the parsing function expects).
df <- read.csv(
  file.path(dir, 'exampleSTIAT_June+29,+2020_21.00.csv'),
  stringsAsFactors = FALSE
)
```
```{r}
# Q2 is the question that displayed the IAT, so all of the IAT data is saved under Q2.
# A response whose Q2 text does not contain 'block' holds no IAT data: the IAT was
# not completed, or the response is from an earlier stage, before the IAT was set
# up correctly. Keep only the responses that do contain it.
df <- df[grepl('block', df$Q2), ]
```
```{r}
# The escaping in the data sometimes uses "" (two double quotes); replace each
# pair with a single " so the text can be parsed.
# Q2 is the column that saved the IAT data for each participant in this export.
# It might have a different name in your data.
library(kutils)
df$IAT <- mgsub(df$Q2, pattern = '""', replacement = '"')
```
```{r}
# Use Elad's parsing function: ids come from 'ResponseId', the csv text from 'IAT'.
df2 <- qiat.parse.quoted(df, 'ResponseId', 'IAT')
```
```{r}
# Sanity check: print the number of parsed rows and the names of the variables.
dim(df2)[1]
colnames(df2)
# If successful, the names should be: "id" "block" "trial" "cond" "type" "cat" "stim" "resp" "err" "rt" "d" "fb" "bOrd"
```
```{r}
# Keep only the critical blocks, 2 to 5 (the first block was practice)
iat.raw <- df2[df2$block %in% 2:5, ]
# Sanity check: 'cond' should hold exactly two pairing conditions
# (the "compatible" and the "incompatible" one)
table(iat.raw$cond, exclude = NULL)
# Cross-tabulate condition by block to make sure the 'compatible' trials
# are indeed where you expected them to be.
table(iat.raw$cond, iat.raw$block, exclude = NULL)
```
```{r}
# The ST-IAT can have one of two orders:
#  (a) one pairing condition is used in blocks 2 and 3 and the other in blocks 4 and 5;
#  (b) one pairing is used in blocks 2 and 4 and the other in blocks 3 and 5.
# So we create our own "part" variable that splits each pairing condition into
# two parts, so that the IAT package's cleanIAT function can be used easily.
library(data.table)
# One row per (participant, pairing condition, block) combination.
t.part <- unique(iat.raw[,c('id','cond','block')])
# Within each participant and condition, number the blocks in ascending block
# order (part = 1, 2, ...). setDT() converts to a data.table so `:=` can add the
# column; setDF() converts the result back to a plain data frame.
t.part <- setDF(setDT(t.part)[order(id, cond, block), part := 1:.N , by = c("id", "cond") ])
# Attach the part number to every trial of the matching (id, cond, block).
iat.raw <- merge(iat.raw,t.part,by=c('id','cond','block'))
```
```{r}
# See what conditions exist
table(iat.raw$cond, exclude = NULL)
# Set the blockName for the IAT scoring algorithm, based on the pairing condition
# ('cond') and the part within that condition ('part'):
#   'Unpleasant,Pleasant/Black people': part 1 -> B3, part 2 -> B4
#   'Black people/Unpleasant,Pleasant': part 1 -> B6, part 2 -> B7
# Any other combination (or a missing value) gives NA.
iat.raw$blockName <- ifelse(
  iat.raw$cond == 'Unpleasant,Pleasant/Black people' & iat.raw$part == 1, "B3",
  ifelse(
    iat.raw$cond == 'Unpleasant,Pleasant/Black people' & iat.raw$part == 2, "B4",
    ifelse(
      iat.raw$cond == 'Black people/Unpleasant,Pleasant' & iat.raw$part == 1, "B6",
      ifelse(
        iat.raw$cond == 'Black people/Unpleasant,Pleasant' & iat.raw$part == 2, "B7",
        NA
      )
    )
  )
)
# Make sure you only have B3, B4, B6, and B7 and no NAs.
table(iat.raw$blockName, exclude = NULL)
# There should be no NA at this point. If you see <NA> here, investigate why
# (e.g., perhaps some of your data has different cond values than expected,
# if you changed the cond after running a few participants).
table(iat.raw$cond[is.na(iat.raw$blockName)], exclude = NULL)
# Drop the trials that could not be assigned a block name.
iat.raw <- iat.raw[!is.na(iat.raw$blockName), ]
```
```{r}
# Print the class of the latency and error columns; both should be numbers
# (probably integer).
class(iat.raw[['rt']])
class(iat.raw[['err']])
```
```{r}
## Sanity check. For each participant, we expect a certain number of trials in each block.
## We flag whether the participant has the expected number of trials in every critical block.
library(doBy)
# Number of trials per participant and block (long format: one row per id x blockName).
nTrials.long <- summaryBy(err ~ id + blockName, data = iat.raw, FUN = length)
library(reshape2)
# One row per participant, one column of trial counts per block.
nTrials <- dcast(nTrials.long, id ~ blockName, value.var = 'err.length')
# You can change the number below if your IAT had a different number of trials per block.
library(dplyr)
# %in% is FALSE for a missing count, so a participant lacking a block is flagged FALSE.
nTrials <- nTrials %>%
  mutate(ntrials.ok = B3 %in% 48 & B4 %in% 48 & B6 %in% 48 & B7 %in% 48)
# If not all are TRUE, then some participants are missing data.
table(nTrials$ntrials.ok, exclude = NULL)
```
```{r}
library(IAT)
# Score the ST-IAT with cleanIAT(), treating B3/B4 and B6/B7 as the critical blocks.
# Option notes carried over from the original author (confirm against the IAT
# package documentation):
#   v_error = 2:   recode error latencies to the block mean + 600 (v_error = 1 is
#                  the standard: onset of stimulus until the correct response).
#   v_extreme = 2: (current standard) delete trial latencies < 400ms.
#   v_std = 1:     (current standard) block SD is computed including error trials.
iatscore <- cleanIAT(
  iat.raw,
  block_name = "blockName",
  trial_blocks = c("B3", "B4", "B6", "B7"),
  session_id = "id",
  trial_latency = "rt",
  trial_error = "err",
  v_error = 2,
  v_extreme = 2,
  v_std = 1
)
# How many participants were excluded for problematic performance?
table(iatscore$SUBEXCL, exclude = NULL)
# Summary of the scores of those who were not excluded.
summary(iatscore$IAT[iatscore$SUBEXCL %in% 0])
```
```{r}
## Simple graph: box plot of the IAT scores, with one dot per score and the
## mean drawn as a grey triangle.
library(ggplot2)
# A temporary constant column gives ggplot a single x category to draw the box at.
iatscore$dummy <- ''
box_plot <- ggplot(iatscore, aes(x = dummy, y = IAT))
# `fun` is the current name of stat_summary()'s summary-function argument;
# `fun.y` has been deprecated since ggplot2 3.3.0.
box_plot +
  geom_boxplot() +
  geom_dotplot(binaxis = 'y',
               dotsize = 0.4,
               stackdir = 'center') +
  theme_classic() +
  stat_summary(geom = "point", fun = "mean", col = "black", size = 3, shape = 24, fill = "grey")
# Remove the helper column again so iatscore is left as it was.
iatscore$dummy <- NULL
```
```{r}
# Get the bOrd variable: keep the rows of the parsed data where it is present and non-empty.
# as.character() keeps the emptiness test working if bOrd happens to be a factor.
has.bOrd <- !is.na(df2$bOrd) & nzchar(as.character(df2$bOrd))
block.order1 <- df2[has.bOrd, c('id', 'bOrd')]
# exclude = NULL (as in every other table() call in this script) so that any
# unexpected NA would be shown rather than hidden.
table(block.order1$bOrd, exclude = NULL)
# Also get the block order condition from the raw data: the pairing condition
# each participant had in block 2.
block.order2 <- unique(df2[which(df2$block == 2), c('id', 'cond')])
names(block.order2) <- c('id', 'block2Cond')
# Should be TRUE if all is fine (one block-2 condition per participant).
length(unique(block.order2$id)) == nrow(block.order2)
# Should be TRUE if all is fine. If not, perhaps some of your participants did not save all their trials.
nrow(block.order1) == nrow(block.order2)
block.order <- merge(block.order1, block.order2, by = 'id')
# Make sure we got the same block order condition using both methods.
table(block.order$bOrd, block.order$block2Cond, exclude = NULL)
```