---
title: "EA Channels Combined Analysis"
author: "Anonymous (for peer review)"
date: "10 Aug 2023"
output:
  html_document:
    theme: spacelab
    toc: yes
    toc_depth: 3
    smart: no
    fig_width: 7
    fig_height: 5
    fig_caption: yes
  pdf_document:
    toc: yes
    toc_depth: '3'
editor_options:
  chunk_output_type: console
fontsize: 12pt
---
For all studies reported here, we averaged ratings over 2-second-long windows. (The ML models are fit over 5-second-long windows.)
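For concreteness, here is a minimal sketch (not the actual preprocessing pipeline; the column names `time_sec` and `rating` are hypothetical) of how a continuous rating stream can be averaged into non-overlapping 2-second windows:

```{r window-averaging-sketch, eval=FALSE}
# Hypothetical illustration of the windowing step; not evaluated.
library(dplyr)
average_into_windows <- function(df, window_sec = 2) {
  df %>%
    mutate(window = floor(time_sec / window_sec)) %>%  # assign each sample to a window
    group_by(window) %>%
    summarize(rating = mean(rating, na.rm = TRUE), .groups = "drop") %>%
    mutate(time = (window + 1) * window_sec)           # label each window by its end time (s)
}
```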
```{r preamble, echo=FALSE, warning=FALSE, message=FALSE, eval=TRUE}
library(tidyverse)
library(ggplot2)
library(lme4) # for lmer and associated functions
library(pander) # for pandoc.table
### PATHS
train_ids = read.csv("data/models/train_ids.csv")
dev_ids = read.csv("data/models/dev_ids.csv")
test_ids = read.csv("data/models/test_ids.csv")
heb_ids = c("ID53_vid2", "ID101_vid2", "ID107_vid4", "ID112_vid4", "ID114_vid4", "ID117_vid4", "ID119_vid5", "ID120_vid8", "ID127_vid4")
NUM_DIGITS_FOR_TEXT_REPORTING = 3
my_theme <- theme_bw() + theme(legend.position="top",
strip.background = element_rect(fill="#FFFFFF"),
strip.text = element_text(size=12),
axis.text = element_text(size=12),
axis.title.x = element_text(size=14, vjust=-0.2),
axis.title.y = element_text(size=14, vjust=0.8),
legend.text = element_text(size=12),
title = element_text(size=18, vjust=1),
panel.grid = element_blank())
my_color_palette = c("#8c1515", "#d2c295", "#175e54", "#544948", "#8d3c1e")
z_to_p <- function(x) {return(2*(1-pnorm(abs(x))))} # two-tailed p from a z statistic; abs() so negative z values are handled correctly
format_table <- function(x, digits=4) {return(format(x, digits=digits))}
format_table_2 <- function(x, y, digits=4) { return(paste(x, " (", format_table(x/y*100,digits), "%)", sep=""))}
format_p_value = function(p, digits=3) {
if(p>=0.05) { pString = paste( "p = ", format_table(p, digits=digits) , sep="") }
else if(p>=0.01) { pString = paste( "<b>p = ", format_table(p, digits=digits) , "*</b>", sep="") }
else if(p>=0.001) { pString = paste( "<b>p = ", format_table(p, digits=digits) , "**</b>", sep="") }
else { pString = "<b>p < 0.001***</b>" }
return(pString)
}
return_lmer_for_table <- function(lmerObject, coeffName, digits=3) {
coeffTableRow = summary(lmerObject)$coeff[coeffName,]
b = coeffTableRow['Estimate']
t = coeffTableRow['t value']
p = coeffTableRow['Pr(>|t|)']
pString = format_p_value(p)
return( paste("b = ", format_table(b, digits=digits),
", <br>t = ", format_table(t, digits=digits),
" <br>(", pString, ")", sep="") )
}
report_lmer_in_text = function(summary_obj, variable, digits=3, reversed=FALSE) {
coeff_row = summary_obj$coefficients[variable,]
estimate = unlist(coeff_row[1])
se = coeff_row[2]
t = coeff_row[3]
pval = 2*(1-pnorm(abs(t))) # normal approximation to the p-value, since lme4 does not report p-values
ci = 1.96*se # Wald 95% confidence interval half-width
if(!reversed) {
text_to_return = paste( "b = ", format(estimate, digits=digits),
" [", format(estimate-ci, digits=digits), ", ",
format(estimate+ci, digits=digits), "], ",
"t = ", format(t, digits=digits),
", p = ", format(pval, digits=digits), sep="" )
} else {
## reversed is if the coefficient is A > B, but if you want to consider B > A instead.
text_to_return = paste( "b = ", format(-estimate, digits=digits),
" [", format(-(estimate)-ci, digits=digits), ", ",
format(-(estimate)+ci, digits=digits), "], ",
"t = ", format(abs(t), digits=digits),
", p = ", format(pval, digits=digits), sep="" )
}
return(text_to_return)
}
fisher_transform_r_to_z = function(rho) { return( .5 * (log(1+rho) - log(1-rho))) }
```
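A quick illustration of the reporting helpers defined above, with made-up input values (the chunk is not evaluated, so it does not affect the analysis):

```{r helper-examples, eval=FALSE}
format_p_value(0.2)           # "p = 0.2"
format_p_value(0.03)          # "<b>p = 0.03*</b>"
format_p_value(0.0001)        # "<b>p < 0.001***</b>"
fisher_transform_r_to_z(0.5)  # 0.5493..., equivalent to atanh(0.5)
```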
```{r read-in-data, message=FALSE, echo=FALSE, eval=TRUE}
# _dAllmelt: each row contains one participant's rating for one 2s window of one video
# (roughly, N participants x 8 videos x T windows per video)
#
d_study1a_dAllmelt = read.csv('data/study1a_dAllmelt.csv', header=TRUE)
d_study1a_dAllsum = read.csv('data/study1a_dAllsum.csv', header=TRUE)
d_study1a_dAllTarget = read.csv('data/study1a_dAllTarget.csv', header=TRUE)
d_study1a_long = read.csv('data/study1a_long.csv', header=TRUE)
d_study1b_dAllmelt = read.csv('data/study1b_dAllmelt.csv', header=TRUE)
d_study1b_dAllsum = read.csv('data/study1b_dAllsum.csv', header=TRUE)
d_study1b_dAllTarget = read.csv('data/study1b_dAllTarget.csv', header=TRUE)
d_study1b_long = read.csv('data/study1b_long.csv', header=TRUE)
# Study 2
d_studyHeb_dAllmelt = read.csv('data/studyHeb_dAllmelt.csv', header=TRUE)
d_studyHeb_dAllsum = read.csv('data/studyHeb_dAllsum.csv', header=TRUE)
d_studyHeb_dAllTarget = read.csv('data/studyHeb_dAllTarget.csv', header=TRUE)
d_studyHeb_long = read.csv('data/studyHeb_long.csv', header=TRUE)
# Study 3
d_studyAmHeb_dAllmelt = read.csv('data/studyAmHeb_dAllmelt.csv', header=TRUE)
d_studyAmHeb_dAllsum = read.csv('data/studyAmHeb_dAllsum.csv', header=TRUE)
d_studyAmHeb_dAllTarget = read.csv('data/studyAmHeb_dAllTarget.csv', header=TRUE)
d_studyAmHeb_long = read.csv('data/studyAmHeb_long.csv', header=TRUE)
# machine learning data
ids_to_read_in = bind_rows(train_ids, dev_ids, test_ids)
modelRatingsAll = data.frame()
for(videoIterator in c(1:nrow(ids_to_read_in))) { # loop over ids_to_read_in
# if(videoIterator %% 10 == 0) {
# cat(paste("Currently on video", videoIterator, "out of", nrow(ids_to_read_in), "...\n"))
# }
thisVideoID = ids_to_read_in$vid_id[videoIterator]
resultsFile = read.csv(paste("data/models/target/", thisVideoID, ".csv", sep=""), header=T) %>%
mutate(videoID = thisVideoID,
time = row_number()*5,
train = (videoID %in% train_ids$vid_id),
valid = (videoID %in% dev_ids$vid_id),
test = (videoID %in% test_ids$vid_id))
modelRatingsAll = bind_rows(modelRatingsAll, resultsFile)
}
```
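One sanity check worth running on the assembled model ratings (a suggestion, not part of the original pipeline): every video should be flagged as belonging to exactly one partition.

```{r check-partitions, eval=FALSE}
# Each video should be in exactly one of train/valid/test.
partition_counts <- modelRatingsAll %>%
  distinct(videoID, train, valid, test) %>%
  mutate(n_partitions = train + valid + test)  # logical flags sum to an integer
stopifnot(all(partition_counts$n_partitions == 1))
```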
```{r combine-all-studies-prep, message=F, echo=F, eval=T}
# (very-) long-form, one entry per participant and per video
d_studies_byVideoAndParticipantSummary <-
bind_rows(
# Study 1a
full_join(d_study1a_dAllmelt,
(d_study1a_dAllTarget %>% rename(targetRating = rating) %>%
mutate(study="1a")), by=c("videoID", "time")),
# Study 1b
full_join(d_study1b_dAllmelt,
(d_study1b_dAllTarget %>% rename(targetRating = rating) %>%
mutate(study="1b")), by=c("videoID", "time")),
# Study 2
full_join((d_studyHeb_dAllmelt %>% mutate(participantID = as.character(participantID))),
(d_studyHeb_dAllTarget %>% rename(targetRating = rating) %>%
mutate(study="Heb")), by=c("videoID", "time")),
# Study 3
full_join(d_studyAmHeb_dAllmelt,
(d_studyAmHeb_dAllTarget %>% rename(targetRating = rating) %>%
mutate(study="Am-Heb")), by=c("videoID", "time"))
) %>%
group_by(study, videoID, condition, participantID) %>%
summarize(indivCorrelation = cor(rating, targetRating, use="complete")) %>% ungroup() # use="complete" matches the parallel aggregation below
# long-form, aggregated over participants, so one entry per video
d_studies_byVideoSummary <-
bind_rows(
# Study 1a
full_join(d_study1a_dAllmelt, (d_study1a_dAllTarget %>% rename(targetRating = rating) %>% mutate(study="1a")), by=c("videoID", "time")),
# Study 1b
full_join(d_study1b_dAllmelt, (d_study1b_dAllTarget %>% rename(targetRating = rating) %>% mutate(study="1b")), by=c("videoID", "time")),
# Study 2
full_join((d_studyHeb_dAllmelt %>% mutate(participantID = as.character(participantID))),
(d_studyHeb_dAllTarget %>% rename(targetRating = rating) %>% mutate(study="Heb")), by=c("videoID", "time")),
# Study 3
full_join(d_studyAmHeb_dAllmelt, (d_studyAmHeb_dAllTarget %>% rename(targetRating = rating) %>% mutate(study="Am-Heb")), by=c("videoID", "time"))
) %>%
#mutate(modelType = "Human") %>%
group_by(study, videoID, condition, participantID) %>%
summarize(indivCorrelation = cor(rating, targetRating, use="complete")) %>%
group_by(study, videoID, condition) %>%
summarize(correlation = mean(indivCorrelation),
sdCorrelation = sd(indivCorrelation),
N = n()) %>%
mutate(se = sdCorrelation/sqrt(N),
ci = qt(0.975, N - 1) * se, # t critical value with N-1 degrees of freedom
ci_upper = correlation + ci,
ci_lower = correlation - ci)
# long-form, aggregated over participants and then aggregated over videos, so one entry per study/condition. i.e., condition means, for plotting.
d_studies_allSummary <- d_studies_byVideoSummary %>%
group_by(study, condition) %>%
summarize(meanCorr = mean(correlation),
sdCorrelation = sd(correlation),
N = n()) %>%
mutate(se = sdCorrelation/sqrt(N),
ci_upper = meanCorr + qt(0.975, N - 1) * se, # N-1 df for the t critical value
ci_lower = meanCorr - qt(0.975, N - 1) * se)
```
## Study 1a Channels
See recruitment statistics from Prep file
#### Text in paper:
> In Study 1a, we had 1,108 American participants watch videos sampled from 193 videos from the Stanford Emotional Narratives Dataset ...
```{r study1a-analysis, echo=F}
s1a_main_lmer_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("1a")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Both", "Audio-Only", "Video-Only"))) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
s1a_main_lmer_audioAsBase_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("1a")) %>%
# condition left unreleveled: with alphabetical factor levels, "audioOnly" is the reference
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
```
Means and SDs of empathic accuracy:
- Mean in Audio-Video condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD across videos =
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in AudioOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in VideoOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Mixed-effects linear model stats:
> `lmer(accuracy ~ condition + (1|videoID) + (1|participantID))`
- Audio Only > Both, `r report_lmer_in_text(s1a_main_lmer_summary, "conditionAudio-Only")`
- Video Only > Both, `r report_lmer_in_text(s1a_main_lmer_summary, "conditionVideo-Only")`
- Video Only > Audio Only, `r report_lmer_in_text(s1a_main_lmer_audioAsBase_summary, "conditionvideoOnly")`
#### Text in paper:
> We modelled the results with a mixed-effects linear model (in R with `lme4`), with `condition` as a fixed effect and random intercepts for participants and videos.
>
> Observers' empathic accuracies were highest in the Audio-Video
(M$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) and Audio-Only
(M$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) conditions, with no significant difference between these conditions (p = .18).
>
> These were both significantly higher than the Video-Only condition
(M$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
; Audio-Video > Video-Only, `r report_lmer_in_text(s1a_main_lmer_summary, "conditionVideo-Only", reversed=TRUE)`;
Audio-Only > Video-Only, `r report_lmer_in_text(s1a_main_lmer_audioAsBase_summary, "conditionvideoOnly", reversed=TRUE)`).
## Study 1b Channels replication with subset of 32 videos
See recruitment statistics from Prep file
#### Text in paper:
> We replicated this pattern of results in Study 1b, an internal replication. Study 1b (N=521) was identical except that the pool of videos was a smaller subset of 32 of the original 193 videos, giving us a larger number of ratings per video (approx. 32 ratings/video/condition, compared to approx. 8 ratings/video/condition in Study 1a).
```{r study1b-analysis, message=FALSE, echo=FALSE, eval=TRUE}
s1b_main_lmer_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("1b")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Both", "Audio-Only", "Video-Only"))) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
s1b_main_lmer_audioAsBase_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("1b")) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
```
Means and SDs of empathic accuracy:
- Mean in Audio-Video condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in AudioOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in VideoOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Mixed-effects linear model stats:
> `lmer(accuracy ~ condition + (1|videoID) + (1|participantID))`
- Audio Only > Both, `r report_lmer_in_text(s1b_main_lmer_summary, "conditionAudio-Only")`
- Video Only > Both, `r report_lmer_in_text(s1b_main_lmer_summary, "conditionVideo-Only")`
- Video Only > Audio Only, `r report_lmer_in_text(s1b_main_lmer_audioAsBase_summary, "conditionvideoOnly")`
#### Text in paper:
> In Study 1b, we also found that performance was highest in the Audio-Video
(M$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) and Audio-Only
(M$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) conditions. These were not significantly different from each other (p = .23), and they were both significantly greater than the Video-Only condition
(M$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
; Audio-Video > Video-Only, `r report_lmer_in_text(s1b_main_lmer_summary, "conditionVideo-Only", reversed=TRUE)`
; Audio-Only > Video-Only, `r report_lmer_in_text(s1b_main_lmer_audioAsBase_summary, "conditionvideoOnly", reversed=TRUE)`.
## Study 1a+1b
```{r cor-study1ab, echo=F, eval=T}
# to compare corr at video level, use aggregated over participants
s1b_videoIDs = levels(factor((d_studies_byVideoSummary %>% filter(study %in% c("1b")))$videoID))
```
- Correlation between Study 1a and Study 1b in the Both condition is
`r format(cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("both"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("both")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
- Correlation between Study 1a and Study 1b in the Audio-Only condition is
`r format(cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
- Correlation between Study 1a and Study 1b in the Video-Only condition is
`r format(cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
#### Text in paper:
> Across Studies 1a and 1b, average empathic accuracy for each video was highly correlated
(r = `r format(
cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("both"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("both")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
in the Audio-Video condition,
r = `r format(
cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("audioOnly"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("audioOnly")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
in the Audio-Only condition, and
r = `r format(
cor(
(d_studies_byVideoSummary %>% filter(study %in% c("1a"), condition %in% c("videoOnly"), videoID %in% s1b_videoIDs) %>% arrange(videoID))$correlation,
(d_studies_byVideoSummary %>% filter(study %in% c("1b"), condition %in% c("videoOnly")) %>% arrange(videoID))$correlation
),
digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
in the Video-Only condition), showing strong video-specific inter-rater reliability: the same video is rated similarly accurately by two independent groups of observers. This suggests that although individual videos vary on many attributes (i.e., “item-level effects”), and participants differ in how accurately they perceive emotions in the videos (i.e., “participant-level effects”), with these sample sizes our results are reliable after averaging across videos and participants, and are not driven by specific videos.
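If a significance test for these between-study reliability correlations were wanted, the Fisher transform defined in the preamble applies directly; a sketch, where `r_1a_1b` stands in for one of the correlations above and `n_videos` for the number of common videos (32 here):

```{r fisher-z-sketch, eval=FALSE}
# Fisher z-test of H0: rho = 0; r_1a_1b and n_videos are placeholders
# for the values computed above.
z_stat <- fisher_transform_r_to_z(r_1a_1b) * sqrt(n_videos - 3)
p_val  <- 2 * (1 - pnorm(abs(z_stat)))
```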
```{r Study1ab-plot, fig.width=6, fig.height=5.5, echo=F}
# For Paper: plot only studies 1a and 1b
plot1a_df1 = d_studies_allSummary %>% filter(study %in% c("1a", "1b")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Audio-Video", "Audio-Only", "Video-Only")))
plot1a_df2 = d_studies_byVideoSummary %>% filter(study %in% c("1a", "1b")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Audio-Video", "Audio-Only", "Video-Only")))
ggplot(plot1a_df1, aes(x=condition, y=meanCorr, fill=study)) +
geom_bar(stat="identity", position=position_dodge(.9)) +
geom_point(data = plot1a_df2, aes(x=condition, fill=study, y = correlation),
position=position_jitterdodge(dodge.width= .9, jitter.width=.1), alpha=.2) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width=.2, position=position_dodge(.9), color="black") +
xlab("Condition") + ylab("Empathic Accuracy") +
scale_fill_manual(breaks=c("1a", "1b"), labels=c("Study 1a", "Study 1b"), values = my_color_palette, name=NULL) +
scale_y_continuous(breaks=c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8)) +
coord_cartesian(ylim=c(-0.6,0.9)) +
my_theme + theme(
legend.text = element_text(size=18),
strip.text = element_text(size=16),
axis.text = element_text(size=16),
axis.title.x = element_text(size=18, vjust=-0.2),
axis.title.y = element_text(size=18, vjust=0.8),
axis.ticks = element_line(linewidth = 0.4)
)
```
> Figure 2. Behavioral results from Studies 1a and 1b, showing a direct replication with 193 videos (Study 1a) and a smaller subset of 32 videos (Study 1b). Error bars give 95% confidence intervals, with standard errors calculated over videos (rather than over participants).
## Machine Learning modeling on Study 1a+b
### mean performance of multimodal model across training partitions
```{r ML-calculate-multimodal-means, message=F, echo=F, eval=T}
modelRatings_byVideoSummary <- modelRatingsAll %>%
group_by(videoID, train, valid, test) %>%
summarize(trimodal = cor(actual, A.V.L),
AV = cor(actual, A.V),
AL = cor(actual, A.L),
VL = cor(actual, V.L),
acoustic = cor(actual, A),
video = cor(actual, V),
ling = cor(actual, L))
modelRatings_byVideoLong <- modelRatings_byVideoSummary %>%
pivot_longer(cols=!c(videoID, train, valid, test), names_to = "modality", values_to = "correlation")
# summary stats for the trimodal model, by train/valid/test partition
modelRatings_summary_by_partition = modelRatings_byVideoLong %>%
filter(modality=="trimodal") %>%
group_by(train, valid, test) %>%
summarize(meanCorr = mean(correlation),
sdCorrelation = sd(correlation),
N = n())
```
- Mean performance of the multimodal model on the Training Set is:
`r format((modelRatings_summary_by_partition %>% filter(train))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_summary_by_partition %>% filter(train))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean performance of the multimodal model on the Validation Set is:
`r format((modelRatings_summary_by_partition %>% filter(valid))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_summary_by_partition %>% filter(valid))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean performance of the multimodal model on the held out Test Set is:
`r format((modelRatings_summary_by_partition %>% filter(test))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_summary_by_partition %>% filter(test))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Text in paper:
> ... We trained the model as is typical in machine learning, using cross-validation with the SEND dataset split into training, validation, and a held-out test set (using the same partition as in Ong et al., 2021).
The model trained on all three modalities learns to predict the emotional valence well, with a high accuracy on the 114 videos in the training set
(M$_\text{Audio-Video}$ = `r format((modelRatings_summary_by_partition %>% filter(train))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Audio-Video}$ across videos = `r format((modelRatings_summary_by_partition %>% filter(train))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), and a moderate accuracy on the 40 videos in the validation set
(M$_\text{Audio-Video}$ = `r format((modelRatings_summary_by_partition %>% filter(valid))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Audio-Video}$ = `r format((modelRatings_summary_by_partition %>% filter(valid))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
). On the 39 videos in the held-out test set, which the model did not see during training, the trained multimodal model achieves a performance of
`r format((modelRatings_summary_by_partition %>% filter(test))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD$_\text{Audio-Video}$ = `r format((modelRatings_summary_by_partition %>% filter(test))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), which is a little lower than, but not significantly different from, human observers’ performances in Study 1a and 1b.
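The "not significantly different from" comparison is not computed in this file; one way to check it would be a Welch t-test over per-video accuracies, sketched here under the assumption that the held-out test-set videos are the relevant comparison set:

```{r model-vs-human-ttest-sketch, eval=FALSE}
# Illustrative only: model per-video test-set accuracy (trimodal) vs.
# human per-video accuracy in Study 1a's Audio-Video condition.
model_test <- modelRatings_byVideoLong %>% filter(test, modality == "trimodal")
human_1a   <- d_studies_byVideoSummary %>% filter(study == "1a", condition == "both")
t.test(model_test$correlation, human_1a$correlation)
```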
### mean model performance with different combinations of modalities (on test set only)
```{r ML-calculate-means-test-set-only, echo=F, eval=T}
# this next data frame has mean performance only for the test partition
modelRatings_testSetSummary <- modelRatings_byVideoLong %>% filter(test) %>%
group_by(modality) %>%
summarize(meanCorr = mean(correlation),
sdCorrelation = sd(correlation),
N = n()) %>%
mutate(study="ML-test") %>%
mutate(modality = factor(modality, levels=c("trimodal", "AL", "AV", "VL", "video", "acoustic", "ling")),
se = sdCorrelation / sqrt(N),
ci_upper = meanCorr + qt(0.975, N - 1) * se, # N-1 df for the t critical value
ci_lower = meanCorr - qt(0.975, N - 1) * se)
# this next data frame has all train/valid/test
modelRatings_allSummary <- bind_rows(
(modelRatings_byVideoLong %>% group_by(modality) %>%
summarize(meanCorr = mean(correlation),
sdCorrelation = sd(correlation),
N = n()) %>%
mutate(study="ML-all")),
(modelRatings_byVideoLong %>% filter(test) %>%
group_by(modality) %>%
summarize(meanCorr = mean(correlation),
sdCorrelation = sd(correlation),
N = n()) %>%
mutate(study="ML-test"))
) %>%
mutate(modality = factor(modality, levels=c("trimodal", "AL", "AV", "VL", "video", "acoustic", "ling")),
se = sdCorrelation / sqrt(N),
ci_upper = meanCorr + qt(0.975, N - 1) * se, # N-1 df for the t critical value
ci_lower = meanCorr - qt(0.975, N - 1) * se)
```
- Mean performance of the *Video-Only* model on the Test Set is:
`r format((modelRatings_testSetSummary %>% filter(modality=="video"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_testSetSummary %>% filter(modality=="video"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean performance of the *Audio-Only* model (linguistic + acoustic) on the Test Set is:
`r format((modelRatings_testSetSummary %>% filter(modality=="AL"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_testSetSummary %>% filter(modality=="AL"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
Separating Audio-Only into Linguistic and Acoustic modalities:
- Mean performance of the *Linguistic-modality-only* model on the Test Set is:
`r format((modelRatings_testSetSummary %>% filter(modality=="ling"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_testSetSummary %>% filter(modality=="ling"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean performance of the *Acoustic-modality-only* model on the Test Set is:
`r format((modelRatings_testSetSummary %>% filter(modality=="acoustic"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((modelRatings_testSetSummary %>% filter(modality=="acoustic"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Text in paper:
> With this trained model, we then considered how the model would perform given the same combinations of modalities that people saw. That is, we presented the model with only the video modality from the clips in the test-set to mirror Study 1a/b’s “Video-Only” condition, and only the acoustic and linguistic channels to similarly mirror the “Audio-Only” condition.
When given only the visual modality, the model does very poorly, much like human observers, with a mean performance of `r format((modelRatings_testSetSummary %>% filter(modality=="video"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD$_\text{Video-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="video"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), which is not different from a correlation of 0, i.e., what it would achieve if it were just randomly guessing. By contrast, when given both the acoustic and linguistic channels—the two components of the “Audio-Only” condition—the model achieves a performance of `r format((modelRatings_testSetSummary %>% filter(modality=="AL"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD$_\text{Audio-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="AL"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), which is comparable to its performance when given all three modalities. That is, the model (trained on the target’s self-report) predicts the target’s valence equally well when given both the audio and the video as when given only the audio, a pattern similar to human observers. Finally, although the design of Studies 1a and 1b did not let us separate the contributions of the linguistic and acoustic modalities in the human data, we can do so with the machine learning model: the model does very well when given only the linguistic modality
(M$_\text{Linguistics-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="ling"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Linguistics-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="ling"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), but not when given the acoustic modality
(M$_\text{Acoustics-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="acoustic"))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Acoustics-Only}$ = `r format((modelRatings_testSetSummary %>% filter(modality=="acoustic"))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
), suggesting that the emotion-relevant information lies mostly in the semantic content of the target’s story.
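A paired comparison of the linguistic-only and acoustic-only per-video accuracies on the test set would make this point quantitative; a sketch (not necessarily the test reported in the paper):

```{r ling-vs-acoustic-sketch, eval=FALSE}
# Paired t-test, pairing the two single-modality models within test-set videos.
test_wide <- modelRatings_byVideoSummary %>% filter(test)
t.test(test_wide$ling, test_wide$acoustic, paired = TRUE)
```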
### comparing behavioural data with ML model
```{r compare-behavioural-data-with-ML, echo=F}
comparisonDF_allSummary <- bind_rows(
d_studies_allSummary,
(modelRatings_allSummary %>%
mutate(condition = factor(modality,
levels=c("trimodal", "AL", "video",
"AV", "VL", "acoustic", "ling"),
labels=c("both", "audioOnly", "videoOnly",
"acousticVideo", "videoLing", "acoustic", "ling"))) %>%
select(-modality))
) %>% mutate(condition = factor(condition,
levels=c("both", "audioOnly", "videoOnly",
"acousticVideo",
"videoLing",
"acoustic", "ling"),
labels=c("Audio-Video", "Audio-Only", "Video-Only",
"Acous.+Video",
"Ling.+Video",
"Acoustic", "Linguistics")),
study = factor(study, levels=c("1a", "1b", "Heb", "Am-Heb", "ML-all", "ML-test")))
modelRatings_byVideoSummaryLong = modelRatings_byVideoSummary %>%
pivot_longer(cols=!c(videoID, train, valid, test),
names_to = "modality", values_to = "modelCorrelation") %>%
mutate(condition = factor(modality,
levels=c("trimodal", "AL", "video", "AV", "VL", "acoustic", "ling"),
labels=c("both", "audioOnly", "videoOnly", "acousticVideo", "videoLing", "acoustic", "ling")),
videoID = (videoID %>% gsub("ID", "", .) %>% gsub("vid", "", .) )
) %>%
select(-modality)
comparisonDF_byVideoSummary = full_join(d_studies_byVideoSummary,
modelRatings_byVideoSummaryLong,
by=c("videoID", "condition"))
human_model_correlations_byVideo = comparisonDF_byVideoSummary %>%
filter(test) %>%
filter(condition %in% c("both", "audioOnly", "videoOnly")) %>%
group_by(condition) %>% summarize(
humanModelCorr = (cor.test(correlation, modelCorrelation)$estimate),
p.val = (cor.test(correlation, modelCorrelation)$p.value)
)
```
#### Text in paper
> We also found strong positive correlations (Fig. 3b) between the averaged human empathic accuracy and the model’s “empathic accuracy” (i.e., the correlation of the model’s rating with the target’s self-report), for the held-out test set videos, in the
Audio-Video (r =
`r format((human_model_correlations_byVideo %>% filter(condition=="both"))$humanModelCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
; p = `r format((human_model_correlations_byVideo %>% filter(condition=="both"))$p.val, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) and Audio-Only conditions (r = `r format((human_model_correlations_byVideo %>% filter(condition=="audioOnly"))$humanModelCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
; p = `r format((human_model_correlations_byVideo %>% filter(condition=="audioOnly"))$p.val, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
). This means that in these conditions, the model is more accurate on the same videos on which people are more accurate at recovering the target’s affect. This correlation was not significant in the Video-Only condition (
r = `r format((human_model_correlations_byVideo %>% filter(condition=="videoOnly"))$humanModelCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`; p = `r format((human_model_correlations_byVideo %>% filter(condition=="videoOnly"))$p.val, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
).
### Figure 3
```{r compare-behavioural-data-with-ML-plots-a, warning=F, fig.width=12.5, fig.height=4.5, echo=F}
### Plot for paper (white background)
comparisonDF_allSummary %>% filter(study %in% c("1a", "1b", "ML-test")) %>%
#filter(condition %in% c("All", "Audio-Only", "Video-Only")) %>%
complete(condition = unique(condition)) %>%
ggplot(aes(x=condition, y=meanCorr, fill=study)) +
geom_bar(stat="identity", position=position_dodge(width = .9, preserve = "single")) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width=.2,
position=position_dodge(width = .9, preserve = "single"), color="black") +
xlab('Available Modalities') + ylab("Empathic Accuracy") +
scale_fill_manual(breaks=c("1a", "1b", "ML-test"),
labels=c("Study 1a", "Study 1b", "Machine Learning model on held-out test videos"), values = my_color_palette, name=NULL) +
scale_y_continuous(breaks=c(0, 0.2, 0.4, 0.6)) + coord_cartesian(ylim=c(-0.15,0.6)) +
my_theme + theme(axis.text = element_text(size=16),
axis.title.x = element_text(size=18, vjust=-0.2),
axis.title.y = element_text(size=18, vjust=0.8),
legend.text = element_text(size=16))
# 12 by 4.5
```
```{r compare-behavioural-data-with-ML-plots-b, message=F, fig.width=12.5, fig.height=4.5, echo=F}
# Plot for paper: white background
comparisonDF_byVideoSummary %>%
filter(condition %in% c("both", "audioOnly", "videoOnly"), study %in% c("1a")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Audio-Video", "Audio-Only", "Video-Only"))) %>%
filter(test) %>%
ggplot(aes(x=modelCorrelation, y=correlation)) + geom_point(color="black") +
geom_errorbar(aes(ymax = correlation+se, ymin = correlation-se), width=.2, color="black") +
geom_smooth(method="lm") +
coord_cartesian(xlim=c(-1,1), ylim=c(-1,1)) +
ylab("Mean Human Empathic Accuracy") + xlab('Machine Learning Model "Empathic Accuracy"') +
facet_grid(~condition) +
my_theme + theme(
strip.text = element_text(size=16),
axis.text = element_text(size=16),
axis.title.x = element_text(size=16, vjust=-0.2),
axis.title.y = element_text(size=16, vjust=0.8),
axis.ticks = element_line(linewidth = 0.2),
strip.background = element_rect(color = "white", fill = "white")
)
# 12 by 4.5
```
> Figure 3. Comparison of human and machine learning model average performance, by condition. a) "Audio-Video" includes the machine learning model with all three modalities. "Audio-Only" includes the deep learning model with the Linguistic and Acoustic features. Error bars represent 95% confidence intervals (with standard error calculated over the number of videos). b) Scatterplot comparing mean human observers’ empathic accuracy (vertical axis) against the deep learning model’s “empathic accuracy” (horizontal axis), for the Audio-Video (left panel), Audio-Only (middle panel), and Video-Only (right panel) conditions. Each data point is a video from the held-out test set, and 95% confidence interval error bars are provided for the mean human empathic accuracies. There is a significant positive correlation in the Audio-Video condition and in the Audio-Only condition, but no significant correlation in the Video-Only condition.
## Study 2 Hebrew-speaking participants watching Hebrew Videos
To add in recruitment statistics from Prep file
#### Text in paper:
> X
```{r Study2-analysis, echo=F, eval=T}
s2_main_lmer_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("Heb")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Both", "Audio-Only", "Video-Only"))) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
s2_main_lmer_audioAsBase_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("Heb")) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
```
Means and SDs of empathic accuracy:
- Mean in Audio-Video condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in AudioOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in VideoOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Mixed-effects linear model stats:
> `lmer(accuracy ~ condition + (1|videoID) + (1|participantID))`
- Audio Only > Both, `r report_lmer_in_text(s2_main_lmer_summary, "conditionAudio-Only")`
- Video Only > Both, `r report_lmer_in_text(s2_main_lmer_summary, "conditionVideo-Only")`
- Video Only > Audio Only, `r report_lmer_in_text(s2_main_lmer_audioAsBase_summary, "conditionvideoOnly")`
#### Text in paper:
> The pattern of results replicates Studies 1a and 1b; observers' empathic accuracies are highest in the Audio-Video
(M$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) and Audio-Only
(M$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) conditions, and these were not significantly different from each other (p = .95). These performances were significantly higher than the performance in the Video-Only condition
(M$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Heb"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
; Audio-Video > Video-Only, `r report_lmer_in_text(s2_main_lmer_summary, "conditionVideo-Only", reversed=TRUE)`;
Audio-Only > Video-Only, `r report_lmer_in_text(s2_main_lmer_audioAsBase_summary, "conditionvideoOnly", reversed=TRUE)`).
## Study 3 Americans watching Hebrew Videos
To add in recruitment statistics from Prep file
#### Text in paper:
> In Study 3, we recruited English-speaking Americans (N=170) to watch the Hebrew videos (specifically excluding participants who had familiarity with Hebrew); ...
```{r Study3-analysis, echo=F, eval=T}
s3_main_lmer_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("Am-Heb")) %>%
mutate(condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Both", "Audio-Only", "Video-Only"))) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
s3_main_lmer_audioAsBase_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("Am-Heb")) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
s3_main_lmer_videoAsBase_summary =
d_studies_byVideoAndParticipantSummary %>% filter(study %in% c("Am-Heb")) %>%
mutate(condition = factor(condition, levels=c("videoOnly", "both", "audioOnly"),
labels=c("Video-Only", "Both", "Audio-Only"))) %>%
lmer(indivCorrelation ~ condition + (1|participantID) + (1|videoID), data=.) %>%
summary()
```
Means and SDs of empathic accuracy:
- Mean in Audio-Video condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in AudioOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
- Mean in VideoOnly condition is:
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
(SD =
`r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
)
#### Mixed-effects linear model stats:
> `lmer(accuracy ~ condition + (1|videoID) + (1|participantID))`
(Intercepts)
- Both > zero, `r report_lmer_in_text(s3_main_lmer_summary, "(Intercept)")`
- Audio Only > zero, `r report_lmer_in_text(s3_main_lmer_audioAsBase_summary, "(Intercept)")`
- Video Only > zero, `r report_lmer_in_text(s3_main_lmer_videoAsBase_summary, "(Intercept)")`
(Between conditions)
- Audio Only > Both, `r report_lmer_in_text(s3_main_lmer_summary, "conditionAudio-Only")`
- Video Only > Both, `r report_lmer_in_text(s3_main_lmer_summary, "conditionVideo-Only")`
- Video Only > Audio Only, `r report_lmer_in_text(s3_main_lmer_audioAsBase_summary, "conditionvideoOnly")`
#### Text in paper:
> Now, when participants did not have access to the linguistic information, we observed a different pattern compared to the earlier studies: performance was highest in the Audio-Video
(M$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("both")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Video}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("both")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) and in the Video-Only condition
(M$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("videoOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
, SD$_\text{Video-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("videoOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
). These performances were significantly greater than chance (linear mixed-model with fixed-effects of condition:
coefficient on Audio-Video, b = 0.198 [0.034, 0.362], t = 2.36, p = .018
; coefficient on Video-Only, b = 0.191 [0.029, 0.353], t = 2.31, p = .021
), and not significantly different from each other (p = 0.89). By contrast, performance in the Audio-Only condition
(M$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("audioOnly")))$meanCorr, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`,
SD$_\text{Audio-Only}$ = `r format((d_studies_allSummary %>% filter(study %in% c("Am-Heb"), condition %in% c("audioOnly")))$sdCorrelation, digits=NUM_DIGITS_FOR_TEXT_REPORTING)`
) was the lowest, and it was not significantly greater than chance
(b = 0.103 [-0.060, 0.266], t = 1.23, p = 0.217
). The performances in the former two conditions were greater than in the Audio-Only condition, and these differences were marginally significant
(Audio-Video > Audio-Only, `r report_lmer_in_text(s3_main_lmer_summary, "conditionAudio-Only", reversed=TRUE)`;
Video-Only > Audio-Only, `r report_lmer_in_text(s3_main_lmer_audioAsBase_summary, "conditionvideoOnly")`).
```{r Study2-3-plots, fig.width=8.5, fig.height=5.5, echo=F, eval=T}
# For Paper: plot only Studies 2 (Heb) and 3 (Am-Heb)
plot23_df1 = d_studies_allSummary %>% filter(study %in% c("Am-Heb", "Heb")) %>%
mutate(study = factor(study, levels=c("Heb", "Am-Heb")),
condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Audio-Video", "Audio-Only", "Video-Only")))
plot23_df2 = d_studies_byVideoSummary %>% filter(study %in% c("Am-Heb", "Heb")) %>%
mutate(study = factor(study, levels=c("Heb", "Am-Heb")),
condition = factor(condition, levels=c("both", "audioOnly", "videoOnly"),
labels=c("Audio-Video", "Audio-Only", "Video-Only")))
# For paper: plot only studies Am-Heb and Heb; no ML. [White background]
ggplot(plot23_df1, aes(x=condition, y=meanCorr, fill=study)) +
geom_bar(stat="identity", position=position_dodge(.9)) +
geom_point(data = plot23_df2, aes(x=condition, fill=study, y = correlation),
position=position_jitterdodge(dodge.width= .9, jitter.width=.1), alpha=.3) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
width=.2, position=position_dodge(.9), color="black") +
xlab("Condition") + ylab("Empathic Accuracy") +
scale_fill_manual(breaks=c("Heb", "Am-Heb"),
labels=c("S2: Israelis watching Hebrew videos", "S3: Americans watching Hebrew videos"), values = my_color_palette[c(4,5)], name=NULL) +
scale_y_continuous(breaks=c(-0.2, 0, 0.2, 0.4, 0.6, 0.8)) + coord_cartesian(ylim=c(-0.30,0.85)) +
my_theme + theme(
legend.text = element_text(size=14),
strip.text = element_text(size=16),
axis.text = element_text(size=16),
axis.title.x = element_text(size=18, vjust=-0.2),
axis.title.y = element_text(size=18, vjust=0.8),
axis.ticks = element_line(linewidth = 0.2)
)
## 8.5 by 5.5
```
> Figure 4. Behavioral results from Study 2 and Study 3. Study 2 shows a direct replication of Studies 1a/1b with a different population and a different video set, but Study 3 shows a different pattern, indicating that people flexibly adjust which modalities they rely on as the informativeness of those modalities changes. Error bars give 95% confidence intervals, with standard errors calculated over videos (rather than over participants).