# Libraries

In [1]:
library(plyr)

library(tidyverse)
library(DT) # dependency
library(ggthemes) # dependency

"package 'tidyverse' was built under R version 3.3.3"Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
"package 'dplyr' was built under R version 3.3.3"Conflicts with tidy packages ---------------------------------------------------
arrange():   dplyr, plyr
compact():   purrr, plyr
count():     dplyr, plyr
failwith():  dplyr, plyr
filter():    dplyr, stats
id():        dplyr, plyr
lag():       dplyr, stats
mutate():    dplyr, plyr
rename():    dplyr, plyr
summarise(): dplyr, plyr
summarize(): dplyr, plyr
"package 'ggthemes' was built under R version 3.3.3"

In [2]:
source("tools.R") # Ad-hoc tools

# General variables and functions

In [3]:
# Standard error of the mean of `x`: the sample standard deviation divided by
# the square root of the number of elements. Missing values are not removed,
# and a single-element input yields NA because sd() is NA in that case.
se <- function(x) {
    n_obs <- length(x)
    spread <- sd(x)
    spread / sqrt(n_obs)
}

# Bar chart with error bars.
# `plot_data` must provide the columns `x`, `y`, `lower_bound` and
# `upper_bound`; bar heights are taken from `y` as-is (stat = "identity").
# Returns the ggplot object without printing it.
bar_graph <- function(plot_data) {
    bars <- geom_bar(stat = "identity", position = position_dodge())
    whiskers <- geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound),
                              position = position_dodge(.9),
                              width = 0.2)

    ggplot(plot_data, aes(x = x, y = y)) +
        bars +
        whiskers
}

# Number of decimal places handed to round() by accordance() and
# ratio_unknown_door() when they report their summary tables.
decimals <- 2 # for rounding

# Import data

In [4]:
# Folder with the raw experiment files. It is handed to get_fc_table(), which
# is not defined in this notebook (presumably provided by tools.R -- confirm).
directory <- "Data January 2017/"

# Trial-level table used by every analysis below.
dat <- get_fc_table(directory)

### Data mutation

In [5]:
# Rename the raw columns. The names are positional: there must be exactly one
# per column of `dat`, listed in column order.
colnames(dat) <- c("row_id", "left_door", "right_door",
                   "rt", "KEY_RESPONSE_TIME", "DISPLAY_TIME",
                   "key_pressed", "block_level_1_fc", "block",
                   "id", "experiment")

# Recode the trial description in three steps:
#   1. keep only the first 7 characters of each door name (drops ".jpg"),
#   2. translate the response key: 'f' means left, anything else means right,
#   3. record which door was actually picked.
dat <- dat %>%
    mutate(left_door = substr(left_door, 1, 7),
           right_door = substr(right_door, 1, 7)) %>%
    mutate(key_pressed = ifelse(key_pressed == "f", "left", "right")) %>%
    mutate(chosen_door = ifelse(key_pressed == "left", left_door, right_door))

"package 'bindrcpp' was built under R version 3.3.3"

Cue values

In [6]:
# Attach the cue values of the left, right and chosen door to every trial.
#
# get_cues() is not defined in this notebook (presumably it comes from
# tools.R). This loop only relies on its result holding the speed cue at
# position 1 and the safety cue at position 2.
#
# seq_len(nrow(dat)) is used instead of 1:length(dat$id) so that an empty
# table runs zero iterations instead of visiting the indices 1 and 0.
# Each door is looked up once instead of twice per trial.
# NOTE(review): this assumes get_cues() returns the same result for the same
# arguments -- confirm against its definition.
for (index in seq_len(nrow(dat))) {
    left_cues <- get_cues(dat$experiment[index], dat$left_door[index])
    dat$left_speed[index] <- left_cues[1]
    dat$left_safety[index] <- left_cues[2]

    right_cues <- get_cues(dat$experiment[index], dat$right_door[index])
    dat$right_speed[index] <- right_cues[1]
    dat$right_safety[index] <- right_cues[2]

    chosen_cues <- get_cues(dat$experiment[index], dat$chosen_door[index])
    dat$chosen_speed[index] <- chosen_cues[1]
    dat$chosen_safety[index] <- chosen_cues[2]
}

Add final columns

In [7]:
# Derived per-trial columns.
#
# steps_ttb_*: number of cues to check. It is 1 when the first cue already
#   differs between the two doors and 2 when the first cue is tied.
#   ASSUMPTION: when an unknown door is involved, the decision takes 1 step.
# unknown_door_involved: at least one door has a speed cue of 0.
# chosen_door_is_unknown: the chosen door has a speed cue of 0
#   (checking one cue is enough).
dat <- dat %>%
    mutate(steps_ttb_speed = 1 + (left_speed == right_speed),
           steps_ttb_safety = 1 + (left_safety == right_safety),
           unknown_door_involved = left_speed == 0 | right_speed == 0,
           chosen_door_is_unknown = chosen_speed == 0)

Now that we have the cue values, we can calculate the **predicted** decisions

In [8]:
# Choice per TTB.
# ASSUMPTION: unknown door is never chosen
#
# For each ordering (speed first, safety first) the prediction is:
#   - unknown door involved: pick the other door (the one whose cue is not 0);
#   - otherwise pick the door with the higher first cue, and break a tie on
#     the first cue with the second cue;
#   - trials matching none of the rules are left as NA.
# case_when() takes the first matching rule, so the unknown-door rules are
# listed in the order that reproduces "last assignment wins" when both match.
dat <- dat %>%
    mutate(
        ## TTB Speed
        chosen_ttb_speed = case_when(
            unknown_door_involved & right_speed == 0 ~ "left",
            unknown_door_involved & left_speed == 0 ~ "right",
            !unknown_door_involved & left_speed > right_speed ~ "left",
            !unknown_door_involved & left_speed < right_speed ~ "right",
            !unknown_door_involved & left_speed == right_speed &
                left_safety > right_safety ~ "left",
            !unknown_door_involved & left_speed == right_speed &
                left_safety < right_safety ~ "right"
        ),
        ## TTB Safety
        chosen_ttb_safety = case_when(
            unknown_door_involved & right_safety == 0 ~ "left",
            unknown_door_involved & left_safety == 0 ~ "right",
            !unknown_door_involved & left_safety > right_safety ~ "left",
            !unknown_door_involved & left_safety < right_safety ~ "right",
            !unknown_door_involved & left_safety == right_safety &
                left_speed > right_speed ~ "left",
            !unknown_door_involved & left_safety == right_safety &
                left_speed < right_speed ~ "right"
        )
    )

<hr>
<hr>

# Accordance rates

In [9]:
# Accordance rate between the observed key presses and a predicted choice.
#
# data:   trial-level table with the columns `key_pressed` and `id`.
# column: unquoted name of the column holding the predicted side.
#
# The share of correctly predicted trials is computed per `id` first; the
# result is a one-row table with the mean of those shares and its standard
# error, rounded to `decimals` places.
accordance <- function( data, column ) {

    predicted <- enquo(column)

    per_subject <- data %>%
        mutate( correctly_predicted = (key_pressed == !!predicted) ) %>%
        group_by( id ) %>%
        summarize( accordance_rate = mean(correctly_predicted) )

    overall <- summarize( per_subject,
                          accordance_rate_mean = mean(accordance_rate),
                          se = se(accordance_rate) )

    round( overall, decimals )
}

### TTB_Speed

Overall

In [10]:
# All trials, speed-first prediction.
accordance(dat, chosen_ttb_speed)

accordance_rate_mean,se
0.65,0.01


Excluding unknown doors

In [11]:
# Speed-first prediction, restricted to trials without an unknown door.
dat %>%
    filter(!unknown_door_involved) %>%
    accordance(chosen_ttb_speed)

accordance_rate_mean,se
0.77,0.01


### TTB_Safety

Overall

In [12]:
# All trials, safety-first prediction.
accordance(dat, chosen_ttb_safety)

accordance_rate_mean,se
0.71,0.01


Excluding unknown doors

In [13]:
# Safety-first prediction, restricted to trials without an unknown door.
dat %>%
    filter(!unknown_door_involved) %>%
    accordance(chosen_ttb_safety)

accordance_rate_mean,se
0.93,0.01


Excluding trials with unknown doors and trials where both doors have the same safety value

In [14]:
# Safety-first prediction on trials where both doors are known and the safety
# cue alone discriminates between them.
dat %>%
    filter(!unknown_door_involved) %>%
    filter(left_safety != right_safety) %>%
    accordance(chosen_ttb_safety)

accordance_rate_mean,se
0.97,0.01


# Comparisons with unknown doors

In [15]:
# Share of trials in which the unknown door was chosen, among the trials where
# an unknown door was involved.
#
# data: trial-level table with the columns `unknown_door_involved`,
#       `chosen_door_is_unknown` (both logical) and `id`.
#
# The share is computed per `id` first; the result is a one-row table with the
# mean of those shares and its standard error, rounded to `decimals` places.
#
# Changes from the previous version:
#   - the helper column `chosen_door_is_unknown_length` was computed but never
#     read, so it is gone;
#   - per-participant shares are no longer rounded to 2 decimals before they
#     are averaged. Rounding now happens once, on the final table, which
#     matches accordance() and avoids distorting the mean and standard error.
ratio_unknown_door <- function ( data ) {
    data %>%
        filter( unknown_door_involved ) %>%
        group_by( id ) %>% # Each individual is grouped
        # mean() of a logical vector is the proportion of TRUE values
        summarize( ratio_unknown_door = mean(chosen_door_is_unknown) ) %>%
        summarize( ratio_unknown_door_mean = mean(ratio_unknown_door),
                   se = se(ratio_unknown_door) ) %>%
        round( decimals )
}

### Overall ratio

When an unknown door is available, what is the probability that a participant chooses it?

In [16]:
# All trials; ratio_unknown_door() itself keeps only those with an unknown door.
ratio_unknown_door(dat)

ratio_unknown_door_mean,se
0.49,0.02


### Extremes versus unknown

What if the known door was **extremely weak** (i.e. unsafe and slow)?

In [17]:
# Trials in which one of the doors is both slow (-1) and unsafe (-1).
dat %>%
    filter((right_speed == -1 & right_safety == -1) |
           (left_speed == -1 & left_safety == -1)) %>%
    ratio_unknown_door()

ratio_unknown_door_mean,se
0.86,0.03


What if the known door was **extremely strong** (i.e. safe and fast)?

In [18]:
# Trials in which one of the doors is both fast (1) and safe (1).
dat %>%
    filter((right_speed == 1 & right_safety == 1) |
           (left_speed == 1 & left_safety == 1)) %>%
    ratio_unknown_door()

ratio_unknown_door_mean,se
0.03,0.01


### Other cases

Where the known door is **safe** but slow

In [19]:
# Trials in which one of the doors is safe (1) but slow (-1).
dat %>%
    filter((right_speed == -1 & right_safety == 1) |
           (left_speed == -1 & left_safety == 1)) %>%
    ratio_unknown_door()

ratio_unknown_door_mean,se
0.08,0.02


Where the known door is **fast** but unsafe

In [20]:
# Trials in which one of the doors is fast (1) but unsafe (-1).
dat %>%
    filter((right_speed == 1 & right_safety == -1) |
           (left_speed == 1 & left_safety == -1)) %>%
    ratio_unknown_door()

ratio_unknown_door_mean,se
0.72,0.05


### Safe door (regardless of speed) versus unknown 

In [21]:
# Trials in which one of the doors is safe (1), whatever its speed.
dat %>%
    filter(right_safety == 1 | left_safety == 1) %>%
    ratio_unknown_door()

ratio_unknown_door_mean,se
0.05,0.01


# Response times

### Overall response time per block
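It is calculated as an average of averages: each participant's mean response time is computed per block first, and these per-participant means are then averaged within each block.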

In [22]:
# Response time per block as a mean of per-participant means, shaped for
# bar_graph() (columns x, y, lower_bound, upper_bound).
# NOTE(review): the bounds are y -/+ half a standard error, so each error bar
# spans one SE in total. Confirm this is intended rather than y -/+ se.
response_times <- dat %>%
    group_by(id, block) %>%
    summarize(subject_rt_mean = mean(rt)) %>% # one row per participant and block
    group_by(block) %>%
    summarize(y = mean(subject_rt_mean),
              se = se(subject_rt_mean)) %>%
    mutate(lower_bound = y - 0.5 * se,
           upper_bound = y + 0.5 * se,
           x = block)

response_times

block,y,se,lower_bound,upper_bound,x
0,2669.345,264.27507,2537.207,2801.483,0
1,1946.088,169.0121,1861.582,2030.594,1
2,1677.167,159.32135,1597.506,1756.827,2
3,1502.65,97.44472,1453.928,1551.372,3
4,1639.612,172.77521,1553.224,1725.999,4
5,1526.05,141.1022,1455.499,1596.601,5


In [23]:
# Store the per-block plot; it is not printed by this cell.
p1 <- bar_graph(response_times)

### Only known doors: response times

In [24]:
# Same per-block summary as above, restricted to trials in which both doors
# are known. Overwrites `response_times`.
# NOTE(review): the bounds are y -/+ half a standard error, so each error bar
# spans one SE in total. Confirm this is intended rather than y -/+ se.
response_times <- dat %>%
    filter(!unknown_door_involved) %>%
    group_by(id, block) %>%
    summarize(subject_rt_mean = mean(rt)) %>% # one row per participant and block
    group_by(block) %>%
    summarize(y = mean(subject_rt_mean),
              se = se(subject_rt_mean)) %>%
    mutate(lower_bound = y - 0.5 * se,
           upper_bound = y + 0.5 * se,
           x = block)

response_times

block,y,se,lower_bound,upper_bound,x
0,2508.158,279.74501,2368.286,2648.031,0
1,1831.096,170.13779,1746.027,1916.165,1
2,1488.537,97.91346,1439.581,1537.494,2
3,1309.483,86.42481,1266.271,1352.696,3
4,1537.992,168.83033,1453.577,1622.407,4
5,1319.933,100.72531,1269.571,1370.296,5


In [25]:
# Store the known-doors plot; it is not printed by this cell.
p2 <- bar_graph(response_times)

### Individual response times

In [26]:
# Mean response time of each participant over all of their trials, shaped for
# bar_graph(). Overwrites `response_times`.
# NOTE(review): the bounds are y -/+ half a standard error, so each error bar
# spans one SE in total. Confirm this is intended rather than y -/+ se.
response_times <- dat %>%
    group_by(id) %>%
    summarize(y = mean(rt),
              se = se(rt)) %>%
    mutate(lower_bound = y - 0.5 * se,
           upper_bound = y + 0.5 * se,
           x = id)

head(response_times)

id,y,se,lower_bound,upper_bound,x
1,1271.289,55.19251,1243.693,1298.885,1
10,1646.694,103.07848,1595.155,1698.234,10
11,1531.006,98.10892,1481.951,1580.06,11
12,1385.678,56.26731,1357.544,1413.811,12
13,1118.083,54.24746,1090.96,1145.207,13
14,2123.867,164.74162,2041.496,2206.237,14


#### Separated between those decisions that involved unknown doors and those that did not

In [27]:
# Mean response time of participants 1, 2 and 4 only, split by whether the
# trial involved an unknown door. `x` combines both into one axis label.
# Overwrites `response_times`.
# NOTE(review): the bounds are y -/+ half a standard error, so each error bar
# spans one SE in total. Confirm this is intended rather than y -/+ se.
response_times <- dat %>%
    filter(id %in% c(1, 2, 4)) %>%
    group_by(id, unknown_door_involved) %>%
    summarize(y = mean(rt),
              se = se(rt)) %>%
    mutate(lower_bound = y - 0.5 * se,
           upper_bound = y + 0.5 * se,
           x = paste(id, unknown_door_involved))

head(response_times)

id,unknown_door_involved,y,se,lower_bound,upper_bound,x
1,False,1170.347,78.77117,1130.962,1209.733,1 FALSE
1,True,1338.583,75.1178,1301.024,1376.142,1 TRUE
2,False,1517.417,136.02304,1449.405,1585.428,2 FALSE
2,True,1719.407,109.75263,1664.531,1774.284,2 TRUE
4,False,1478.333,77.39547,1439.636,1517.031,4 FALSE
4,True,1541.454,69.33742,1506.785,1576.122,4 TRUE


# Explore

In [28]:
# First trials on which the speed-first and safety-first predictions disagree,
# with the bookkeeping columns dropped for readability.
dat %>%
    filter(chosen_ttb_speed != chosen_ttb_safety) %>%
    select(-KEY_RESPONSE_TIME, -DISPLAY_TIME, -row_id, -unknown_door_involved) %>%
    head()

left_door,right_door,rt,key_pressed,block_level_1_fc,block,id,experiment,chosen_door,left_speed,left_safety,right_speed,right_safety,chosen_speed,chosen_safety,steps_ttb_speed,steps_ttb_safety,chosen_door_is_unknown,chosen_ttb_speed,chosen_ttb_safety
door005,door001,1063,left,1,0,1,A,door005,-1,1,1,-1,-1,1,1,1,False,right,left
door001,door005,1587,right,1,0,1,A,door005,1,-1,-1,1,-1,1,1,1,False,left,right
door007,door011,610,right,2,1,1,A,door011,1,-1,-1,1,-1,1,1,1,False,left,right
door011,door007,828,left,2,1,1,A,door011,-1,1,1,-1,-1,1,1,1,False,right,left
door017,door013,797,left,3,2,1,A,door017,-1,1,1,-1,-1,1,1,1,False,right,left
door013,door017,557,right,3,2,1,A,door017,1,-1,-1,1,-1,1,1,1,False,left,right
