<a href="https://colab.research.google.com/github/alinat2/SpanTasks/blob/main/Rspan_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reading Span Data Processing

**Author**: Alina Tu

**Contact**: alinat2@uci.edu

**Last Updated**: 8/15/2022

**About**: This preprocessing script handles the Reading Span (Rspan) data files of the Individual Differences and Robotics (IndivRobotics) project. "Reading" refers to the sentences that participants judge as making sense or not. "Letter" refers to the letters that are presented and then recalled in sequential order.

In [None]:
!pip install rpy2==3.5.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rpy2==3.5.1
  Downloading rpy2-3.5.1.tar.gz (201 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.7/201.7 KB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rpy2
  Building wheel for rpy2 (setup.py) ... [?25l[?25hdone
  Created wheel for rpy2: filename=rpy2-3.5.1-cp39-cp39-linux_x86_64.whl size=317896 sha256=c6c0a4bd0bbda9a4b9003b1175d5b6b635e9e4bb1bf6422e83bba40d988aea90
  Stored in directory: /root/.cache/pip/wheels/09/e7/bc/33685b60ab54dba969596dd87244ee9f4c2e83dff9a53d4f20
Successfully built rpy2
Installing collected packages: rpy2
  Attempting uninstall: rpy2
    Found existing installation: rpy2 3.5.5
    Uninstalling rpy2-3.5.5:
      Successfully uninstalled rpy2-3.5.5
Successfully installed rpy2-3.5.1


In [None]:
%load_ext rpy2.ipython

In [None]:
%%R
install.packages("rprime")
library(rprime)
library(dplyr)
library(tidyverse)

(as ‘lib’ is unspecified)
























	‘/tmp/Rtmpl5fVyd/downloaded_packages’

Attaching package: ‘dplyr’



    filter, lag



    intersect, setdiff, setequal, union



✔ tibble  3.1.8     ✔ stringr 1.4.1
✔ tidyr   1.3.0     ✔ forcats 1.0.0
✔ readr   2.1.4     

✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



In [None]:
%%R
# Input the working directory containing the Rspan Eprime data files
# Each file is named "RspanShort-" + "subject ID" + "session number" (e.g., RspanShort-111-1.txt)
# NOTE: the %%R cell magic has to be the first line of the cell, so these
# comments live below it as R comments.
# NOTE: this path is on Google Drive; run the drive.mount('/content/drive')
# cell before this one, otherwise setwd() cannot find the directory.
workdir <- "/content/drive/Shareddrives/IndividualDifferencesUCIUCSB/RAFolder/Raw Data/Session 3.2/Reading Span"
setwd(workdir)

In [None]:
# Mount Google Drive at /content/drive so the shared-drive paths used by the
# R cells (the input working directory and the CSV output folder) resolve.
# Run this cell before any cell that reads from or writes to /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%R
# Create a function to organize Eprime data
#
# read(file_name)
#   file_name: path to a text file generated by Eprime
#   returns:   the file's contents as a tibble (read_eprime -> FrameList ->
#              to_data_frame -> as_tibble)
read <- function(file_name) {
  # Read in a text file generated by Eprime
  file_lines <- read_eprime(file_name)
  # Convert lines from an Eprime file into EprimeFrame objects
  file_frames <- FrameList(file_lines)
  # Make it a data frame
  file_df <- to_data_frame(file_frames)
  # Clean up data
  # data[data == "?"] <- "NA"
  # Return the tibble as the last expression (visibly) rather than relying on
  # the invisible value of a trailing assignment
  as_tibble(file_df)
}

#test_data <- read("RspanShort-803-1.txt")

In [None]:

%%R
# Store list of names for all Eprime data files.
# `pattern` is a regular expression, not a shell glob, so match the ".txt"
# extension explicitly and anchor it at the end of the file name.
files <- dir(pattern = "\\.txt$")

# Per-file results are collected in preallocated lists and stacked once after
# the loop, instead of growing three tables with rbind() on every iteration.
trial_list <- vector("list", length(files))
recall_list <- vector("list", length(files))
participant_list <- vector("list", length(files))

for (file_idx in seq_along(files)) {
  indiv_eprime_datafile <- files[[file_idx]]
  readable_data <- read(indiv_eprime_datafile)
  # Subject ID is taken from the first row of the file
  sub_id <- as.numeric(readable_data$Subject)[1]

  # Sentence-judgment trials (rows whose Procedure is "trialdo")
  trial_data <- readable_data[readable_data$Procedure == "trialdo", ]
  trial_proc <- as_tibble(trial_data) %>%
    select(showSentence.RT, SENSEBOTH.ACC, SENSEBOTH.RT)
    # NOTE(review): column meanings below are inferred from the Eprime
    # column names -- confirm against the task script:
    # showSentence.RT: presumably RT taken to click past the sentence
    # SENSEBOTH.ACC: presumably accuracy of the "makes sense" judgment
    # SENSEBOTH.RT: presumably RT taken to make the "makes sense" judgment
  # Prepend the subject ID as a properly named first column
  trial_proc <- cbind(sub_id = rep(sub_id, nrow(trial_proc)), trial_proc)
  trial_list[[file_idx]] <- trial_proc

  # Overall test "recall" trials
  recall_data <- readable_data[readable_data$Procedure == "recall", ]
  # Keep recall rows 3-8: there are 6 test trials
  # (rows 1-2 are presumably practice -- TODO confirm)
  recall_proc <- as_tibble(recall_data)[3:8,] %>%
    select(Cycle, Sample, SpanScore, SpanTotal, setsz, numberwrong)
  recall_proc <- cbind(sub_id = rep(sub_id, nrow(recall_proc)), recall_proc)
  recall_list[[file_idx]] <- recall_proc

  # Overall scores: first "SessionProc" row of the file
  summary_data <- readable_data[readable_data$Procedure == "SessionProc", ]
  summary_proc <- as_tibble(summary_data)[1,] %>%
    select(MathErrorTotal, SpeedErrorTotal, AccErrorTotal, RspanScore, RspanTotal)
    # Rspan Score: sum of SpanScore in each recall_proc procedure (# of correctly recalled letters)
    # note: set size = list length
    # Rspan Total: sum of SpanTotal in each recall_proc procedure (total possible letters to recall)
  # Per-participant means over this file's "trialdo" rows (NA values ignored)
  avg_showSentence.RT <- mean(as.numeric(trial_proc$showSentence.RT), na.rm = TRUE)
  avg_SENSEBOTH.ACC <- mean(as.numeric(trial_proc$SENSEBOTH.ACC), na.rm = TRUE)
  avg_SENSEBOTH.RT <- mean(as.numeric(trial_proc$SENSEBOTH.RT), na.rm = TRUE)
  summary_proc <- cbind(sub_id, avg_showSentence.RT, avg_SENSEBOTH.ACC, avg_SENSEBOTH.RT, summary_proc)
  participant_list[[file_idx]] <- summary_proc
}

# Stack the per-file tables once; with no input files these are empty tibbles
trial_full <- bind_rows(trial_list)
recall_full <- bind_rows(recall_list)
participant_full <- bind_rows(participant_list)

In [None]:
%%R
# print(participant_full)
# Assign the final column headers to the stacked trial and recall tables
# (participant_full keeps the names it was built with).
names(trial_full) <- c("sub_id", "showSentence.RT", "SENSEBOTH.ACC", "SENSEBOTH.RT")
names(recall_full) <- c("sub_id", "Cycle", "Sample", "SpanScore", "SpanTotal", "Sets", "numberwrong")

print("Writing CSVs...")
path_out <- "/content/drive/Shareddrives/IndividualDifferencesUCIUCSB/RAFolder/DataProcessingScripts/Session 3.2/Reading Span"

# Output file name -> table to write, in the order the files are produced
csv_tables <- list(
  "Rspan_trial_full.csv" = trial_full,
  "Rspan_recall_full.csv" = recall_full,
  "Rspan_participant_full.csv" = participant_full
)
for (csv_name in names(csv_tables)) {
  # Row names are omitted so each CSV contains only the data columns
  write.csv(csv_tables[[csv_name]], file.path(path_out, csv_name), row.names=FALSE)
}
print("CSVs have been created.")

[1] "Writing CSVs..."
[1] "CSVs have been created."
