# CIMA-Q 2019-09-01 Release patch 0.1

#### Visualize and append missing data from the previous cimaq_03-19 release to cimaq_20190901 (current release)

### Missing data in question includes both:
#### - In-scan behavioral data 'events.tsv' files
#### - Offline delayed retrieval data 'behavioral.tsv' files


In [1]:
import json
import os
from os import listdir as ls
from os.path import basename as bname
from os.path import dirname as dname
from os.path import join
import pandas as pd
from pandas import DataFrame as df
from shutil import move as smv
from cimaq_utils import loadimages


## Get CIMA-Q 'events.tsv' files
#### Name each file according to BIDS specification
#### - In conformity with CIMA-Q release found in /data/simexp/DATA/cimaq_20190901
        - The files are present, but in a distant subdirectory in the previous release
        - path: '/data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed'
#### Files correspond to regular BIDS 'events.tsv' files describing trial-based information
##### - trial type, stimuli onset & offset, participants' response times, etc.

In [4]:
# Sorted list of data tuples: (filename without extension,
#                              DataFrame indexed by "onset", filepath)
# Note: these 'events' filenames carry the 6-digits participant ID, while
#       data in cimaq_20190901 use the 7-digits ID to reference participants
maindir = "../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed"
allevents = sorted(
    ((os.path.splitext(bname(file))[0],
      pd.read_csv(file, sep='\t').set_index("onset"),
      file)
     for file in loadimages(maindir)
     if 'events' in file and file.endswith('.tsv')),
    # Order on the filename only: comparing whole tuples would fall through
    # to comparing DataFrames whenever two names tie, which raises ValueError.
    key=lambda item: item[0])
# Show the first entry: name, table and source path
display(allevents[0][0], allevents[0][1], allevents[0][2])

'sub-108391_ses-4_task-memory_events'

Unnamed: 0_level_0,trial_number,duration,offset,trial_type,response,response_time,stim_id,stim_file,stim_category,stim_name,recognition_accuracy,recognition_responsetime,position_correct,position_response,position_accuracy,position_responsetime
onset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
11.537,4,3.009,14.546,CTL,1.0,0.615,,,,,-1,,8,-1.0,-1,
20.046,5,3.009,23.055,Enc,1.0,0.666,Old65,,sporting,soccer ball_new,0,1.915,6,-1.0,0,
23.556,6,3.009,26.565,Enc,1.0,0.708,Old07,,animal,lion_new,0,1.450,5,-1.0,0,
27.552,7,3.009,30.561,Enc,1.0,0.600,Old15,,food,bottleofredwine01,1,3.335,6,9.0,1,3.786
31.564,8,3.009,34.573,Enc,1.0,0.747,Old69,,fruit,lemon_new,0,0.775,9,-1.0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
694.428,116,3.009,697.437,Enc,1.0,0.811,Old52,,musical,xylophone_old,1,1.210,6,6.0,2,0.977
703.438,117,3.009,706.447,CTL,1.0,0.705,,,,,-1,,9,-1.0,-1,
716.945,118,3.009,719.954,Enc,1.0,0.813,Old18,,food,cracker02,1,0.836,5,5.0,2,1.298
724.952,119,3.009,727.961,Enc,1.0,0.814,Old35,,kitchen,plate,1,1.332,5,5.0,2,2.982


'../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/sub-108391_ses-4_task-memory_events.tsv'

## Get CIMA-Q 'behavioral.tsv' files
#### Name each file according to BIDS specification
#### - In conformity with CIMA-Q release found in /data/simexp/DATA/cimaq_20190901

In [5]:
# Sorted list of data tuples: (filename without extension,
#                              DataFrame indexed by "trial_number", filepath)
# Note: filenames contain both 7-digits (pscid) & 6-digits (dccid) participant
#       identifiers. Data in cimaq_20190901 use the 7-digits ID to reference
#       participants
allbehavs = sorted(
    ((os.path.splitext(bname(file))[0],
      pd.read_csv(file, sep='\t').set_index("trial_number"),
      file)
     for file in loadimages(maindir)
     if 'PostScan' in file and file.endswith('.tsv')),
    # Order on the filename only: comparing whole tuples would fall through
    # to comparing DataFrames whenever two names tie, which raises ValueError.
    key=lambda item: item[0])
# Show one example entry (index 77 requires at least 78 matching files)
display(allbehavs[77][0], allbehavs[77][1], allbehavs[77][2])

'PostScanBehav_pscid7674650_dccid729722'

Unnamed: 0_level_0,old_new,stim_file,stim_id,stim_category,stim_name,recognition_response,recognition_accuracy,recognition_responsetime,recognition_performance,position_correct,position_response,position_accuracy,position_responsetime
trial_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,OLD,food_shellpasta.bmp,Old25,food,shellpasta,1,1,2.142,Hit,6,6,2.0,1.695
2,New,kitchen_fork_old.bmp,New18,kitchen,fork_old,2,1,3.233,CR,-1,-1,,
3,OLD,fruit_blackberry_new.bmp,Old66,fruit,blackberry_new,1,1,3.675,Hit,9,9,2.0,0.552
4,New,sporting_basketball_new.bmp,New14,sporting,basketball_new,2,1,2.303,CR,-1,-1,,
5,New,animal_dolphin_old.bmp,New09,animal,dolphin_old,2,1,1.904,CR,-1,-1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,OLD,kitchen_scrubbing brush.bmp,Old36,kitchen,scrubbing brush,2,0,3.895,Miss,8,-1,0.0,
114,New,animal_ostrich.bmp,New26,animal,ostrich,2,1,0.923,CR,-1,-1,,
115,OLD,vegie_green capsicum.bmp,Old75,vegie,green capsicum,1,1,1.211,Hit,6,5,1.0,0.314
116,OLD,animal_cheetah.bmp,Old02,animal,cheetah,2,0,2.130,Miss,5,-1,0.0,


'../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/PostScanBehav_pscid7674650_dccid729722.tsv'

## Create BIDS compliant 'participants.tsv' file

### Although one is present in the current release, it is safer to generate one here to properly index participants
#### - In the previous release (cimaq_03-19), participants are labeled according to their 6-digit ID
#### - Creating this sheet allows proper follow-up between releases

In [6]:
# Strip prefixes and extract pscid & dccid from the behavioural filenames,
# which look like 'PostScanBehav_pscid<7 digits>_dccid<6 digits>'
behav_records = [(name,
                  name.split('_', 1)[1][5:12],  # 7 characters following 'pscid'
                  name.rsplit('_', 1)[1][5:],   # everything following 'dccid'
                  path)
                 for name, _, path in allbehavs]
participants = df(behav_records,
                  columns=["_c_behavfilenames", "pscid", "dccid", "c_behavpaths"])

# Add columns containing BIDS compliant participant IDs (built from the
# 7-digits ID) and the matching new behavioural file names
participants["sub-IDs"] = "sub-" + participants["pscid"]
participants["n_behavfilenames"] = (participants["sub-IDs"]
                                    + "_ses-4_task-memory_behavioral")
participants = participants.set_index("dccid").sort_index()

# Add current 'events.tsv' file names and paths in new columns
# N.B.: These files use the 6-digits participant IDs
event_records = [(name, name.split('sub-', 1)[1][:6], path)
                 for name, _, path in allevents]
p_events = df(event_records,
              columns=["c_evfilenames", "dccid", "c_evpaths"]
              ).set_index("dccid").sort_index()
participants = pd.concat([participants, p_events], axis=1)

# The concatenation aligns on dccid and keeps unmatched rows filled with NaN.
# Fail early with a readable message instead of an obscure error on a float.
unmatched = participants.index[
    participants[["pscid", "c_evfilenames"]].isna().any(axis=1)]
if not unmatched.empty:
    raise ValueError(
        "Participants lacking either a behavioural or an events file "
        "(dccid): {}".format(list(unmatched)))

# New events file names: swap the 6-digits ID for the 7-digits one
participants["n_evfilenames"] = [
    ev_name.replace("sub-" + dccid, "sub-" + pscid)
    for dccid, ev_name, pscid in zip(participants.index,
                                     participants["c_evfilenames"],
                                     participants["pscid"])]
display(participants)

Unnamed: 0_level_0,_c_behavfilenames,pscid,c_behavpaths,sub-IDs,n_behavfilenames,c_evfilenames,c_evpaths,n_evfilenames
dccid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
108391,PostScanBehav_pscid4509950_dccid108391,4509950,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-4509950,sub-4509950_ses-4_task-memory_behavioral,sub-108391_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-4509950_ses-4_task-memory_events
120839,PostScanBehav_pscid7424803_dccid120839,7424803,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7424803,sub-7424803_ses-4_task-memory_behavioral,sub-120839_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7424803_ses-4_task-memory_events
122922,PostScanBehav_pscid7874568_dccid122922,7874568,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7874568,sub-7874568_ses-4_task-memory_behavioral,sub-122922_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7874568_ses-4_task-memory_events
127228,PostScanBehav_pscid3865361_dccid127228,3865361,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-3865361,sub-3865361_ses-4_task-memory_behavioral,sub-127228_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-3865361_ses-4_task-memory_events
139593,PostScanBehav_pscid5760364_dccid139593,5760364,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-5760364,sub-5760364_ses-4_task-memory_behavioral,sub-139593_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-5760364_ses-4_task-memory_events
...,...,...,...,...,...,...,...,...
979001,PostScanBehav_pscid7516889_dccid979001,7516889,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7516889,sub-7516889_ses-4_task-memory_behavioral,sub-979001_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-7516889_ses-4_task-memory_events
983291,PostScanBehav_pscid3874544_dccid983291,3874544,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-3874544,sub-3874544_ses-4_task-memory_behavioral,sub-983291_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-3874544_ses-4_task-memory_events
988602,PostScanBehav_pscid6633412_dccid988602,6633412,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-6633412,sub-6633412_ses-4_task-memory_behavioral,sub-988602_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-6633412_ses-4_task-memory_events
996599,PostScanBehav_pscid4576408_dccid996599,4576408,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-4576408,sub-4576408_ses-4_task-memory_behavioral,sub-996599_ses-4_task-memory_events,../../../data/simexp/DATA/cimaq_03-19/derivati...,sub-4576408_ses-4_task-memory_events


### Create (oldpath, newpath) tuples to rename all files according to BIDS

In [7]:
# Pair every current file path with its BIDS-named target; the target keeps
# the directory of the current file and only the file name changes.
behav_renamer = tuple(
    (current, join(dname(current), new_name + ".tsv"))
    for current, new_name in zip(participants["c_behavpaths"],
                                 participants["n_behavfilenames"]))
ev_renamer = tuple(
    (current, join(dname(current), new_name + ".tsv"))
    for current, new_name in zip(participants["c_evpaths"],
                                 participants["n_evfilenames"]))
# Display the first (oldpath, newpath) pair of each renamer as an example
display(behav_renamer[0], ev_renamer[0])

('../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/PostScanBehav_pscid4509950_dccid108391.tsv',
 '../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/sub-4509950_ses-4_task-memory_behavioral.tsv')

('../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/sub-108391_ses-4_task-memory_events.tsv',
 '../../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/processed/sub-4509950_ses-4_task-memory_events.tsv')

#### 1. Perform renaming operation using "os.rename"

#### 2. Make new 'confounds' directory in current release (cimaq_20190901)

#### 3. Create "mover" object 
##### - Same as renamer in previous cell, but with updated release directory and file names

#### 4. Move the files to new directory


In [8]:
# All operations below are commented out; if enabled they would rename and
# move files on disk, following steps 1-4 listed above.
# NOTE(review): the mkdir path starts with '../../' whereas `maindir` starts
#               with '../../../' - confirm which relative root is intended.
# NOTE(review): the mover targets only swap 'cimaq_03-19' for 'cimaq_20190901'
#               in the path, so they do not point inside the new 'confounds'
#               directory - confirm the intended destination exists.
# [os.rename(item[0], item[1]) for item in behav_renamer]
# [os.rename(item[0], item[1]) for item in ev_renamer]
# os.mkdir("../../data/simexp/DATA/cimaq_20190901/confounds")
# behav_mover = [(item[1], item[1].replace('cimaq_03-19', 'cimaq_20190901', 1))
#                for item in behav_renamer]
# ev_mover = [(item[1], item[1].replace('cimaq_03-19', 'cimaq_20190901', 1))
#             for item in ev_renamer]
# [smv(item[0], item[1]) for item in behav_mover]
# [smv(item[0], item[1]) for item in ev_mover]
# 

#### Finally, create and export "participants.tsv" to new release directory

In [44]:
# Keep only the BIDS subject label and the 7-digits pscid; dccid stays as index
participants = participants.loc[:, ["sub-IDs", "pscid"]]
display(participants)
# Tab-separated export; the index (dccid) is written as the first column.
# NOTE(review): the heading mentions the new release directory, but this path
#               points to the home directory - confirm the intended location.
# NOTE(review): BIDS presumably expects a 'participant_id' column in
#               participants.tsv - confirm against the specification.
participants.to_csv("~/participants.tsv", sep="\t")

Unnamed: 0_level_0,sub-IDs,pscid
dccid,Unnamed: 1_level_1,Unnamed: 2_level_1
108391,sub-4509950,4509950
120839,sub-7424803,7424803
122922,sub-7874568,7874568
127228,sub-3865361,3865361
139593,sub-5760364,5760364
...,...,...
979001,sub-7516889,7516889
983291,sub-3874544,3874544
988602,sub-6633412,6633412
996599,sub-4576408,4576408
