<a href="https://colab.research.google.com/github/albey-code/hippoabstraction/blob/main/events_tsv_conversion_np.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Note:** `np` in the title of this notebook stands for **no patch**: the notebook only handles the `_patch_0.txt` stimulus files (together with the button-press file of each run).

This notebook converts all event files for subject 4 from .txt format into .tsv format. Later, I will write a function that does this for all *N* = 23 subjects from Garvert et al. (2017). Importantly, the .tsv format is in line with Brain Imaging Data Structure (BIDS) conventions and better suited for open science and reproducibility.

In [24]:
from google.colab import files
import zipfile, os, glob, io
import pandas as pd

# Run 1 Subject 04

In [25]:
# Upload the run-1 zip archive through Colab's upload helper; the returned
# mapping is queried for its first key (the archive name) in the next cell.
uploaded = files.upload()

Saving subject4_run1_np.zip to subject4_run1_np.zip


In [26]:
# Get the name dynamically
# Takes the first key of `uploaded`, so this assumes exactly one archive was
# uploaded; an empty upload raises IndexError here.
zip_filename = list(uploaded.keys())[0]

In [27]:
# Unzip the uploaded archive into a clean, run-specific directory
unzip_dir = "unzipped_run1"
# Clear the unzip directory if it exists, so files from an earlier upload
# cannot leak into this run.
# NOTE: the loop variables are deliberately not called `files`. That name is
# the `google.colab.files` module imported at the top of the notebook, and
# rebinding it to a list here would break later `files.upload()` /
# `files.download()` calls.
if os.path.exists(unzip_dir):
    # topdown=False visits children before parents, so every sub-directory is
    # already empty by the time os.rmdir() is called on it.
    for root, dirnames, filenames in os.walk(unzip_dir, topdown=False):
        for name in filenames:
            os.remove(os.path.join(root, name))
        for name in dirnames:
            os.rmdir(os.path.join(root, name))
os.makedirs(unzip_dir, exist_ok=True)
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

print(os.listdir(unzip_dir))  # Sanity check

['Subj_4_run_1_obj_2_patch_0.txt', 'Subj_4_run_1_obj_9_patch_0.txt', 'Subj_4_run_1_obj_6_patch_0.txt', 'Subj_4_run_1_obj_8_patch_0.txt', 'Subj_4_run_1_obj_4_patch_0.txt', 'Subj_4_run_1_obj_7_patch_0.txt', 'Subj_4_run_1_obj_10_patch_0.txt', 'Subj_4_run_1_button.txt']


In [28]:
# Run-1 accumulators: each list collects one DataFrame per input file,
# already reduced to the onset / duration / trial_type columns.
stim_rows, button_rows = [], []

In [29]:
# Convert every raw .txt file of run 1 into rows with the event columns
# onset / duration / trial_type. All events get a duration of 0.0.


def label_trial(row):
    """Return the trial_type label for one row of an object stimulus file.

    Parameters
    ----------
    row : pandas.Series
        One row providing at least the "object" and "patch" fields.

    Returns
    -------
    str
        "object_<id>" when the object id is one of 2, 4, 6, 7, 8, 9, 10;
        "no_patch_all" for any other object whose patch value is 0;
        "other" when the object field is missing or nothing else matches.
    """
    if pd.isna(row["object"]):
        return "other"
    obj = int(row["object"])
    if obj in [2, 4, 6, 7, 8, 9, 10]:
        return f"object_{obj}"
    elif row["patch"] == 0:          # I removed the elif row ["patch"] == 1 statement as it doesn't apply here
        return "no_patch_all"
    else:
        return "other"


# Start from empty accumulators so that re-running this cell cannot append
# the same files a second time and duplicate every event.
stim_rows = []
button_rows = []

# sorted() makes the processing order independent of the file-system listing order.
for filepath in sorted(glob.glob(f"{unzip_dir}/*.txt")):
    filename = os.path.basename(filepath)

    if "button" in filename.lower():
        # Button file: one column called 'button'
        # The separator is a raw string: in a normal string literal "\s" is an
        # invalid escape sequence and Python emits a warning for it.
        # skiprows=1 drops the first line of the file (presumably a header -- confirm).
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["button"])
        df["onset"] = df["button"]
        df["duration"] = 0.0
        df["trial_type"] = "button_press"
        button_rows.append(df[["onset", "duration", "trial_type"]])

    else:
        # Object stimulus files
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])
        df["duration"] = 0.0
        df["trial_type"] = df.apply(label_trial, axis=1)
        stim_rows.append(df[["onset", "duration", "trial_type"]])

  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["button"])
  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])


In [30]:
# Combine stimulus and button-press events into one table ordered by onset
stim_events = pd.concat(stim_rows, ignore_index=True)
button_events = pd.concat(button_rows, ignore_index=True)
events = (
    pd.concat([stim_events, button_events], ignore_index=True)
    .sort_values(by="onset")
)

In [31]:
# Final preview
# Number of rows per trial_type: a quick check that every expected condition
# made it into the merged table.
print(events["trial_type"].value_counts())

trial_type
button_press    41
object_10       26
object_8        23
object_4        23
object_6        22
object_7        20
object_2        19
object_9        18
Name: count, dtype: int64


In [22]:
# Display the merged run-1 table (sorted by onset; the index still carries the pre-sort row labels)
events

Unnamed: 0,onset,duration,trial_type
148,3.061,0.0,button_press
149,4.845,0.0,button_press
150,7.131,0.0,button_press
60,11.690,0.0,object_6
18,16.730,0.0,object_4
...,...,...,...
147,1312.519,0.0,object_2
79,1316.579,0.0,object_6
59,1320.639,0.0,object_7
190,1326.842,0.0,button_press


In [32]:
# Save to new _events.tsv
# Tab-separated, without the index column. Missing values are written as
# "n/a", the marker the BIDS specification requires in tabular files
# (pandas would otherwise write empty fields).
events.to_csv("sub04_run1_events_np.tsv", sep="\t", index=False, na_rep="n/a")

In [33]:
# Downloading them locally
# The re-import guarantees that `files` refers to the google.colab module even
# if an earlier cell rebound that name (e.g. by using `files` as a loop variable).
from google.colab import files

files.download("sub04_run1_events_np.tsv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Run 2 Subject 04

In [34]:
# Upload the run-2 zip archive through Colab's upload helper; the returned
# mapping is queried for its first key (the archive name) in the next cell.
uploaded = files.upload()

Saving subject4_run2_np.zip to subject4_run2_np.zip


In [35]:
# Get the name dynamically
# Takes the first key of `uploaded`, so this assumes exactly one archive was
# uploaded; an empty upload raises IndexError here.
zip_filename = list(uploaded.keys())[0]

In [36]:
# Unzip the uploaded archive into a clean, run-specific directory
unzip_dir = "unzipped_run2"
# Clear the unzip directory if it exists, so files from an earlier upload
# cannot leak into this run.
# NOTE: the loop variables are deliberately not called `files`. That name is
# the `google.colab.files` module imported at the top of the notebook, and
# rebinding it to a list here would break later `files.upload()` /
# `files.download()` calls.
if os.path.exists(unzip_dir):
    # topdown=False visits children before parents, so every sub-directory is
    # already empty by the time os.rmdir() is called on it.
    for root, dirnames, filenames in os.walk(unzip_dir, topdown=False):
        for name in filenames:
            os.remove(os.path.join(root, name))
        for name in dirnames:
            os.rmdir(os.path.join(root, name))
os.makedirs(unzip_dir, exist_ok=True)
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

print(os.listdir(unzip_dir))  # Sanity check

['Subj_4_run_2_obj_6_patch_0.txt', 'Subj_4_run_2_obj_8_patch_0.txt', 'Subj_4_run_2_obj_10_patch_0.txt', 'Subj_4_run_2_obj_9_patch_0.txt', 'Subj_4_run_2_button.txt', 'Subj_4_run_2_obj_2_patch_0.txt', 'Subj_4_run_2_obj_7_patch_0.txt', 'Subj_4_run_2_obj_4_patch_0.txt']


In [37]:
# Run-2 accumulators (the "2" suffix keeps them apart from the run-1 lists):
# each list collects one DataFrame per input file.
stim2_rows, button2_rows = [], []

In [38]:
# Convert every raw .txt file of run 2 into rows with the event columns
# onset / duration / trial_type. All events get a duration of 0.0.


def label_trial(row):
    """Return the trial_type label for one row of an object stimulus file.

    Parameters
    ----------
    row : pandas.Series
        One row providing at least the "object" and "patch" fields.

    Returns
    -------
    str
        "object_<id>" when the object id is one of 2, 4, 6, 7, 8, 9, 10;
        "no_patch_all" for any other object whose patch value is 0;
        "other" when the object field is missing or nothing else matches.
    """
    if pd.isna(row["object"]):
        return "other"
    obj = int(row["object"])
    if obj in [2, 4, 6, 7, 8, 9, 10]:
        return f"object_{obj}"
    elif row["patch"] == 0:            # I removed the elif row ["patch"] == 1 statement as it doesn't apply here
        return "no_patch_all"
    else:
        return "other"


# Start from empty accumulators so that re-running this cell cannot append
# the same files a second time and duplicate every event.
stim2_rows = []
button2_rows = []

# sorted() makes the processing order independent of the file-system listing order.
for filepath in sorted(glob.glob(f"{unzip_dir}/*.txt")):
    filename = os.path.basename(filepath)

    if "button" in filename.lower():
        # Button file: one column called 'button'
        # The separator is a raw string: in a normal string literal "\s" is an
        # invalid escape sequence and Python emits a warning for it.
        # skiprows=1 drops the first line of the file (presumably a header -- confirm).
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["button"])
        df["onset"] = df["button"]
        df["duration"] = 0.0
        df["trial_type"] = "button_press"
        button2_rows.append(df[["onset", "duration", "trial_type"]])

    else:
        # Object stimulus files
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])
        df["duration"] = 0.0
        df["trial_type"] = df.apply(label_trial, axis=1)
        stim2_rows.append(df[["onset", "duration", "trial_type"]])

  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["button"])
  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])


In [39]:
# Combine stimulus and button-press events into one table ordered by onset
stim2_events = pd.concat(stim2_rows, ignore_index=True)
button2_events = pd.concat(button2_rows, ignore_index=True)
events_2 = (
    pd.concat([stim2_events, button2_events], ignore_index=True)
    .sort_values(by="onset")
)

In [40]:
# Final preview
# Number of rows per trial_type: a quick check that every expected condition
# made it into the merged table.
print(events_2["trial_type"].value_counts())

trial_type
button_press    42
object_9        26
object_6        25
object_10       23
object_2        22
object_7        22
object_4        21
object_8        20
Name: count, dtype: int64


In [41]:
# Display the merged run-2 table (sorted by onset; the index still carries the pre-sort row labels)
events_2

Unnamed: 0,onset,duration,trial_type
0,3.640,0.0,object_6
138,6.790,0.0,object_4
116,19.040,0.0,object_7
45,22.120,0.0,object_10
1,24.150,0.0,object_6
...,...,...,...
158,1330.369,0.0,object_4
198,1333.976,0.0,button_press
199,1341.965,0.0,button_press
115,1346.679,0.0,object_2


In [42]:
# Save to new _events.tsv
# Tab-separated, without the index column. Missing values are written as
# "n/a", the marker the BIDS specification requires in tabular files
# (pandas would otherwise write empty fields).
events_2.to_csv("sub04_run2_events_np.tsv", sep="\t", index=False, na_rep="n/a")

In [43]:
# Downloading them locally
# The re-import guarantees that `files` refers to the google.colab module even
# if an earlier cell rebound that name (e.g. by using `files` as a loop variable).
from google.colab import files

files.download("sub04_run2_events_np.tsv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Run 3 Subject 04

In [44]:
# Upload the run-3 zip archive through Colab's upload helper; the returned
# mapping is queried for its first key (the archive name) in the next cell.
uploaded = files.upload()

Saving subject4_run3_np.zip to subject4_run3_np.zip


In [45]:
# Get the name dynamically
# Takes the first key of `uploaded`, so this assumes exactly one archive was
# uploaded; an empty upload raises IndexError here.
zip_filename = list(uploaded.keys())[0]

In [46]:
# Unzip the uploaded archive into a clean, run-specific directory
unzip_dir = "unzipped_run3"
# Clear the unzip directory if it exists, so files from an earlier upload
# cannot leak into this run.
# NOTE: the loop variables are deliberately not called `files`. That name is
# the `google.colab.files` module imported at the top of the notebook, and
# rebinding it to a list here would break later `files.upload()` /
# `files.download()` calls.
if os.path.exists(unzip_dir):
    # topdown=False visits children before parents, so every sub-directory is
    # already empty by the time os.rmdir() is called on it.
    for root, dirnames, filenames in os.walk(unzip_dir, topdown=False):
        for name in filenames:
            os.remove(os.path.join(root, name))
        for name in dirnames:
            os.rmdir(os.path.join(root, name))
os.makedirs(unzip_dir, exist_ok=True)
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

print(os.listdir(unzip_dir))  # Sanity check

['Subj_4_run_3_obj_7_patch_0.txt', 'Subj_4_run_3_button.txt', 'Subj_4_run_3_obj_8_patch_0.txt', 'Subj_4_run_3_obj_9_patch_0.txt', 'Subj_4_run_3_obj_10_patch_0.txt', 'Subj_4_run_3_obj_6_patch_0.txt', 'Subj_4_run_3_obj_2_patch_0.txt', 'Subj_4_run_3_obj_4_patch_0.txt']


In [47]:
# Run-3 accumulators (the "3" suffix keeps them apart from the earlier runs' lists):
# each list collects one DataFrame per input file.
stim3_rows, button3_rows = [], []

In [48]:
# Convert every raw .txt file of run 3 into rows with the event columns
# onset / duration / trial_type. All events get a duration of 0.0.


def label_trial(row):
    """Return the trial_type label for one row of an object stimulus file.

    Parameters
    ----------
    row : pandas.Series
        One row providing at least the "object" and "patch" fields.

    Returns
    -------
    str
        "object_<id>" when the object id is one of 2, 4, 6, 7, 8, 9, 10;
        "no_patch_all" for any other object whose patch value is 0;
        "other" when the object field is missing or nothing else matches.
    """
    if pd.isna(row["object"]):
        return "other"
    obj = int(row["object"])
    if obj in [2, 4, 6, 7, 8, 9, 10]:
        return f"object_{obj}"
    elif row["patch"] == 0:            # I removed the elif row ["patch"] == 1 statement as it doesn't apply here
        return "no_patch_all"
    else:
        return "other"


# Start from empty accumulators so that re-running this cell cannot append
# the same files a second time and duplicate every event.
stim3_rows = []
button3_rows = []

# sorted() makes the processing order independent of the file-system listing order.
for filepath in sorted(glob.glob(f"{unzip_dir}/*.txt")):
    filename = os.path.basename(filepath)

    if "button" in filename.lower():
        # Button file: one column called 'button'
        # The separator is a raw string: in a normal string literal "\s" is an
        # invalid escape sequence and Python emits a warning for it.
        # skiprows=1 drops the first line of the file (presumably a header -- confirm).
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["button"])
        df["onset"] = df["button"]
        df["duration"] = 0.0
        df["trial_type"] = "button_press"
        button3_rows.append(df[["onset", "duration", "trial_type"]])

    else:
        # Object stimulus files
        df = pd.read_csv(filepath, sep=r'\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])
        df["duration"] = 0.0
        df["trial_type"] = df.apply(label_trial, axis=1)
        stim3_rows.append(df[["onset", "duration", "trial_type"]])

  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["button"])
  df = pd.read_csv(filepath, sep='\s+', skiprows=1, names=["object", "patch", "onset", "prevObject", "dist"])


In [49]:
# Combine stimulus and button-press events into one table ordered by onset
stim3_events = pd.concat(stim3_rows, ignore_index=True)
button3_events = pd.concat(button3_rows, ignore_index=True)
events_3 = (
    pd.concat([stim3_events, button3_events], ignore_index=True)
    .sort_values(by="onset")
)

In [50]:
# Final preview
# Number of rows per trial_type: a quick check that every expected condition
# made it into the merged table.
print(events_3["trial_type"].value_counts())

trial_type
button_press    42
object_6        24
object_4        23
object_9        23
object_10       23
object_7        21
object_8        21
object_2        20
Name: count, dtype: int64


In [51]:
# Display the merged run-3 table (sorted by onset; the index still carries the pre-sort row labels)
events_3

Unnamed: 0,onset,duration,trial_type
0,4.342,0.0,object_7
42,8.400,0.0,object_9
21,13.440,0.0,object_8
155,17.083,0.0,button_press
132,17.640,0.0,object_4
...,...,...,...
154,1322.318,0.0,object_4
194,1326.945,0.0,button_press
131,1333.658,0.0,object_2
195,1338.258,0.0,button_press


In [52]:
# Save to new _events.tsv
# Tab-separated, without the index column. Missing values are written as
# "n/a", the marker the BIDS specification requires in tabular files
# (pandas would otherwise write empty fields).
events_3.to_csv("sub04_run3_events_np.tsv", sep="\t", index=False, na_rep="n/a")

In [53]:
# Downloading them locally
# The re-import guarantees that `files` refers to the google.colab module even
# if an earlier cell rebound that name (e.g. by using `files` as a loop variable).
from google.colab import files

files.download("sub04_run3_events_np.tsv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>