**READ ME (provided by Aitana Grasso Cladera)**  
You will receive 5 .CSV files. Most files have the structure 88x122 (trials/participants). The first four trials are test trials, followed by 40 valid trials; then there are another four test trials (because we changed the instruction), followed by another 40 valid trials (N trials = 88). The test trials are excluded from the analysis. The 122 columns correspond to participants; however, we recorded only 61 participants. The doubled number of columns refers to the 2 blocks that each participant performed. For example, column 1 is Participant 1 Block 1, while column 2 is Participant 1 Block 2.
Muscle trial information has already been taken out.

1. pictureSequence = 88x122 (trials/participants). It contains the ID number of each picture in the same sequence that was presented to the participant. ID numbers from 1 to 44 correspond to positive pictures, and from 45 to 88 to negative pictures.
2. blockOrder = 1x122 (block order/participants). It contains a number that represents the order of each block. For numbers 1 and 3, participants first did the Congruent block and then the Incongruent block. For numbers 2 and 4, participants first did the Incongruent block and then the Congruent block.
3. correctReaction = 88x122 (trials/participants). It contains the expected reaction for each trial. The reaction can be either pull or push.
4. actualReaction = 88x122 (trials/participants). It contains the reaction that the participant actually performed. The reaction can be either pull or push.
5. reactionTime = 88x122 (trials/participants). It contains the reaction time in seconds for every trial the participant performs.


In [None]:
### MY CODE

# --- Import Libraries and Data ---

import scipy.io
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Mount Google Drive at /content/drive inside the Colab runtime
# (presumably so the data files read below can live on Drive - adjust the
# '/YOUR_PATH/' placeholders accordingly).
drive.mount('/content/drive')

In [None]:
# Read the five behavioural CSV files; none of them has a header row.
# Block order is expected to be 1x122, every other file 88x122
# (trials x participant blocks). The files are read in the order listed.
(
    picture_sequence_raw,
    block_order_raw,
    correct_reaction_raw,
    actual_reaction_raw,
    reaction_time_raw,
) = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        '/YOUR_PATH/pictureSequence_Task1Exp1.csv',
        '/YOUR_PATH/blockOrder_Task1Exp1.csv',
        '/YOUR_PATH/correctReaction_Task1Exp1.csv',
        '/YOUR_PATH/actualReaction_Task1Exp1.csv',
        '/YOUR_PATH/reactionTime_Task1Exp1.csv',
    )
)


In [None]:
# --- Preprocessing the Behavioural Data ---

# 1. Remove test trials

# 0-based row positions of the two runs of four test trials
# (rows 0-3 and rows 44-47)
rows_to_drop = [*range(0, 4), *range(44, 48)]

# discard the test rows and renumber the remaining trials starting at 0
picture_sequence = picture_sequence_raw.drop(index=rows_to_drop).reset_index(drop=True)
correct_reaction = correct_reaction_raw.drop(index=rows_to_drop).reset_index(drop=True)
actual_reaction = actual_reaction_raw.drop(index=rows_to_drop).reset_index(drop=True)
reaction_time = reaction_time_raw.drop(index=rows_to_drop).reset_index(drop=True)


# 2. Set trials with incorrect responses invalid
# A trial is invalid when the performed reaction differs from the expected
# one; its reaction time is replaced by NaN.

mismatch_condition = actual_reaction != correct_reaction
reaction_time = reaction_time.mask(mismatch_condition)

# snapshot for the distribution graph (taken before the fast-response filter)
reaction_time_step_1 = reaction_time.copy()

# 3. Set trials with very fast reactions (RT <= 150 ms) invalid

# reaction times are in seconds, so 150 ms corresponds to 0.150
reaction_time = reaction_time.mask(reaction_time <= 0.150)


In [None]:
# 4. Compute accuracy of responses for each participant and exclude participants with accuracy < 90%

# participants (1-based numbers) that are excluded regardless of their
# accuracy here because of bad performance in the EyeTracking Task
manually_excluded_participants = [22, 61]

# every participant owns two adjacent block columns:
# (participant - 1) * 2 and (participant - 1) * 2 + 1
manually_excluded_blocks = [
    (participant - 1) * 2 + block_offset
    for participant in manually_excluded_participants
    for block_offset in (0, 1)
]

# total number of trials per participant (2 blocks * trials per block);
# identical for every participant, so it is computed once
total_trials = len(reaction_time) * 2

# initialize lists to save accuracies and blocks to be excluded
accuracies = []
excluded_block_nr = []

# iterate over participants (by iterating over blocks with step size 2)
for column in range(0, reaction_time.shape[1], 2):

    # extract the two blocks for the participant
    block1_data = reaction_time.iloc[:, column]
    block2_data = reaction_time.iloc[:, column + 1]

    # count the total number of correct and valid trials (non-NaN)
    correct_trials = block1_data.notna().sum() + block2_data.notna().sum()

    # calculate accuracy of participant
    accuracy_percentage = (correct_trials / total_trials) * 100

    # keep the accuracy only for participants that stay in the analysis,
    # i.e. accuracy is high enough and they are not manually excluded
    if accuracy_percentage >= 90 and column not in manually_excluded_blocks:
        accuracies.append(accuracy_percentage)

    # if performance was bad, both blocks of the participant get excluded
    if accuracy_percentage < 90:
        excluded_block_nr.extend([column, column + 1])

# add the manually excluded participants, skipping blocks that were already
# excluded because of low accuracy so that no column is listed twice
for block_nr in manually_excluded_blocks:
    if block_nr not in excluded_block_nr:
        excluded_block_nr.append(block_nr)

# exclude respective participants from picture_sequence and reaction_time
picture_sequence = picture_sequence.drop(excluded_block_nr, axis=1)
reaction_time = reaction_time.drop(excluded_block_nr, axis=1)
block_order = block_order_raw.drop(excluded_block_nr, axis=1)

# save reaction times for distribution graph
reaction_time_step_2 = reaction_time.copy()

# output the mean accuracy of all included participants
# (NaN instead of a ZeroDivisionError when nobody is left)
if accuracies:
    print("Total accuracy across all participants: ", sum(accuracies) / len(accuracies))
else:
    print("Total accuracy across all participants: ", np.nan)

In [None]:
# 5. Log-transform the data using log10.

# keep an untransformed copy next to the log10-transformed reaction times
reaction_time_untransformed = reaction_time.copy()

# np.log10 operates element-wise on the whole frame and propagates NaN
# (invalid trials stay NaN), so the per-cell DataFrame.applymap call
# (deprecated since pandas 2.1) is not needed
reaction_time_log_transformed = np.log10(reaction_time)


In [None]:
# --- Plotting the Distribution across Preprocessing Steps ---


def _valid_values(frame):
    """Return all non-NaN entries of *frame* as a flat 1-D array (row-major)."""
    flat = frame.to_numpy().ravel()
    return flat[~np.isnan(flat)]


# valid reaction times after each preprocessing stage
reaction_time_step_1_flat = _valid_values(reaction_time_step_1)
reaction_time_step_2_flat = _valid_values(reaction_time_step_2)
reaction_time_log_flat = _valid_values(reaction_time_log_transformed)

# main figure: untransformed reaction times
fig, ax1 = plt.subplots(figsize=(10, 6))

# overlay the histograms (with KDE) of the two untransformed stages
for stage_values, stage_colour, stage_label in (
    (reaction_time_step_1_flat, 'blue',
     'Step 1: excluding test trials and incorrect responses'),
    (reaction_time_step_2_flat, 'aqua',
     'Step 2: excluding responses <=150ms & participants with accuracy < 90%'),
):
    sns.histplot(stage_values, kde=True, bins=100, color=stage_colour,
                 alpha=0.7, label=stage_label, ax=ax1)

# axis labels and legend of the main plot
ax1.set_xlabel("Reaction Time (s)", fontsize = 13)
ax1.set_ylabel("Frequency", fontsize = 13)
ax1.legend(loc="lower right")

# inset axes for the log-transformed data; the limits are fixed before
# plotting so the histogram is drawn into this window
ax4 = fig.add_axes([0.47, 0.45, 0.4, 0.4])
ax4.set(xlim=(-0.8, 0.8), ylim=(0, 2500))

# histogram (with KDE) of the log-transformed reaction times
sns.histplot(
    reaction_time_log_flat,
    kde=True,
    bins=35,
    color='teal',
    alpha=0.7,
    label='Step 3: log-transformation (log10)',
    ax=ax4,
)

# axis labels and legend of the inset
ax4.set_xlabel("Log-transformed Reaction Time", fontsize=13)
ax4.set_ylabel("Frequency", fontsize=13)
ax4.legend(fontsize=10)

plt.show()


In [None]:
# --- Save the Data in a Data Frame ---

# look-up table: picture id -> arousal level
arousal_ratings = pd.read_csv('/YOUR_PATH/median_arousal_ratings.csv')
arousal_dict = dict(zip(arousal_ratings['img_id'], arousal_ratings['arousal_levels']))

# the remaining column labels are the original 0-based block numbers (two per
# participant), so integer division by 2 plus 1 gives the participant id
participants = reaction_time_untransformed.columns // 2 + 1

n_trials, n_blocks = reaction_time.shape
half_length = int(n_trials / 2)

# one dict per trial; turned into a long-format DataFrame afterwards
data_frame_BD = []

# go through every block
for block in range(n_blocks):

    # current participant id and the current block order
    participant_id = participants[block]
    block_type = block_order.iloc[0, block]

    # block orders 1 and 3 start with the congruent half, all others with
    # the incongruent half
    if block_type in [1, 3]:
        first_half, second_half = 'congruent', 'incongruent'
    else:
        first_half, second_half = 'incongruent', 'congruent'
    congruence_labels = [first_half] * half_length + [second_half] * half_length

    # add each trial of the block as one row
    for trial in range(n_trials):
        picture_id = picture_sequence.iloc[trial, block]
        data_frame_BD.append({
            'participant_id': participant_id,
            'rt': reaction_time_untransformed.iloc[trial, block],
            'log-transformed rt': reaction_time_log_transformed.iloc[trial, block],
            'picture_id': picture_id,
            'condition': congruence_labels[trial],
            # picture ids 1-44 are positive, everything else counts as negative
            'valence': 'positive' if 1 <= picture_id <= 44 else 'negative',
            'arousal': arousal_dict.get(picture_id),
        })

# save as a data frame
df_BD = pd.DataFrame(data_frame_BD)

# save the data frame as a csv file
#df_BD.to_csv("/YOUR_PATH/BehaviouralData_T1.csv", index=False)


In [None]:
# --- Descriptive Plots ---

# mean and standard deviation of the untransformed reaction times for every
# Valence-Condition combination
rt_stats = df_BD.groupby(['valence', 'condition'])['rt'].agg(['mean', 'std'])

# order the rows by condition (ascending) and valence (descending) so they
# line up with the bar labels defined below
valence_condition_combinations = rt_stats.reset_index().sort_values(
    by=['condition', 'valence'], ascending=[True, False]
)

# bar labels, heights and error-bar lengths
conditions = ['congruent positive', 'congruent negative', 'incongruent positive', 'incongruent negative']
means = valence_condition_combinations['mean']
std_devs = valence_condition_combinations['std']

# bar chart of the mean reaction times with the standard deviation as error bar
plt.figure(figsize=(10, 6))
bars = plt.bar(
    conditions,
    means,
    yerr=std_devs,
    capsize=5,
    color=['lightgreen', 'lightpink', 'seagreen', 'crimson'],
    label=conditions,
)

# write "mean ± std" onto each bar, centred at the bar top
annotation_box = dict(boxstyle="round,pad=0.3", facecolor="white",
                      edgecolor="none", alpha=0.7)
for bar, mean, std_dev in zip(bars, means, std_devs):
    bar_centre = bar.get_x() + bar.get_width()/2
    plt.text(
        bar_centre,
        mean,
        f"{mean:.2f}\n±{std_dev:.2f}",
        ha="center",
        va="center",
        fontsize=12,
        color="black",
        fontweight="bold",
        bbox=annotation_box,
    )

# axis label, hidden x ticks (the legend names the bars), grid and legend
plt.ylabel('Mean Reaction Time (s)', fontsize = 13)
plt.xticks([], rotation=45)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.legend()
plt.tight_layout()

plt.show()


In [None]:

# compute the mean reaction times for different Valence-Arousal combinations
valence_arousal_combinations = (
    df_BD.groupby(['valence', 'arousal'])['rt'].agg(['mean', 'std']).reset_index()
)

# create a data frame with all combinations for Valence and Arousal
# (arousal levels 2-8 for each valence) so that combinations without data
# still get a (empty) slot in the plot
all_combinations = {
    'arousal': [2, 3, 4, 5, 6, 7, 8] * 2,
    'valence': np.append(['positive'] * 7, ['negative'] * 7)
}
all_combs = pd.DataFrame(all_combinations)

# merge both frames together for plotting; the left join keeps every
# combination and leaves mean/std as NaN where no trials exist
df_complete = pd.merge(all_combs, valence_arousal_combinations,
                       on=['arousal', 'valence'], how='left')

# split up into positive and negative for color-coding in the plot
pos_data = df_complete[df_complete['valence'] == 'positive']
neg_data = df_complete[df_complete['valence'] == 'negative']

# plt.subplots returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `fig`; the pyplot calls below draw on this axes
fig, ax = plt.subplots(figsize=(12, 8))

# define the positions of the bars: negative bars sit directly to the right
# of the positive ones
bar_width = 0.3
bar_position_positive = np.arange(len(pos_data))
bar_position_negative = bar_position_positive + bar_width

# plot the positive and negative bars
plt.bar(bar_position_positive, pos_data['mean'], color='g', width=bar_width, label='positive')
plt.bar(bar_position_negative, neg_data['mean'], color='r', width=bar_width, label='negative')

# define plot labels and design details
plt.xlabel('Arousal', fontsize=15)
plt.ylabel('Mean Reaction Time (s)', fontsize=15)
# one tick per arousal level, centred between the positive and negative bar;
# derived from the bar positions so ticks and labels always match in number
plt.xticks(bar_position_positive + bar_width / 2, labels = pos_data['arousal'], fontsize=12)
plt.yticks(fontsize=12, ticks = [0, 0.25, 0.5, 0.75, 1, 1.25])
plt.legend(loc='upper right', fontsize=12)
plt.grid(axis="y", linestyle="--", alpha=0.7)

plt.show()


In [None]:
# --- Get Number of excluded trials for Valence-Condition Combinations ---

# flag every trial whose reaction time is NaN, then sum the flags within
# each Valence-Condition combination to get the number of NaN trials
rt_is_missing = df_BD['rt'].isna()
valence_condition_nans = (
    rt_is_missing
    .groupby([df_BD['valence'], df_BD['condition']])
    .sum()
    .reset_index(name='NaNs')
)


# grouping used to report the total number of trials per combination
amount_trials = df_BD.groupby(['valence', 'condition'])

# based on these numbers the percentages of NaN-trials were computed
print(valence_condition_nans)
print(amount_trials.size())