# Block sequence generation for study
## Criteria that each participant's run must fulfill
- Each participant sees 50% of all snippets, 50% obf and 50% clean
- Each participant sees 72 snippets in total (for an overview of all snippets see [snippets.csv](../01-Data-Code_Snippets/snippets.csv))
- No participant can see both the clean and the obf variant of a snippet (no matter the version)
- The Snippets are divided into three blocks, according to their version. For simplification purposes, just assign v0 to block 1, v1 to block 2 and v2 to block 3.
    - These blocks are shown to the participant in three sessions, with breaks in-between, to reduce a possible learning effect.
    - The order of the blocks to show, as well as the order of the snippets in the block, must be randomized.
    - For each snippet, across all versions, a participant sees only the clean or the obf variant, not mixed (x1_clean_v0, x1_clean_v1, x1_clean_v2 or x1_obf_v0, x1_obf_v1, x1_obf_v2).
    - The last snippet of a block and the first snippet of the next block must not be the same snippet (ignoring the versions), to reduce a possible learning effect.
    - For each sequence in a block:
        - Not more than 3 obf or 3 clean snippets in a row
    - For some snippets that are very similar, there exist additional constraints for those snippets and versions within a block:
        - Snippets 7, 8 and 9 look rather similar; any two of them must be at least 3 positions apart.
        - Snippets 10, 11 and 12 look rather similar; any two of them must be at least 3 positions apart.
        - Snippets 13, 14 and 15 look rather similar; any two of them must be at least 3 positions apart.
        - Snippets 55, 56 and 57 look rather similar; any two of them must be at least 3 positions apart.

- We need to generate 24 balanced runs:
    - factor 6 by varying block order (123, 132, 213, 231, 312, 321)
        - counterbalancing strategies
    - factor 2 by changing variant (clean / obf)
    - All snippets should be distributed across their block and in total (so that not the same snippet / version is always in the beginning or at the end, and that possible learning effects between snippets vary).

- The image files don't have to be copied, we'll store them in a folder in psychopy (from [Image Folder](../03-Data-Code_Snippet_Images/Pictures/)) and use the herein generated file to select the suitable file for each round from this folder.
- For each participant, write three csv files according to our specifications (for each block one) so that we can put a calibration between the blocks, for a better data quality:
    - Snippet Name
    - image path (constant depending on snippet name)
    - answers in 3 separate columns
    - correct answer (button if known, otherwise string)
    - break duration after snippet has been finished (current idea: 2s for the first seven, then 1 min break after the eighth one, and repeat)
    - event code to send to eeg to signal the begin of snippet view (2 codes for clean and obf, current idea is to take 1 for obf and 2 for clean)
- an example of what a file should look like is given in [example.csv](./example.csv), however therein all snippets are present. This file can also be used to test the psychopy script.

In [1]:
import os
import random as rd
import csv
import shutil
import pandas as pd
from typing import Any
from pathlib import Path

In [2]:
# Input and output locations, relative to the notebook's working directory
SNIPPETS_FILE = '../01-Data-Code_Snippets/snippets.csv'
SNIPPETS_PICTURES_FOLDER = '../03-Data-Code_Snippet_Images/Pictures/'
RUN_DATA = '../05-Data-Trial_Runs/'

# Snippet variants; the same strings form the middle part of the image file
# names (e.g. '4-clean-v0.png') and are used as dictionary keys
CLEAN = 'clean'
OBF = 'obf'
# Columns for temporary sequence file
SEQUENCE_BLOCK = 'Block'
SEQUENCE_IN_BLOCK = 'In-Block'
SEQUENCE_TOTAL = 'Total'

# Columns for condition files
SNIPPET_NAME = 'SnippetName'
IMAGE_PATH = 'ImagePath'
BREAK_LENGTH = 'break_length'
CORRECT_ANSWER = 'correct'
START_EVENT = 'start_event'
ANSWER_1, ANSWER_2, ANSWER_3 = 'answer1', 'answer2', 'answer3'
# Every n-th snippet within a block is followed by the longer break
LONGER_SHORT_BREAK_ROUND = 8
# Number of participant runs the block distribution is checked against
TOTAL_PARTICIPANTS = 24

# Make sure the output folder exists before any run file is written into it
os.makedirs(RUN_DATA, exist_ok=True)

## Prepare snippet order for pilot study

In [3]:
# Get snippets from images and create tuples for blocks
# Read files -> save the names in 3 lists v0, v1 and v2, ordered so that the
# clean and the obf file of one snippet are direct neighbours (clean first)
def _snippet_sort_key(file_name):
    """Return the sort key ``(snippet number, file name)`` for an image file.

    The number is the base 10 value of the part before the first '-'; names
    without a numeric prefix get -1 and are therefore sorted to the beginning.
    The file name is the tie-breaker: os.listdir returns entries in arbitrary
    order, so without it '4-obf-...' could end up before '4-clean-...' and the
    clean/obf pairing that follows would miss the pair.
    """
    prefix = file_name.split('-')[0]
    number = int(prefix) if prefix.isdigit() else -1
    return number, file_name


raw_v0 = sorted(os.listdir(f'{SNIPPETS_PICTURES_FOLDER}/v0/'),
                key=_snippet_sort_key)
raw_v1 = sorted(os.listdir(f'{SNIPPETS_PICTURES_FOLDER}/v1/'),
                key=_snippet_sort_key)
raw_v2 = sorted(os.listdir(f'{SNIPPETS_PICTURES_FOLDER}/v2/'),
                key=_snippet_sort_key)


def is_tuple_clean_obf(a, b) -> bool:
    """Tell whether ``a`` and ``b`` are the clean and obf image of one snippet.

    File names are expected to look like '4-clean-v0.png', i.e.
    ``<number>-<variant>-<version>.png``. The pair matches when number and
    version are equal, ``a`` is the clean variant and ``b`` the obf variant.

    Names that do not consist of at least three '-'-separated parts (hidden
    files, thumbnails, ...) can never be part of a pair and yield False
    instead of raising an IndexError.
    """
    a_split = a.split('-')
    b_split = b.split('-')
    # '4-clean-v0.png' -> 4;clean;v0.png
    if len(a_split) < 3 or len(b_split) < 3:
        return False
    # number and version should be the same, the variant may differ
    same_snippet = a_split[0] == b_split[0] and a_split[2] == b_split[2]
    return same_snippet and a_split[1] == 'clean' and b_split[1] == 'obf'


# arrange in tuples
def _pair_clean_obf(raw_names):
    """Build one {CLEAN: ..., OBF: ...} dict per neighbouring clean/obf pair."""
    return [
        {CLEAN: current, OBF: following}
        for current, following in zip(raw_names, raw_names[1:])
        if is_tuple_clean_obf(current, following)
    ]


def _report_missing(label, pairs, raw_names):
    """Print a warning when not every file of a version ended up in a pair."""
    if 2 * len(pairs) != len(raw_names):
        print(f'Missing Values {label}')


v0_data = _pair_clean_obf(raw_v0)
_report_missing('v0', v0_data, raw_v0)
v1_data = _pair_clean_obf(raw_v1)
_report_missing('v1', v1_data, raw_v1)
v2_data = _pair_clean_obf(raw_v2)
_report_missing('v2', v2_data, raw_v2)

### Partition randomly into two subsets for each version

In [4]:
# For each tuple randomly decide in which list obf or clean will be used for "p1" or "p2"
def partition_data_tuples(v0_data: list[dict[str, str]], v1_data: list[dict[str, str]], v2_data: list[dict[str, str]]):
    """Split the snippets into two complementary halves ("part 1" / "part 2").

    Each argument holds one ``{CLEAN: file, OBF: file}`` dict per snippet, and
    the same list position must refer to the same snippet in all three
    versions. For every snippet a random flag decides whether part 1 gets the
    clean and part 2 the obf variant or vice versa; the decision is applied to
    all three versions alike. The flags are balanced, so both parts receive
    the same number of clean and obf snippets (an odd snippet count gets one
    additional random flag).

    Returns:
        ``((v0_part1, v1_part1, v2_part1), (v0_part2, v1_part2, v2_part2))``
        where every element is a dict ``{CLEAN: [files], OBF: [files]}``.

    Raises:
        ValueError: if the three versions do not contain the same number of
            snippets, because the positional alignment would be broken.
    """
    snippet_count = len(v0_data)
    if len(v1_data) != snippet_count or len(v2_data) != snippet_count:
        raise ValueError(
            'All versions must contain the same number of snippets, got '
            f'{snippet_count}, {len(v1_data)} and {len(v2_data)}')

    # 1 -> part 1 gets the clean variant, 0 -> part 1 gets the obf variant
    flags = [1, 0] * (snippet_count // 2)
    if snippet_count % 2:
        flags.append(rd.choice([0, 1]))
    rd.shuffle(flags)
    print(flags)

    # one target dict per version (v0, v1, v2) for each of the two parts
    part_1 = tuple({CLEAN: [], OBF: []} for _ in range(3))
    part_2 = tuple({CLEAN: [], OBF: []} for _ in range(3))

    for flag, snippet_versions in zip(flags, zip(v0_data, v1_data, v2_data)):
        a, b = (CLEAN, OBF) if flag == 1 else (OBF, CLEAN)
        for target_1, target_2, pair in zip(part_1, part_2, snippet_versions):
            target_1[a].append(pair[a])
            target_2[b].append(pair[b])
    print(part_1[0])

    return part_1, part_2


# (v0_part1, v1_part1, v2_part1), (v0_part2, v1_part2,
#                                  v2_part2) = partition_data_tuples(v0_data, v1_data, v2_data)

### Create trial sequence from a single block

In [5]:
# Create a trial run from 1 block of data
def generate_sequence_from_block_data(block_data: dict[str, list[str]]):
    """Build one random candidate order for the snippets of a single block.

    ``block_data`` is a dict ``{CLEAN: [files], OBF: [files]}``; it is not
    modified. Both lists are shuffled and then consumed alternately in runs of
    one to three snippets. The result is only a candidate: once one of the
    lists is exhausted the other one is appended in consecutive runs, so the
    caller still has to validate the sequence (e.g. with
    check_three_in_a_row) and retry if necessary.

    The variant of the first run is random. (It used to be drawn from
    ``[0, 1]`` while being compared against the variant names, which made
    every block start with an obf snippet.)
    """
    # variant of the imaginary run before the first one; flipped right away
    trial_aspect_flag = rd.choice([CLEAN, OBF])

    remaining = {CLEAN: block_data[CLEAN].copy(), OBF: block_data[OBF].copy()}
    rd.shuffle(remaining[CLEAN])
    rd.shuffle(remaining[OBF])
    assigned = {CLEAN: 0, OBF: 0}
    sequence = []

    while remaining[CLEAN] or remaining[OBF]:
        # switch trial flag
        trial_aspect_flag = CLEAN if trial_aspect_flag == OBF else OBF
        other_flag = OBF if trial_aspect_flag == CLEAN else CLEAN
        pool = remaining[trial_aspect_flag]

        # calculate amount to use
        if not pool:
            # nothing left of this variant, continue with the other one
            continue
        if len(pool) < 3:
            # hand out the last few snippets of a variant one at a time
            amount_of_trials_to_add = 1
        elif assigned[other_flag] - assigned[trial_aspect_flag] >= 3:
            # this variant lags behind by three or more: catch up
            amount_of_trials_to_add = 3
        else:
            amount_of_trials_to_add = rd.choice([1, 2, 3])

        # add amount to trials
        sequence.extend(pool[:amount_of_trials_to_add])
        remaining[trial_aspect_flag] = pool[amount_of_trials_to_add:]
        assigned[trial_aspect_flag] += amount_of_trials_to_add

    return sequence
# generate_sequence_from_block_data(v0_part1)

### Check task constraints

In [6]:
# Check if more than 3 obf or clean in a row -> if yes, output error message
def check_three_in_a_row(sequence: list[str]):
    """Return False (and print an error) once a variant occurs 4x in a row.

    The variant is taken from the second '-'-separated part of each file
    name; everything that is not the obf variant counts as clean.
    """
    run_is_obf = None
    run_length = 0
    for file_name in sequence:
        is_obf = file_name.split('-')[1] == OBF
        if is_obf == run_is_obf:
            run_length += 1
        else:
            # variant changed (or first element): a new run starts here
            run_is_obf = is_obf
            run_length = 1
        if run_length > 3:
            if is_obf:
                print('Error: more than 3x OBF task')
            else:
                print('Error: more than 3x CLEAN task')
            return False
    return True

In [None]:
# Check that no two directly consecutive entries belong to the same snippet
# number (versions and variants are ignored) -> if they do, output error message
def check_distance_one(sequence: list[str]):
    """Return True when no snippet number is immediately followed by itself.

    The snippet number is the part of the file name before the first '-'.
    The first violation prints an error message and yields False.
    """
    previous_number = None
    for file_name in sequence:
        number = file_name.split('-')[0]
        if number == previous_number:
            print('Error: Tasks ordering, not enough distance')
            return False
        previous_number = number
    return True

In [8]:
# Check additional constraints for snippets and versions:
# the snippets of each of the groups (7, 8, 9), (10, 11, 12), (13, 14, 15) and
# (55, 56, 57) look rather similar, so any two members of the same group must
# be at least 3 positions apart.
def check_additional_constraints(seqeunce: list[str]):
    """Return True when all look-alike snippets are far enough apart.

    The snippet number is the part of the file name before the first '-'.
    The first violation prints an error message and yields False.
    """
    minimum_gap = 3
    look_alike_groups = (
        ('7', '8', '9'),
        ('10', '11', '12'),
        ('13', '14', '15'),
        ('55', '56', '57'),
    )
    numbers = [task.split('-')[0] for task in seqeunce]

    for members in look_alike_groups:
        positions = [position for position, number in enumerate(numbers)
                     if number in members]
        # positions are ascending, so the closest pair is always a
        # neighbouring pair in this list
        for earlier, later in zip(positions, positions[1:]):
            if later - earlier < minimum_gap:
                print('Error: Tasks ordering, not enough distance')
                return False
    return True

## Generate trial sequences for participant and check constraints

In [None]:
# generate the total sequence from the three blocks
def generate_valid_total_sequence(block_data_1: list[str], block_data_2: list[str], block_data_3: list[str], index: int):
    """Generate, validate and store the full run of one participant.

    For each of the three blocks candidate sequences are drawn until one
    passes check_three_in_a_row and check_additional_constraints. The three
    block sequences are then concatenated; if the concatenation violates
    check_three_in_a_row or check_distance_one, all blocks are redrawn.

    The accepted run is written to ``p<index, zero-padded to 3>.csv`` in
    RUN_DATA with the snippet name, the block number, the position inside the
    block and the overall position (all counted from 0).

    Returns:
        ``{index: [DataFrame, DataFrame, DataFrame]}`` with one single-column
        (snippet name) frame per block.
    """
    while True:
        all_sequences = []
        for block_data in (block_data_1, block_data_2, block_data_3):
            candidate = generate_sequence_from_block_data(block_data)
            while not (check_three_in_a_row(candidate)
                       and check_additional_constraints(candidate)):
                candidate = generate_sequence_from_block_data(block_data)
            all_sequences.append(candidate)

        complete_sequence = []
        for block in all_sequences:
            complete_sequence.extend(block)

        # the block borders have to respect the constraints as well
        if check_three_in_a_row(complete_sequence) and check_distance_one(complete_sequence):
            break

    block_sequence = []
    in_block_sequence = []
    for block_number, block in enumerate(all_sequences):
        block_sequence.extend([block_number] * len(block))
        in_block_sequence.extend(range(len(block)))
    across_block_sequence = list(range(len(complete_sequence)))

    run_table = pd.DataFrame({
        SNIPPET_NAME: complete_sequence,
        SEQUENCE_BLOCK: block_sequence,
        SEQUENCE_IN_BLOCK: in_block_sequence,
        SEQUENCE_TOTAL: across_block_sequence,
    })
    run_table.to_csv(f'{RUN_DATA}/p{str(index).zfill(3)}.csv', index=False)

    block_frames = []
    for block in all_sequences:
        block_frames.append(pd.DataFrame(block, columns=[SNIPPET_NAME]))
    return {index: block_frames}


# generate_valid_total_sequence(v0_part1, v1_part1, v2_part1, 1)

## Generate multiple trial sequences for participants balancing the blocks

In [10]:
def generate_pair_sequence_runs(block_1_data: list[str], block_2_data: list[str], block_3_data: list[str],
                                starting_index: list[int]):
    """Generate the runs of two complementary participants.

    The block data is partitioned into two halves with swapped clean/obf
    variants; one run is generated per half, using the participant indices
    ``starting_index[0]`` and ``starting_index[0] + 1``.

    ``starting_index`` is a one-element list that serves as a shared counter:
    its entry is advanced by 2 once both runs have been generated.

    Returns:
        The merged result dicts of both generate_valid_total_sequence calls.
    """
    first_half, second_half = partition_data_tuples(
        block_1_data, block_2_data, block_3_data)
    first_block_1, first_block_2, first_block_3 = first_half
    second_block_1, second_block_2, second_block_3 = second_half

    participant_sequences = {}
    participant_sequences.update(generate_valid_total_sequence(
        first_block_1, first_block_2, first_block_3, starting_index[0]))
    participant_sequences.update(generate_valid_total_sequence(
        second_block_1, second_block_2, second_block_3, starting_index[0] + 1))
    starting_index[0] += 2

    return participant_sequences


# participant_sequences = generate_multiple_sequence_runs(

#     v0, v1, v2, 1)

In [11]:
starting_index = [1]
participant_sequences = {}

# All six block orders (counterbalancing); every call of
# generate_pair_sequence_runs creates one complementary participant pair for
# the given order.
block_orders = [
    (v0_data, v1_data, v2_data),
    (v0_data, v2_data, v1_data),
    (v1_data, v0_data, v2_data),
    (v1_data, v2_data, v0_data),
    (v2_data, v0_data, v1_data),
    (v2_data, v1_data, v0_data),
]

# Two rounds over the six orders would yield 24 runs. The very first pair
# (v0, v1, v2) is not generated here; its two participant indices are skipped
# instead -- presumably that pair already exists from an earlier run
# (TODO confirm).
starting_index[0] += 2
for first_block, second_block, third_block in (block_orders * 2)[1:]:
    participant_sequences |= generate_pair_sequence_runs(
        first_block, second_block, third_block, starting_index)

[1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0]
{'clean': ['-clean-v0.png', '5-clean-v0.png', '6-clean-v0.png', '7-clean-v0.png', '9-clean-v0.png', '12-clean-v0.png', '13-clean-v0.png', '15-clean-v0.png', '30-clean-v0.png', '57-clean-v0.png', '60-clean-v0.png', '61-clean-v0.png'], 'obf': ['4-obf-v0.png', '8-obf-v0.png', '10-obf-v0.png', '11-obf-v0.png', '14-obf-v0.png', '16-obf-v0.png', '17-obf-v0.png', '18-obf-v0.png', '49-obf-v0.png', '54-obf-v0.png', '62-obf-v0.png', '63-obf-v0.png']}
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Tasks ordering, not enough distance
Error: Ta

### Check distribution of snippets across sequences

In [17]:
def check_snippet_distribution(participant_sequences: dict[Any, pd.DataFrame]):
    """Report snippets whose positions are unevenly spread over all runs.

    ``participant_sequences`` maps a participant id to a run table with the
    columns SNIPPET_NAME, SEQUENCE_BLOCK, SEQUENCE_IN_BLOCK and
    SEQUENCE_TOTAL. The version suffix is removed from the snippet names, so
    all versions of a snippet variant are evaluated together. Three checks
    are displayed / printed:

    * mean or median position inside a block outside of 8..16,
    * mean or median position inside the whole run outside of 29..43,
    * number of occurrences in block 0, 1 and 2 different from
      ``TOTAL_PARTICIPANTS / 2``.

    A snippet that never occurs in one of the blocks is counted with 0 for
    that block instead of aborting the check with a KeyError.
    """
    df = pd.concat(participant_sequences.values())

    for version_suffix in ('-v0', '-v1', '-v2'):
        df[SNIPPET_NAME] = df[SNIPPET_NAME].str.replace(
            version_suffix, '', regex=False)
    # columns: SNIPPET_NAME, SEQUENCE_BLOCK, SEQUENCE_IN_BLOCK, SEQUENCE_TOTAL
    group_df = df.groupby(SNIPPET_NAME).agg(['mean', 'median'])

    def outside_range(column, lower, upper):
        """Select the snippets whose mean or median leaves [lower, upper]."""
        mean = group_df[(column, 'mean')]
        median = group_df[(column, 'median')]
        return group_df[(mean < lower) | (mean > upper) |
                        (median < lower) | (median > upper)]

    # distribution of snippets within the blocks
    to_check_block = outside_range(SEQUENCE_IN_BLOCK, 8, 16)
    display(to_check_block)
    print('Wrong snippet distribution in block: - absolute',
          to_check_block.shape[0], '- relative', to_check_block.shape[0]/group_df.shape[0])

    # distribution of snippets across total sequence
    to_check_blocks = outside_range(SEQUENCE_TOTAL, 29, 43)
    display(to_check_blocks)
    print('Wrong snippet distribution in sequence: - absolute',
          to_check_blocks.shape[0], '- relative', to_check_blocks.shape[0]/group_df.shape[0])

    # distribution of snippets in blocks: occurrences per snippet and block,
    # with an explicit 0 for blocks a snippet never appears in
    block_counts = (df.groupby([SNIPPET_NAME, SEQUENCE_BLOCK]).size()
                    .unstack(fill_value=0)
                    .reindex(columns=[0, 1, 2], fill_value=0))
    block_counts.columns = ['0', '1', '2']
    expected_per_block = TOTAL_PARTICIPANTS/2
    blocks = block_counts[(block_counts != expected_per_block).any(axis=1)]
    print('Wrong snippet distribution in block assignment: - absolute',
          blocks.shape[0], '- relative', blocks.shape[0]/block_counts.shape[0])


# check_snippet_distribution(participant_sequences)

In [13]:
# Snippet Overview
def get_snippet_distribution_overview(participant_sequences: dict[Any, pd.DataFrame]):
    """Write overview.csv listing the snippet variants every participant sees.

    For each participant the version suffix and the '.png' extension are
    removed from the snippet names, duplicates are dropped, names starting
    with '-' are prefixed with '0', and the result is sorted alphabetically.
    The lists become the columns (one per participant) of a
    semicolon-separated file in RUN_DATA.
    """
    removable_parts = ('-v0', '-v1', '-v2', '.png')
    overview = {}
    for participant, run_table in participant_sequences.items():
        variants = set()
        for file_name in run_table[SNIPPET_NAME].array:
            for part in removable_parts:
                file_name = file_name.replace(part, '')
            variants.add(file_name)
        overview[participant] = sorted(
            '0' + variant if variant.startswith('-') else variant
            for variant in variants)
    overview_table = pd.DataFrame.from_dict(overview)
    overview_table.to_csv(f'{RUN_DATA}/overview.csv', sep=';', index=False)

In [18]:
# Reload all generated run files (p<index>.csv) from disk, so that the checks
# also cover runs that were generated in an earlier session. The files are
# sorted because glob yields them in arbitrary order, which would otherwise
# make the column order of overview.csv vary between executions.
participant_sequence_files = sorted(Path(RUN_DATA).glob('p*.csv'))
participant_sequences = {}
for participant_sequence_file in participant_sequence_files:
    # key: file name without the leading 'p' and without extension, e.g. '003'
    participant_sequences[participant_sequence_file.stem[1:]] = pd.read_csv(
        participant_sequence_file, index_col=None)

check_snippet_distribution(participant_sequences)
get_snippet_distribution_overview(participant_sequences)

Unnamed: 0_level_0,Block,Block,In-Block,In-Block,Total,Total
Unnamed: 0_level_1,mean,median,mean,median,mean,median
SnippetName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2


Wrong snippet distribution in block: - absolute 0 - relative 0.0


Unnamed: 0_level_0,Block,Block,In-Block,In-Block,Total,Total
Unnamed: 0_level_1,mean,median,mean,median,mean,median
SnippetName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2


Wrong snippet distribution in sequence: - absolute 0 - relative 0.0
Wrong snippet distribution in block assignment: - absolute 0 - relative 0.0


## Create files for partitions generated above

In [21]:
# Read Solutions for all snippets from CSV File (semicolon-separated).
# The table is kept in the module-level name `solutions`.
solutions = pd.read_csv(SNIPPETS_FILE, sep=';')
# Preview of the first rows (notebook cell output)
solutions.head()

Unnamed: 0,filename,right_answer,possible_wrong_answers,number,variant,version,Langhout AoC_category,AoC_category
0,4-clean-v0.png,0,-1;-2,4.0,clean,v0,infix operator precedence,confusingboolwitharith
1,4-clean-v1.png,6,4;3,4.0,clean,v1,infix operator precedence,confusingboolwitharith
2,4-clean-v2.png,7,16;8,4.0,clean,v2,infix operator precedence,confusingboolwitharith
3,4-obf-v0.png,0,-1;-2,4.0,obf,v0,infix operator precedence,confusingboolwitharith
4,4-obf-v1.png,6,4;3,4.0,obf,v1,infix operator precedence,confusingboolwitharith


Create the XLSX condition files for each participant (one file per block) in the run data folder, with the applicable snippet names and the solutions from the solution file.

Filename: `conditions_{block_number}_{participant}.xlsx`

Structure of each file (columns):

`SnippetName;ImagePath;answer1;answer2;answer3;correct;break_length;start_event`

- SnippetName: filename of the Snippet image, used as index
- ImagePath: path to the image relative to the image folder, `{version}/{SnippetName}` (e.g. `v0/4-clean-v0.png`)
- answer1, answer2, answer3: three possible answers, if only two answers, then third is `EMPTY`
- correct: correct answer from the three given ones
- break_length: length of break in seconds to take after the snippet was finished
- start_event: eeg signal to send via port, for clean `11`, for obf `12`

In [22]:
# Build the sorted list of answer options that is shown for one snippet
def _coerce_answer(value):
    """Return ``value`` as int if it denotes a whole number, else unchanged."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return value
    # int() truncates floats: only accept the conversion when nothing is lost
    if isinstance(value, str) or number == value:
        return number
    return value


def prepare_answers(right_answer, possible_wrong_answers):
    """Combine the right and the wrong answers into three sorted options.

    Args:
        right_answer: the correct answer (number or string).
        possible_wrong_answers: the wrong answers as one ';'-separated string.

    Returns:
        The answer options in ascending order. Answers that denote whole
        numbers are converted to int and sorted numerically; all remaining
        string answers follow in alphabetical order, so a mix of numeric and
        textual answers no longer raises a TypeError. If there are only two
        answers, 'EMPTY' is appended as third option.
    """
    answers = [right_answer] + possible_wrong_answers.split(';')
    new_answers = [_coerce_answer(answer) for answer in answers]
    # numbers first, then strings; values are only compared within their kind
    new_answers.sort(key=lambda answer: (isinstance(answer, str), answer))
    if len(new_answers) == 2:
        new_answers.append('EMPTY')
    return new_answers


def generate_display_files(participant_sequences):
    """Write one conditions workbook (.xlsx) per participant and block.

    ``participant_sequences`` maps a participant id to a run table with the
    columns SNIPPET_NAME, SEQUENCE_BLOCK, SEQUENCE_IN_BLOCK and
    SEQUENCE_TOTAL. Every run table is joined with the module-level
    ``solutions`` table (on the snippet file name) and extended by

    * BREAK_LENGTH: 20 after every LONGER_SHORT_BREAK_ROUND-th snippet of a
      block, otherwise 5,
    * START_EVENT: 11 for clean snippets, 12 for all others,
    * IMAGE_PATH: ``<version>/<snippet file name>``,
    * CORRECT_ANSWER and the three sorted answer options.

    The files are written to RUN_DATA as
    ``conditions_<block number, starting at 1>_<participant, zero-padded>.xlsx``.

    Note: the BREAK_LENGTH and START_EVENT columns are added to the passed
    run tables in place.

    Raises:
        ValueError: if a snippet of a run has no entry in ``solutions``.
    """
    # position (counted from 0) of the last snippet of each break round
    last_position_in_round = -1 % LONGER_SHORT_BREAK_ROUND
    answer_columns = (ANSWER_1, ANSWER_2, ANSWER_3)

    for participant, sequence in participant_sequences.items():
        sequence[BREAK_LENGTH] = sequence[SEQUENCE_IN_BLOCK].map(
            lambda x: 20 if x % LONGER_SHORT_BREAK_ROUND == last_position_in_round else 5)
        sequence[START_EVENT] = sequence[SNIPPET_NAME].apply(
            lambda snippet: 11 if CLEAN in snippet else 12)

        df_con = sequence.merge(solutions, how='left',
                                left_on=SNIPPET_NAME, right_on='filename')
        # a left join leaves NaN for unknown snippets; fail with a clear
        # message instead of an AttributeError further down
        unmatched = df_con.loc[df_con['filename'].isna(), SNIPPET_NAME]
        if not unmatched.empty:
            raise ValueError(
                f'No solution found in {SNIPPETS_FILE} for: {unmatched.tolist()}')

        df_con[IMAGE_PATH] = df_con['version'] + \
            '/' + df_con[SNIPPET_NAME]
        # names starting with '-' get a leading apostrophe in the workbook
        df_con[SNIPPET_NAME] = df_con[SNIPPET_NAME].apply(
            lambda snippet_name: f'\'{snippet_name}' if snippet_name.startswith('-') else snippet_name)
        df_con[CORRECT_ANSWER] = df_con['right_answer']

        answer_rows = [
            prepare_answers(right_answer, wrong_answers)
            for right_answer, wrong_answers in zip(
                df_con['right_answer'], df_con['possible_wrong_answers'])]
        for position, column in enumerate(answer_columns):
            # object dtype: a column may hold numbers as well as strings
            df_con[column] = pd.Series(
                [answers[position] for answers in answer_rows],
                index=df_con.index, dtype=object)

        df_con = df_con.drop(columns=list(solutions.columns) +
                             [SEQUENCE_IN_BLOCK, SEQUENCE_TOTAL])

        for b in df_con[SEQUENCE_BLOCK].unique():
            df = df_con[df_con[SEQUENCE_BLOCK] == b].drop(
                columns=[SEQUENCE_BLOCK])
            # The second positional argument of to_excel is the sheet name
            # (not a file mode); it is passed by keyword because positional
            # use is deprecated. 'w' keeps the sheet name of existing files.
            df.to_excel(
                f'{RUN_DATA}/conditions_{b+1}_{str(participant).zfill(3)}.xlsx',
                sheet_name='w', index=False)


# Write the conditions files for all participant sequences currently loaded
generate_display_files(participant_sequences)