# Perform Fixation Correction Iteration 2
Status: Implemented
 -- Fully automated

In [1]:
import warnings

import pandas as pd
from tqdm.notebook import tqdm
# NOTE(review): the star import is presumably where the project helpers and
# constants used below (SNIPPET, SEPARATOR, PARTICIPANT, get_fixations_path, ...)
# come from — confirm against utils/utils.py.
from utils.utils import *

# Silence every Python warning for the remainder of the kernel session.
warnings.filterwarnings("ignore")

In [2]:
# Load, per participant, the data folder and the snippet presentation order.
folders = get_participant_folder_per_participant()
sequence_orders = get_sequence_order_per_participant()

# Both lookups must describe exactly the same participants; on mismatch report
# both key sets (same diagnostic style as the manual-evaluation check below).
assert set(folders.keys()) == set(sequence_orders.keys()), \
    f'''folders: {folders.keys()}, \nsequence orders: {sequence_orders.keys()}'''

# participant -> list of snippet names taken from the SNIPPET column of that
# participant's sequence order.
snippets = {p: list(sequence_orders[p][SNIPPET].values)
            for p in sequence_orders}

In [3]:
# Select the fixation-correction iteration this notebook works on.
# The list is mutated in place (element 0) rather than rebound, so no `global`
# declaration is needed; at cell (module) level such a declaration has no effect.
FIXATION_CORRECTION_ITERATION[0] = 2
print('Fixation correction iteration:', current_fixation_correction_iteration())
print('Previous Fixation correction iteration:', previous_fixation_correction_iteration())

Fixation correction iteration: 2
Previous Fixation correction iteration: 1


In [4]:
# Read the code-size table and index it by snippet name.
code_sizes = (
    pd.read_csv(CODE_SIZE_PATH, index_col=False, sep=SEPARATOR)
    .set_index(SNIPPET)
)
display(code_sizes)

Unnamed: 0_level_0,Chars,EssentialChars,LoC,EssentialLoC,Max length LoC,Words,EssentialWords,CodeLines,EssentialCodeLines,number,version,Snippet_base,variant,Condition
Snippet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
-clean-v0,49,33,3,3,26,16,10,"['int V1 = 10;', 'int V2 = 3;', 'boolean R = (...","[1, 1, 1]",0,v0,--v0,clean,clean
-clean-v1,48,32,3,3,26,16,10,"['int V1 = 4;', 'int V2 = 2;', 'boolean R = (V...","[1, 1, 1]",0,v1,--v1,clean,clean
-clean-v2,48,32,3,3,26,16,10,"['int V1 = 1;', 'int V2 = 5;', 'boolean R = (V...","[1, 1, 1]",0,v2,--v2,clean,clean
-obf-v0,54,36,3,3,31,18,10,"['int V1 = 10;', 'int V2 = 3;', 'boolean R = (...","[1, 1, 1]",0,v0,--v0,obf,confusing
-obf-v1,54,36,3,3,32,18,10,"['int V1 = 4;', 'int V2 = 2;', 'boolean R = ( ...","[1, 1, 1]",0,v1,--v1,obf,confusing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9-clean-v1,37,26,3,3,21,11,7,"['int V1 = 5;', 'boolean R = (V1 > 2);', 'V1--;']","[1, 1, 1]",9,v1,9--v1,clean,clean
9-clean-v2,38,27,3,3,22,11,7,"['int V1 = 8;', 'boolean R = (V1 <= 7);', 'V1-...","[1, 1, 1]",9,v2,9--v2,clean,clean
9-obf-v0,35,25,2,2,24,10,6,"['int V1 = 2;', 'boolean R = (V1-- == 1);']","[1, 1]",9,v0,9--v0,obf,confusing
9-obf-v1,34,24,2,2,23,10,6,"['int V1 = 5;', 'boolean R = (V1-- > 2);']","[1, 1]",9,v1,9--v1,obf,confusing


### Take into account previous iteration of manual evaluation

In [5]:
# Look up manual accuracy evaluation files left over from the previous iteration.
manual_evaluation_paths = exist_previous_manual_accuracy_evaluation_paths()
if manual_evaluation_paths and current_fixation_correction_iteration() > 1:
    # The evaluation files must cover exactly the participants that have a folder.
    assert set(folders.keys()) == set(manual_evaluation_paths.keys()), \
        f'''folder: {folders.keys()}, \neval paths: {manual_evaluation_paths.keys()}'''

    print('There exist manual accuracy evaluation files from the previous iteration which will be considered for this iteration\'s fixation correction. ')
    print(manual_evaluation_paths)

    for participant, evaluation_path in manual_evaluation_paths.items():
        print('----------------------------------------------')
        print(PARTICIPANT, participant)

        print('(01/04) Load accuracy evaluation file')
        # The participant column is forced to str when reading.
        manual_evaluation = pd.read_csv(
            evaluation_path,
            sep=SEPARATOR,
            index_col=False,
            dtype={PARTICIPANT: str},
        )

        print('(02/04) Select those in need of rework')
        to_rework = get_to_rework_manual_accuracy_evaluation(
            manual_evaluation, participant, snippets[participant])

        print('(03/04) Check that outliers were removed correctly and offsets were adapted')
        check_outlier_offset_actions(to_rework, participant)

        print('(04/04) Save snippets to rework')
        # Persist the names of the snippets to rework ...
        rework_snippets = to_rework[SNIPPET]
        rework_path = get_snippet_rework_paths(participant)
        rework_snippets.to_csv(rework_path, sep=SEPARATOR, index=False)
        # ... and restrict this participant's snippet list to exactly those.
        snippets[participant] = rework_snippets.to_list()

There exist manual accuracy evaluation files from the previous iteration which will be considered for this iteration's fixation correction. 
{'003': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/003_DataAccuracy_manual_evaluation_combined.csv'), '004': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/004_DataAccuracy_manual_evaluation_combined.csv'), '005': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/005_DataAccuracy_manual_evaluation_combined.csv'), '007': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/007_DataAccuracy_manual_evaluation_combined.csv'), '008': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/008_DataAccuracy_manual_evaluation_combined.csv'), '009': WindowsPath('../08-Data-Trial_Recordings/manual_accuracy_evaluation/iteration_1/combined/009_DataAccuracy_manual_eva

### Auto-correction
* perform auto-correction with various algorithms

In [6]:
# Re-run the automatic fixation correction for every participant's snippets.
# Only executed for iterations after the first. Per participant, the cell
# (1) deletes existing correction files, (2) loads the fixation data,
# (3) runs the correction algorithms and (4) writes the results to disk.
if current_fixation_correction_iteration() > 1:
    # Iterate over all participants
    for participant in tqdm(folders):
        print('----------------------------------------------')
        print(PARTICIPANT, participant)
        # Nothing to do when the participant's snippet list is empty.
        if not snippets[participant]:
            print('no snippets to (re-)work found.')
            continue

        print('(01/04) Delete existing fixation corrections')
        # Remove every file returned by get_all_fixation_correction_paths for
        # each snippet before new corrections are written.
        for snippet in tqdm(snippets[participant]):
            fixation_correction_paths = get_all_fixation_correction_paths(
                participant, snippet)
            for p in fixation_correction_paths:
                p.unlink()

        print('(02/04) Retrieve fixation data')
        # snippet -> fixation DataFrame, for snippets whose input file exists
        participant_snippet_fixations = {}
        # snippets whose input file is missing
        not_available = set()
        for snippet in tqdm(snippets[participant]):
            # NOTE(review): `previous_fixation_correction_iteration` is passed
            # uncalled; presumably get_fixations_path resolves it itself — confirm.
            snippet_fixations_path = get_fixations_path(
                participant, snippet, no_outliers_if_exists=True, iteration=previous_fixation_correction_iteration)
            if snippet_fixations_path.exists():
                participant_snippet_fixations[snippet] = pd.read_csv(
                    snippet_fixations_path, index_col=False, sep=SEPARATOR, dtype={PARTICIPANT: str})
                # Write the loaded data unchanged to the path requested with
                # no_outliers_if_exists=False and no explicit iteration
                # (presumably the current iteration's base file — confirm).
                participant_snippet_fixations[snippet].to_csv(get_fixations_path(
                    participant, snippet, no_outliers_if_exists=False), index=False, sep=SEPARATOR)
            else:
                not_available.add(snippet)
        # Record the missing snippets as exclusions, with the reason per snippet.
        if not_available:
            update_exclusions(participant, SNIPPET, VISUAL, {snippet: f'''01d4_PRE_VIS_Fixation_Correction-it{
                              current_fixation_correction_iteration()}: Fixation data not available''' for snippet in not_available})
        # Drop the missing snippets from this participant's snippet list.
        for snippet in not_available:
            snippets[participant].remove(snippet)

        print('(03/04) Perform auto-correction')
        # Result is used below as: snippet -> {algorithm key -> DataFrame}.
        corrected_snippet_fixations = calculate_corrected_fixations(
            participant_snippet_fixations, code_sizes)

        print('(04/04) Store corrected fixation data')
        # Save one file per snippet and correction algorithm.
        for snippet in tqdm(corrected_snippet_fixations):
            corrected_fixation_datasets = corrected_snippet_fixations[snippet]
            for key, fixation_data in corrected_fixation_datasets.items():
                # The entry keyed FIXATION_CORRECTION_ALGORITHM_ORIGINAL is not written.
                if key == FIXATION_CORRECTION_ALGORITHM_ORIGINAL:
                    continue
                fixation_data.to_csv(get_fixations_path(
                    participant, snippet, modified_version=key), index=False, sep=SEPARATOR)

  0%|          | 0/24 [00:00<?, ?it/s]

----------------------------------------------
Participant 003
(01/04) Delete existing fixation corrections


  0%|          | 0/67 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/67 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/67 [00:00<?, ?it/s]

----------------------------------------------
Participant 004
(01/04) Delete existing fixation corrections


  0%|          | 0/57 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/57 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/57 [00:00<?, ?it/s]

----------------------------------------------
Participant 005
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 007
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 008
(01/04) Delete existing fixation corrections


  0%|          | 0/68 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

----------------------------------------------
Participant 009
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 010
(01/04) Delete existing fixation corrections


  0%|          | 0/67 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/67 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/67 [00:00<?, ?it/s]

----------------------------------------------
Participant 011
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 012
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 013
(01/04) Delete existing fixation corrections


  0%|          | 0/68 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

----------------------------------------------
Participant 014
(01/04) Delete existing fixation corrections


  0%|          | 0/70 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/70 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/70 [00:00<?, ?it/s]

----------------------------------------------
Participant 015
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 016
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 017
(01/04) Delete existing fixation corrections


  0%|          | 0/68 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

----------------------------------------------
Participant 018
(01/04) Delete existing fixation corrections


  0%|          | 0/68 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/68 [00:00<?, ?it/s]

----------------------------------------------
Participant 019
(01/04) Delete existing fixation corrections


  0%|          | 0/66 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/66 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/66 [00:00<?, ?it/s]

----------------------------------------------
Participant 020
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 021
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 022
(01/04) Delete existing fixation corrections


  0%|          | 0/69 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/69 [00:00<?, ?it/s]

(03/04) Perform auto-correction
	No corrected fixation data possible for 49-obf-v0 for cluster_alllines
	No corrected fixation data possible for 63-clean-v0 for cluster_alllines
(04/04) Store corrected fixation data


  0%|          | 0/69 [00:00<?, ?it/s]

----------------------------------------------
Participant 023
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 024
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 101
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

----------------------------------------------
Participant 102
(01/04) Delete existing fixation corrections


  0%|          | 0/71 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/71 [00:00<?, ?it/s]

----------------------------------------------
Participant 106
(01/04) Delete existing fixation corrections


  0%|          | 0/72 [00:00<?, ?it/s]

(02/04) Retrieve fixation data


  0%|          | 0/72 [00:00<?, ?it/s]

(03/04) Perform auto-correction
(04/04) Store corrected fixation data


  0%|          | 0/72 [00:00<?, ?it/s]