<h1> Data Analysis for REYeker</h1>

In [18]:
# lib for dataframes
import pandas as pd

# lib for saving np images
from PIL import Image

# lib for plotting
%matplotlib inline
import matplotlib.pyplot as plt

# lib for numerical computations
import numpy as np

# lib for crerating paths
from pathlib import Path

# REYeker lib
import modules.rEYEkerAnalysis as rEYEker

<h2>1. Configuration</h2>

<h5>Database configuration </h5>

In [19]:
# path to the datafile
config_datasheet_path = r'./results/data_of_all_removed.xlsx'

# columns with visual stimulus data
config_visual_stimulus_variable = "ClickData"

# columns with names of the algo
config_algo_name_variable = "Algorithm"

# columns with correctness value
config_corectness_variable = "Correctness"

# colums of response time
config_response_time_variable = "ResponseTime"

<h5>Configuration for REYEker data </h5>

In [20]:
# file for loading rEYEker settings
config_reyeker_settings_path = "data/used.json"

<h5>Import the preprocessed dataframe</h5>

In [21]:
df = pd.read_excel(config_datasheet_path)
algo_name_array = df["Algorithm"].unique()

df_array = []

for algo_name in algo_name_array:
    algo_df = df.loc[df[config_algo_name_variable]==algo_name]
    df_array.append(algo_df)

In [22]:
# data for loading the images
image_path_array = []

for algo_name in algo_name_array:
    image_path = 'images/' + algo_name + '.png'

    image_path_array.append(image_path)


<h4>Import REYeker Settings</h4>

In [23]:
(_data, _times, click_setting) = rEYEker.load_data_from_json(config_reyeker_settings_path)

<h4>Import Images Settings</h4>

In [24]:
image_array = []

# read in every image
for image_path in image_path_array:
    print(image_path)
    image = rEYEker.load_image(image_path)
   
    image_array.append(image)

images/CommonChars.png
images/ContainsSubstring.png
images/CountVowels.png
images/ReverseArray.png
images/BinarySearchStrings.png
images/Multiples.png
images/CrossSum.png
images/Swap.png
images/InsertionSort.png
images/GreatestCommonDivisor.png


<h4> Cast Data to Valid format</h4>

Import the visual stimulus measured Data

In [25]:
visual_stimulus_data_matrix = []

for idx, dataframe in enumerate(df_array):
    visual_stimulus_array = []

    #iter over every row 
    for _idx, row in dataframe.iterrows():
        data_str = row[config_visual_stimulus_variable]
        data_str = data_str.strip()
        coordinates_str = data_str.split(" ")
        coordinates = []
       
        # iter over every coordinate pair x-y
        for coordinate_str in coordinates_str:
            try:
                coordinate = coordinate_str.split("-")
                coordinate = (int(coordinate[0]), int(coordinate[1]))
                coordinates.append(coordinate)
            except:
                print(coordinate_str)
            
            
        visual_stimulus_array.append(coordinates)
        
    visual_stimulus_data_matrix.append(visual_stimulus_array)


<h4>Helper Functions</h4>

In [28]:
def save_images(image_array, folder, image_name):
    """
    :brief saves an array of images to a certain location incrementing the postfix by a number
    :param image_array:        array of images (np.ndarray)
    :param folder:     prefix of image/ folder location
    :param image_name: prefix for the image
    """
    
    Path(folder).mkdir(parents=True, exist_ok=True)
    
    prefix = folder + image_name
    
    #TODO create folders if there are none present
    for idx, data in enumerate(image_array):
        data = data*255
        data = np.uint8(data)
        im = Image.fromarray(data)
        im.save(prefix + str(idx) + '.png')
    
def is_in(value, tup):
    return tup[0] <= value <= tup[1]

def get_0_offset(number):
    i = 0
    number = int(number)
    while number != 0:
        number = int(number / 10)
        i = i + 1
    return i

<h2>2. Create Single Heatmaps</h2>

create heatmaps

In [29]:
heatmap_matrix = []
print("\tGoing to process datatable #" + str(idx) + " with " + str(len(visual_stimulus_data_matrix)) + " datasets: ")

# iterate over all the datasets
for dataset_idx, stimulus_dataset in enumerate(visual_stimulus_data_matrix):
    
    print("\t\tdataset #" + str(dataset_idx) + " (up to "+ str(len(stimulus_dataset)) + "): [", end='')
    heatmap_array = []

    # iterate over all the measurements of the dataset
    for visual_idx, stimulus_measurement in enumerate(stimulus_dataset):
        
        print(str(visual_idx), end=";")
        
        im = rEYEker.draw_shape_heat_map(image_array[dataset_idx], stimulus_measurement, click_setting, should_copy=True)
        heatmap_array.append(im)
   
    print("]")
    heatmaps_matrix.append(heatmap_array)

	Going to process datatable #9 with 10 datasets: 
		dataset #0 (up to 49): [0;1;2;3;4;5;6;

KeyboardInterrupt: 


save Heatmaps

In [13]:
for idx, heatmap_array in enumerate(heatmaps_matrix):
    path = "./results/" + str(algo_name_array[algo_idx]) + "/heatmaps/heatmaps/"
    print("Writing to:" + path)
    save_images(heatmap_array, path, "")

Writing to:./results/BinarySearch/heatmaps/heatmaps/BR/
Writing to:./results/BinarySearch/heatmaps/heatmaps/BI/
Writing to:./results/BinarySearch/heatmaps/heatmaps/TR/
Writing to:./results/BinarySearch/heatmaps/heatmaps/TI/
Writing to:./results/BubbleSort/heatmaps/heatmaps/BR/
Writing to:./results/BubbleSort/heatmaps/heatmaps/BI/
Writing to:./results/BubbleSort/heatmaps/heatmaps/TR/
Writing to:./results/BubbleSort/heatmaps/heatmaps/TI/
Writing to:./results/Factorial/heatmaps/heatmaps/BR/
Writing to:./results/Factorial/heatmaps/heatmaps/BI/
Writing to:./results/Factorial/heatmaps/heatmaps/TR/
Writing to:./results/Factorial/heatmaps/heatmaps/TI/
Writing to:./results/Fibonacci/heatmaps/heatmaps/BR/
Writing to:./results/Fibonacci/heatmaps/heatmaps/BI/
Writing to:./results/Fibonacci/heatmaps/heatmaps/TR/
Writing to:./results/Fibonacci/heatmaps/heatmaps/TI/
Writing to:./results/IntegerBinary/heatmaps/heatmaps/BR/
Writing to:./results/IntegerBinary/heatmaps/heatmaps/BI/
Writing to:./results/I

<h2>3. Create Average Heatmaps</h2>

create heatmaps

In [31]:
heatmap_array = []
mask_array = []
shape_array = []

# iterate over all the datasets
for idx, stimulus_dataset in enumerate(visual_stimulus_data_matrix):
    print("#" + str(idx), end="")
    image = image_array[idx]
    shape_array.append(image.shape)
    im, mask = rEYEker.draw_average_shape_heat_map_rel(image, stimulus_dataset, click_setting, 1.0, 0.0, None, should_copy=True)
    heatmap_array.append(im)
    mask_array.append(mask)
    
print()

#0#1

KeyboardInterrupt: 

save heatmaps

In [33]:
for idx, heatmap in enumerate(heatmap_array):
    #path = "./results/" + str(algo_name_aray[algo_idx]) + "/heatmaps/average_heatmap/"
    path = "./results/averageHeatMaps/"
    print("Writing to:" + path)
    save_images([heatmap], path, algo_name_array[idx])

Writing to:./results/averageHeatMaps/


<h2>4. Create Sequence diagramms</h2>

create sequence diagrams

In [16]:
sequence_diagrams_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx, visual_stimulus_data_matrix in enumerate(visual_stimulus_data_tensor):
    sequence_diagrams_matrix = []
    print("\tGoing to process datatable #" + str(idx) + " with " + str(len(visual_stimulus_data_matrix)) + " datasets: ")
    
    # iterate over all the datasets
    for dataset_idx, stimulus_dataset in enumerate(visual_stimulus_data_matrix):
        sequence_diagram_array = []
        print("\t\tdataset #" + str(dataset_idx) + " (up to "+ str(len(stimulus_dataset)) + "): [", end='')
    
        # iterate over all the measurements of the dataset
        for visual_idx, stimulus_measurement in enumerate(stimulus_dataset):
            print(str(visual_idx), end=";")
            im = image_tensor[algo_idx][dataset_idx]
            try:
                im = rEYEker.draw_vertical_line_diagram(im, stimulus_measurement, should_copy=True)
                sequence_diagram_array.append(im)
                
            except:
                #TODO
                sequence_diagram_array.append(im.copy())
                #print("W.I.P.:", end='')
                #print("to many clicks for dataset " + str(dataset_idx) + " datset " + str(visual_idx))
                
        print("]")
        sequence_diagrams_matrix.append(sequence_diagram_array)
    sequence_diagrams_tensor.append(sequence_diagrams_matrix)

Going to process 8 datatables: 
	Going to process datatable #3 with 4 datasets: 
		dataset #0 (up to 5): [0;1;2;3;4;]
		dataset #1 (up to 2): [0;1;]
		dataset #2 (up to 3): [0;1;2;]
		dataset #3 (up to 8): [0;1;2;3;4;5;6;7;]
	Going to process datatable #3 with 4 datasets: 
		dataset #0 (up to 12): [0;1;2;3;4;5;6;7;8;9;10;11;]
		dataset #1 (up to 7): [0;1;2;3;4;5;6;]
		dataset #2 (up to 5): [0;1;2;3;4;]
		dataset #3 (up to 14): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;]
	Going to process datatable #3 with 4 datasets: 
		dataset #0 (up to 17): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;]
		dataset #1 (up to 25): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;]
		dataset #2 (up to 24): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;]
		dataset #3 (up to 16): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;]
	Going to process datatable #3 with 4 datasets: 
		dataset #0 (up to 12): [0;1;2;3;4;5;6;7;8;9;10;11;]
		dataset #1 (up to 17): [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;

save sequence diagrams

In [17]:
for algo_idx, sequence_diagrams_matrix in enumerate(sequence_diagrams_tensor):
    for idx, sequence_diagram_array in enumerate(sequence_diagrams_matrix):
        path = "./results/" + str(algo_name_array[algo_idx]) + "/sequence_diagrams/" +  config_folder_prefix_array[idx]
        print("Writing to:" + path)
        save_images(sequence_diagram_array, path, config_image_prefix_tensor[algo_idx][idx])

Writing to:./results/BinarySearch/sequence_diagrams/BR/
Writing to:./results/BinarySearch/sequence_diagrams/BI/
Writing to:./results/BinarySearch/sequence_diagrams/TR/
Writing to:./results/BinarySearch/sequence_diagrams/TI/
Writing to:./results/BubbleSort/sequence_diagrams/BR/
Writing to:./results/BubbleSort/sequence_diagrams/BI/
Writing to:./results/BubbleSort/sequence_diagrams/TR/
Writing to:./results/BubbleSort/sequence_diagrams/TI/
Writing to:./results/Factorial/sequence_diagrams/BR/
Writing to:./results/Factorial/sequence_diagrams/BI/
Writing to:./results/Factorial/sequence_diagrams/TR/
Writing to:./results/Factorial/sequence_diagrams/TI/
Writing to:./results/Fibonacci/sequence_diagrams/BR/
Writing to:./results/Fibonacci/sequence_diagrams/BI/
Writing to:./results/Fibonacci/sequence_diagrams/TR/
Writing to:./results/Fibonacci/sequence_diagrams/TI/
Writing to:./results/IntegerBinary/sequence_diagrams/BR/
Writing to:./results/IntegerBinary/sequence_diagrams/BI/
Writing to:./results/I

# AOI categorization

In [62]:
config_prefix = ['BR', 'BI', 'TR', 'TI']
aoi_cat_path_matrix = []

for algo_name in algo_name_array:
    aoi_array =[
        'data/aoi_categorized/AOI_BR_' + algo_name + '.xlsx',
        'data/aoi_categorized/AOI_BI_' + algo_name + '.xlsx',
        'data/aoi_categorized/AOI_TR_' + algo_name + '.xlsx',
        'data/aoi_categorized/AOI_TI_' + algo_name + '.xlsx',
    ]
    aoi_cat_path_matrix.append(aoi_array)
    
aoi_df_matrix = []
for path_array in aoi_cat_path_matrix:
    aoi_df_array = []
    for path in path_array:
        raw = pd.read_excel(path)
        tmp_df = pd.DataFrame(raw)
        aoi_df_array.append(tmp_df)
    aoi_df_matrix.append(aoi_df_array) 
    
def is_in(df, y):
    for _idx, row in df.iterrows():
        if row["startHeight"] <= y <= row["stopHeight"]:
            return row["Name"]
    return "none"

iterative = ['none', 'main', 'Iterative definition', 'Pre calculation', 'Iteration Condition', 'Iteration Step', 'Return Result']
recursive = ['none','main', 'Recursive definition', 'Pre calculation', 'Recursive Condition', 'Recursive Step', 'Return Result']
order = ['0', '1', '2', '3', '4', '5', '6']

In [106]:
mask_array = []
for mask_2d in mask_tensor:
    for mask_1d in mask_2d:
        mask_array.append(mask_1d)
        
aoi_df_array = []
for aoi_df_1d in aoi_df_matrix:
    for df in aoi_df_1d:
        aoi_df_array.append(df)

algo_df = pd.DataFrame([], columns=["Comprehension", "Programming", "Algorithm"])
for algo in algo_name_array:
    for idx, prefix in enumerate(config_prefix):
        comprehension = "BU"
        if idx >= 2:
            comprehension = "TD"
        
        programming = "R"
        if idx%2==1:
            programming = "I"
        
        algo_df = algo_df.append(pd.DataFrame(
            [[comprehension, programming, algo]],
            columns=["Comprehension", "Programming", "Algorithm"]))
algo_df = algo_df.reset_index()
algo_df = algo_df.drop("index", axis=1)
    
additional = iterative.copy()
for element in recursive:
    additional.append(element)

additional = list(set(additional))
for element in additional:
    algo_df.insert(loc=3, column=element, value=0)

for idx, mask in enumerate(mask_array):
    height = shape_array[idx][0]
    width = shape_array[idx][1]
    aoi_df = aoi_df_array[idx]
    print(idx)
    for h in range(height):
        for w in range(width):
            mask_idx = h*width+w
            if mask[mask_idx] != 0:
                name = is_in(aoi_df, h)
                algo_df.at[idx, name] += 1


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


In [129]:
#algo_df = algo_df.reset_index()
pattern = []
for idx, row in algo_df.iterrows():
    algo_pattern = [row["Algorithm"], row["Comprehension"], row["Programming"]]
    for element in additional:
        if row[element] >= 500:
            algo_pattern.append(element)
    pattern.append(algo_pattern)  
    
for data in pattern:
    print(str(data))

['BinarySearch', 'BU', 'R', 'main', 'Recursive Step', 'Pre calculation', 'Recursive Condition']
['BinarySearch', 'BU', 'I', 'main']
['BinarySearch', 'TD', 'R', 'main']
['BinarySearch', 'TD', 'I', 'Iteration Condition', 'main', 'Pre calculation', 'Iteration Step']
['BubbleSort', 'BU', 'R', 'main']
['BubbleSort', 'BU', 'I', 'main']
['BubbleSort', 'TD', 'R', 'main']
['BubbleSort', 'TD', 'I', 'Iteration Condition', 'main', 'Iteration Step']
['Factorial', 'BU', 'R', 'main', 'Recursive Step', 'Recursive definition']
['Factorial', 'BU', 'I', 'Iteration Condition', 'Iterative definition', 'Pre calculation']
['Factorial', 'TD', 'R', 'main', 'Recursive Step', 'Recursive definition']
['Factorial', 'TD', 'I', 'Iteration Condition', 'Iterative definition', 'Pre calculation']
['Fibonacci', 'BU', 'R', 'none', 'Recursive Step']
['Fibonacci', 'BU', 'I', 'Iteration Condition', 'Pre calculation', 'Iteration Step']
['Fibonacci', 'TD', 'R', 'Recursive Step']
['Fibonacci', 'TD', 'I', 'Iteration Condition', 