In [1]:
import os
import numpy as np
import polars as pl
import joblib

import sys
# TODO: this hard-coded local path must be changed after release.
sys.path.append('/mnt/mlshare/prasse/aeye_git/pymovements/src/')
import preprocessing.feature_extraction as feature_extraction
import config.config as config
import utils.helpers as helpers

%load_ext autoreload
%autoreload 2

In [2]:
# Directory in which the per-configuration result CSV files are looked up.
save_dir = "results/"

In [3]:
def dataframe_to_latex(input_df):
    r"""Render a data frame as the source of a LaTeX ``tabular`` environment.

    Every column is left-aligned (``l``) and columns are separated by ``|``.
    The header row wraps each column name in ``\bf{...}``.  The header and
    every data row end with ``\\`` immediately followed by ``\hline``.

    Cell values are converted with ``str()`` only; LaTeX special characters
    in names or values are NOT escaped.

    Parameters
    ----------
    input_df
        Frame-like object exposing ``.columns``, column access via
        ``input_df[name]`` and ``.shape`` (this notebook passes a polars
        ``DataFrame``).

    Returns
    -------
    str
        The complete ``\begin{tabular} ... \end{tabular}`` string.
    """
    df_columns = list(input_df.columns)
    # Three literal backslashes: the LaTeX row break "\\" followed by "\hline".
    # Written with explicit escapes; the former '\\\\\hline' relied on the
    # invalid escape sequence '\h', which newer Python versions warn about.
    row_end = '\\\\\\hline \n'

    column_values = [list(input_df[key]) for key in df_columns]

    table_string = '\\begin{tabular}{' + '|'.join('l' for _ in df_columns) + '}\n \\hline\n'
    table_string += ' & '.join('\\bf{' + str(key) + '}' for key in df_columns) + row_end
    if input_df.shape[0] > 0:
        # zip(*columns) walks the column lists row by row.
        for row in zip(*column_values):
            table_string += ' & '.join(str(cell) for cell in row) + row_end
    table_string += '\\end{tabular}'
    return table_string

In [4]:
# Translates raw label / column identifiers into the human-readable headers
# used for the columns of the result tables.
# NOTE(review): 'familarity' / 'Text Familarity' look like misspellings of
# "familiarity"; both are left untouched here because the keys are looked up
# by label name and the values end up in the printed tables.
column_name_mapping = dict(
    acc='Text Comprehension',
    subj_acc='General Reading Comprehension',
    difficulty='Text Difficulty',
    native='Native Speaker',
    familarity='Text Familarity',
    classes='L1 with dyslexia vs. L1 w/o dyslexia vs. L2',
    l1vsl2='L1 vs. L2',
    dyslexia='Dyslexia',
    task_name='Task',
    detection_method='Detection Method',
)


# Number of decimals shown for the mean and its error in the tables.
decimals = 3
detection_methods = ['idt', 'ivt', 'microsaccades']
datasets          = ['sbsat','potec','copco','gazebase','gazebasevr']


def _parse_detection_param(detection_method, detection_param):
    """Turn one detection-parameter string into (parameter dict, LaTeX row label).

    The string is split on single spaces: token 1 is read as the integer
    minimum duration and, for 'ivt' / 'idt', token 3 as the float threshold.
    NOTE(review): presumably the strings look like
    '<name> <value> <name> <value>' -- confirm against
    helpers.get_detection_params.

    Raises
    ------
    ValueError
        If ``detection_method`` is not 'ivt', 'idt' or 'microsaccades'.
    """
    tokens = detection_param.split(' ')
    minimum_duration = int(tokens[1])
    if detection_method == 'ivt':
        threshold = float(tokens[3])
        detection_par = {'minimum_duration': minimum_duration,
                         'velocity_threshold': threshold}
        row_label = 'IVT $(min_{dur}=' + str(minimum_duration) +\
                    ', vel_{th}=' + str(threshold) + ')$'
    elif detection_method == 'idt':
        threshold = float(tokens[3])
        detection_par = {'minimum_duration': minimum_duration,
                         'dispersion_threshold': threshold}
        row_label = 'IDT $(min_{dur}=' + str(minimum_duration) +\
                    ', dis_{th}=' + str(threshold) + ')$'
    elif detection_method == 'microsaccades':
        detection_par = {'minimum_duration': minimum_duration}
        row_label = 'Microsaccades $(min_{dur}=' + str(minimum_duration) + ')$'
    else:
        raise ValueError('unknown detection method: ' + str(detection_method))
    return detection_par, row_label


def _format_metric(values, decimals):
    """Format ``values`` as '<mean> $\\pm$ <error>' rounded to ``decimals`` places.

    The error is std / sqrt(n).
    NOTE(review): np.std defaults to the population standard deviation
    (ddof=0); a sample-based standard error would use ddof=1 -- confirm intent.
    """
    mean = np.mean(values)
    error = np.std(values) / np.sqrt(len(values))
    # '\\pm' is spelled with an explicit escape; '\p' is an invalid escape sequence.
    return str(np.round(mean, decimals=decimals)) + ' $\\pm$ ' + str(np.round(error, decimals=decimals))


# One table per dataset: rows are detection configurations, columns are labels.
for dataset in datasets:
    label_columns = helpers.get_datset_labels(dataset)
    method_column = column_name_mapping['detection_method']
    result_df = {method_column: []}
    for label_counter, label in enumerate(label_columns):
        result_df[column_name_mapping[label]] = []
        for detection_method in detection_methods:
            for detection_param in helpers.get_detection_params(detection_method):
                detection_par, row_label = _parse_detection_param(detection_method, detection_param)

                # File-name suffix of the form 'key_value_key_value'.
                detection_param_string = '_'.join(
                    str(key) + '_' + str(value) for key, value in detection_par.items()
                )
                result_path = os.path.join(
                    save_dir,
                    dataset + '_' + label + '_' + detection_method + '_' + detection_param_string + '.csv',
                )

                # The row labels are identical for every label column, so they
                # are collected only while processing the first label.
                if label_counter == 0:
                    result_df[method_column].append(row_label)

                if os.path.exists(result_path):
                    c_result_df = pl.read_csv(result_path)
                    # Prefer the 'auc' column when the result file has one,
                    # otherwise fall back to 'acc'.
                    metric_column = 'auc' if 'auc' in c_result_df.columns else 'acc'
                    values = np.array(list(c_result_df[metric_column]), dtype=np.float32)
                    metric_string = _format_metric(values, decimals)
                else:
                    # No result file for this configuration.
                    metric_string = '---'
                result_df[column_name_mapping[label]].append(metric_string)
    result_pl_df = pl.DataFrame(result_df)
    print(result_pl_df)
    print(dataset)
    print(dataframe_to_latex(result_pl_df))

shape: (36, 5)
┌────────────────────┬───────────────┬────────────────────┬────────────────────┬───────────────────┐
│ Detection Method   ┆ Text          ┆ Text Difficulty    ┆ General Reading    ┆ Native Speaker    │
│ ---                ┆ Comprehension ┆ ---                ┆ Comprehension      ┆ ---               │
│ str                ┆ ---           ┆ str                ┆ ---                ┆ str               │
│                    ┆ str           ┆                    ┆ str                ┆                   │
╞════════════════════╪═══════════════╪════════════════════╪════════════════════╪═══════════════════╡
│ IDT                ┆ 0.501 $\pm$   ┆ 0.536 $\pm$ 0.012  ┆ 0.451 $\pm$ 0.041  ┆ 0.631 $\pm$ 0.025 │
│ $(min_{dur}=10,    ┆ 0.032         ┆                    ┆                    ┆                   │
│ dis_{th}=0…        ┆               ┆                    ┆                    ┆                   │
│ IDT                ┆ 0.538 $\pm$   ┆ 0.565 $\pm$ 0.019  ┆ 0.474 $\pm$ 0.03