# Initialisation

## Imports

### Magics

In [1]:
%load_ext rpy2.ipython
%matplotlib inline

### R

In [2]:
%%R
library(fitdistrplus)
library(sjstats)
library(ARTool)
library(lsmeans)
library(ez)



Users are encouraged to switch to 'emmeans'.
See help('transition') for more information, including how
to convert 'lsmeans' objects and scripts to work with 'emmeans'.



### Python

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np
from scipy import stats

### Magics

In [4]:
%load_ext rpy2.ipython
%matplotlib inline

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


### Seaborn

In [5]:
# Global plot styling: white-grid axes with a framed legend, and a palette made
# of xkcd grey followed by the first three colours of the 'colorblind' palette.
legend_options = {'legend.frameon':True}
sns.set_style('whitegrid', legend_options)

grey_swatch = sns.xkcd_palette(['grey'])
colorblind_swatches = sns.color_palette('colorblind', 3)
sns.set_palette(grey_swatch + colorblind_swatches)

## Helpers

In [6]:
def fetch_questionnaire(questionnaire_id):
    """Download a questionnaire spreadsheet and return it as a DataFrame.

    `questionnaire_id` is substituted for the `<FILE_ID>` placeholder in the
    Google Sheets CSV-export URL, and the resulting URL is read with
    `pd.read_csv`.
    """
    url_template = 'https://docs.google.com/spreadsheets/d/<FILE_ID>/gviz/tq?tqx=out:csv'
    csv_url = url_template.replace('<FILE_ID>', questionnaire_id)
    return pd.read_csv(csv_url)

def MAD(arr):
    """Median Absolute Deviation of a sample.

    A robust counterpart to the standard deviation that indicates the
    variability of the sample: the median of the absolute distances of each
    value from the sample median.
    https://en.wikipedia.org/wiki/Median_absolute_deviation
    """
    # Flatten the input and drop any masked entries.
    # (Original author's note: it should be faster to not use masked arrays.)
    values = np.ma.array(arr).compressed()
    centre = np.median(values)
    deviations = np.abs(values - centre)
    return np.median(deviations)

def median_mad(arr):
    """Return the sample summarised as the string 'median (MAD)'."""
    centre = np.median(arr)
    spread = MAD(arr)
    return '{} ({})'.format(centre, spread)

In [7]:
%%R 

# Format an ANOVA result as a LaTeX macro call.
#
# Args:
#   df1, df2: the two degrees-of-freedom values; printed with two decimals.
#   p:        scalar p-value.
#   F:        F statistic; printed with two decimals.
#   eta:      effect size; printed with two decimals.
#
# Returns:
#   A single string of the form \anova{df1}{df2}{F}{p}{eta}. When p is below
#   0.05 the p argument is replaced by the smallest of the thresholds
#   0.001 / 0.01 / 0.05 that p falls under; otherwise the macro \anovaex is
#   used and p is printed with two decimals.
anova_to_latex <- function(df1, df2, p, F, eta) {
    fixed2 <- function(x) formatC(x, digits = 2, format = "f")
    braced <- function(x) paste0("{", x, "}")

    # The backslash must be doubled: a lone "\a" in an R string literal is the
    # escape for the BEL control character, not a backslash followed by "a".
    cmd <- "\\anova"

    if (p < 0.001) {
        p_shown <- 0.001
    } else if (p < 0.01) {
        p_shown <- 0.01
    } else if (p < 0.05) {
        p_shown <- 0.05
    } else {
        p_shown <- fixed2(p)
        cmd <- "\\anovaex"
    }

    paste0(cmd,
           braced(fixed2(df1)), braced(fixed2(df2)),
           braced(fixed2(F)),
           braced(p_shown),
           braced(fixed2(eta)))
}

#TODO: write a function that does this for ezANOVA and handles GG corrections.
#TODO: dfs are printed with decimals.
#TODO: remove leading zeros.
aov_to_latex <- function(m) {
    # Flatten the model summary into a named vector so that individual cells
    # can be looked up by name ('Df1', 'Df2', 'Pr(>F)1', 'F value1').
    flat <- unlist(summary(m))

    # Effect size: the `etasq` field of the second element returned by eta_sq().
    effect_size <- eta_sq(m)[2]$etasq

    anova_to_latex(flat['Df1'], flat['Df2'],
                   flat['Pr(>F)1'],
                   flat['F value1'],
                   effect_size)
}



# Data Manipulation

## Loading

In [8]:
# Load the trial log from data.json into a DataFrame.
df = pd.read_json('data.json')
# Print the column names, dtypes and non-null counts as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11264 entries, 0 to 11263
Data columns (total 31 columns):
activatedWtihGuidance         10462 non-null float64
area                          11264 non-null object
attempts                      11264 non-null object
block                         11264 non-null int64
block_type                    11264 non-null object
checkedLinearMenu             11264 non-null bool
checkedShortcutInterface      11264 non-null bool
condition                     11264 non-null object
dismissedLinearMenu           11264 non-null bool
dismissedShortcutInterface    11264 non-null bool
erroredLinearMenu             11264 non-null bool
errors                        11264 non-null int64
eventualActivationMethod      11264 non-null object
finalReferenceTime            11264 non-null int64
finalTaskTime                 11264 non-null int64
frequency                     11264 non-null int64
grouped                       11264 non-null bool
hasKeyError            

In [9]:
# Download the questionnaire responses and keep only rows that have a Condition.
subjective = fetch_questionnaire('18pwOH1C1Ocw0VwjzYf7NfIfoRtSxLi_1WRn3zHuNatU')
# Take an explicit copy of the filtered rows: later cells assign new columns
# into `subjective`, and doing that on a derived slice raises
# SettingWithCopyWarning.
subjective = subjective[subjective['Condition'].notnull()].copy()

## Renaming

Rename the columns with capitalised names so that they look nice in the graphs.

In [10]:
# Copy raw log columns to capitalised, human-readable names for plotting.
# NOTE(review): 'Recall' and 'Expert Use' are both copied from
# 'isSuccessfulRecall' — confirm that this is intended.
display_names = [
    ('Block', 'block'),
    ('Participant', 'participant'),
    ('Total Task Time', 'totalTaskTime'),
    ('Recall', 'isSuccessfulRecall'),
    ('Expert Use', 'isSuccessfulRecall'),
    ('Condition', 'condition'),
    ('Cancellation Rate', 'dismissedShortcutInterface'),
]
for pretty_name, raw_name in display_names:
    df[pretty_name] = df[raw_name]

Error rate is measured as the proportion of trials in a block that contain one or more errors. For each individual trial it is an indicator variable: 1 if the trial contains at least one error, 0 otherwise.

In [11]:
# Per-trial error indicator: 1 when the trial recorded any errors, else 0.
has_error = df['errors'].astype(bool)
df['Error Rate'] = has_error.astype(int)

In [12]:
# Give the questionnaire columns short names that match the trial data.
subjective_names = [
    ('Participant', 'ID'),
    ('Primary', 'Preference for Primary Posture'),
    ('Secondary', 'Preference for Secondary Posture'),
    ('Tertiary', 'Preference for Tertiary Posture'),
    ('Quaternary', 'Preference for Quaternary Posture'),
]
for short_name, question in subjective_names:
    subjective[short_name] = subjective[question]

## Recoding

In [13]:
def convert_cond(val):
    """Map the condition label 'Feedback' to 'GuidedKey'; pass anything else through unchanged."""
    return 'GuidedKey' if val == 'Feedback' else val

Recode the conditions so that they are ordered properly in the graphs.

In [14]:
# Explicit categorical dtypes fix the order in which levels appear in plots
# and group-bys. CategoricalDtype (pandas >= 0.21) replaces the
# `astype('category', categories=...)` keyword form, which later pandas
# versions reject.
condition_dtype = pd.api.types.CategoricalDtype(
    categories=['Baseline', 'GuidedKey', 'FingerArc', 'FingerChord'], ordered=True)
# The original passed this list positionally to astype(), where it was bound
# to the `copy` parameter instead of `categories`, so the intended
# Ungrouped-before-Grouped order was silently ignored.
grouped_dtype = pd.api.types.CategoricalDtype(categories=['Ungrouped', 'Grouped'])

df['Condition'] = df['Condition'].map(convert_cond).astype(condition_dtype)
# NOTE(review): 'Trial' is the product of the trial and block numbers, which
# is not a running trial index — confirm this is what the plots expect.
df['Trial'] = df['trial'] * df['block']
df['Grouped'] = df['grouped'].map(lambda x: 'Grouped' if x else 'Ungrouped').astype(grouped_dtype)

In [15]:
# Apply the same condition renaming and level order to the questionnaire data.
# CategoricalDtype (pandas >= 0.21) replaces the
# `astype('category', categories=...)` keyword form, which later pandas
# versions reject.
subjective_condition_dtype = pd.api.types.CategoricalDtype(
    categories=['Baseline', 'GuidedKey', 'FingerArc', 'FingerChord'], ordered=True)
subjective['Condition'] = subjective['Condition'].map(convert_cond).astype(subjective_condition_dtype)

## Filtering

Reduce the columns to a small set to make everything a bit cleaner and split out our test and training data.

In [16]:
# Split the questionnaire into three narrower tables that share the
# Participant and Condition key columns.
key_columns = ['Participant', 'Condition']

# Posture preference rankings.
posture_columns = ['Primary', 'Secondary', 'Tertiary', 'Quaternary']
posture = subjective[key_columns + posture_columns]

# Participant background information.
demographic_columns = [
    'Age', 'Gender',
    'Dominant Hand', 'Dominant Hand for Trackpad or Mouse',
    'Daily Computer Use Hours', 'Weekly Computer Use', 'Typing Test - WPM',
    'Typing Test - CPM',
]
demographics = subjective[key_columns + demographic_columns]

# Subjective ratings; from here on `subjective` holds only these columns.
# subjective = subjective[['Participant', 'Condition', 'Ease of Learning the Technique', 'Ease of Use']]
rating_columns = [
    'Ease of Use', 'Ease of Learning the Technique',
    'Ease of Memorising Shortcuts', 'Hand Fatigue', 'Eye Fatigue',
    'Accuracy', 'Speed',
]
subjective = subjective[key_columns + rating_columns]


# Reshape the questionnaire tables to long form (one row per participant,
# condition and measure) and re-apply the ordered Condition levels.
# CategoricalDtype (pandas >= 0.21) replaces the
# `astype('category', categories=...)` keyword form, which later pandas
# versions reject.
melted_condition_dtype = pd.api.types.CategoricalDtype(
    categories=['Baseline', 'GuidedKey', 'FingerArc', 'FingerChord'], ordered=True)

subjective_melted = subjective.melt(id_vars=["Participant", "Condition"], var_name="Measure", value_name="Rating")
subjective_melted['Condition'] = subjective_melted['Condition'].astype(melted_condition_dtype)

demographics_melted = demographics.melt(id_vars=["Participant", "Condition"], var_name="Measure", value_name="Value")
demographics_melted['Condition'] = demographics_melted['Condition'].astype(melted_condition_dtype)

In [17]:
# Training-block trials, reduced to the columns used in the analysis.
training_columns = ['Trial', 'trial', 'Condition', 'Participant', 'Block', 'Grouped',
                    'Total Task Time', 'Error Rate', 'Cancellation Rate']
# Select rows and columns in one step and copy the result, so that the column
# assignment below writes to an independent frame. The original copied before
# selecting columns, leaving the final frame a derived slice.
training = df.loc[df['block_type'] == 'training', training_columns].copy()

# Convert the boolean dismissal flag to 0/1 so that it can be averaged.
training['Cancellation Rate'] = training['Cancellation Rate'].map(int)

In [18]:
def codeAttempt(row):
    """Classify a trial row by the selection method used.

    Returns 'Linear Menu' when `checkedLinearMenu` is truthy, otherwise
    'Shortcut Guidance' when `checkedShortcutInterface` is truthy, otherwise
    'Shortcut'. The linear-menu flag takes precedence when both are set.
    """
    if row['checkedLinearMenu']:
        label = 'Linear Menu'
    elif row['checkedShortcutInterface']:
        label = 'Shortcut Guidance'
    else:
        label = 'Shortcut'
    return label

# Build a long-form table with one row per training trial and selection type,
# where 'Number of Selections' is 1 for the type used in that trial, else 0.
modality_ids = ['Condition', 'Participant', 'Block', 'Grouped', 'trial']
modality_flags = ['checkedLinearMenu', 'checkedShortcutInterface']
modality = df.loc[df['block_type'] == 'training', modality_ids + modality_flags].copy()

# Classify every trial, then expand the label into three 0/1 indicator
# columns. The column order here determines the row order of the melt below.
selection_label = modality.apply(codeAttempt, axis=1)
for selection_type in ['Shortcut Guidance', 'Shortcut', 'Linear Menu']:
    modality[selection_type] = (selection_label == selection_type).map(int)

# The raw flags are no longer needed once the indicators exist.
modality = modality.drop(modality_flags, axis=1)

modality = modality.melt(id_vars=modality_ids, value_name='Number of Selections', var_name='Selection Type')

# CategoricalDtype (pandas >= 0.21) replaces the
# `astype('category', categories=...)` keyword form, which later pandas
# versions reject.
modality['Condition'] = modality['Condition'].astype(pd.api.types.CategoricalDtype(
    categories=['Baseline', 'GuidedKey', 'FingerArc', 'FingerChord'], ordered=True))
modality['Selection Type'] = modality['Selection Type'].astype(pd.api.types.CategoricalDtype(
    categories=['Linear Menu', 'Shortcut Guidance', 'Shortcut'], ordered=True))

# Count selections of each type per condition, participant, block and grouping.
# NOTE(review): 'trial' is not a grouping key, so it is summed as well —
# confirm that the summed column is ignored downstream.
modality_grouped = modality.groupby(['Condition', 'Participant', 'Block', 'Grouped', 'Selection Type'], as_index=False).sum()

## Aggregation

In [19]:
# How each measure is summarised within a group: median task time and mean
# of the per-trial error indicator.
training_aggregations = {
    'Total Task Time' : 'median',
    'Error Rate' : 'mean',
}


def _summarise_training(keys):
    """Group the training trials by `keys` and apply `training_aggregations`."""
    return training.groupby(keys, as_index=False).agg(training_aggregations)


# Summaries at progressively coarser levels of grouping.
training_participant_blocks = _summarise_training(['Condition', 'Participant', 'Block'])
training_block_grouped = _summarise_training(['Condition', 'Participant', 'Block', 'Grouped'])

# Rows of the per-block, per-grouping summary where Block is 8 or higher.
is_late_block = training_block_grouped['Block'] >= 8
training_last_blocks_grouped = training_block_grouped[is_late_block].copy()

training_participant = _summarise_training(['Condition', 'Participant'])
training_conditions = _summarise_training(['Condition'])