
# Experiments Model Free Analysis

Statistical inquiry into the aggregate behaviour of **all** Tasks.

A more detailed analysis will be conducted to investigate the relationships between variables/experiments & *wcst*, however the data structures employed here will be leveraged for this subsequent analysis.


---------
```
Zach Wolpe
zachcolinwolpe@gmail.com
27 July 2021
```
---------




In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
import re
import sys
sys.path.append('../process data/')
import scipy.stats as stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.offline as pyo
import plotly.express as px
from encode_processed_data import encode_data

In [2]:
# with open('../data objects/batch_processing_object.pkl', 'rb') as file2:
#     bp = pickle.load(file2)

# ---- fetch data object ----x
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as file2:
    ed = pickle.load(file2)

   
ed.__dict__.keys()

dict_keys(['raw', 'summary_table', 'fitts_summary_stats', 'corsi_summary_stats', 'navon_summary_stats', 'nback_summary_stats', 'demographics_plot', 'demographics'])

In [4]:
ed.describe_data()



        ------------------------------------------------------------------
            self.path            : raw data loc
            self.metadata        : mturk metadata
            self.mapping         : reference table
            self.data_times      : reference times table
            self.participants    : list of participant identifiers
            self.parti_code      : list of participant codes
            self.n               : total number of samples
            self.wcst_paths      : paths to wcst  raw data
            self.nback_paths     : paths to nback raw data
            self.corsi_paths     : paths to corsi raw data
            self.fitts_paths     : paths to fitts raw data
            self.navon_paths     : paths to navon raw data
            self.wcst_data       : wcst  dataframe
            self.nback_data      : nback dataframe
            self.corsi_data      : corsi dataframe
            self.fitts_data      : fitts dataframe
            self.navon_data    

In [34]:
ed.clean_data_info()



                WCST - Wisconsin Card Sorting Task                                                  DataFrame: ed.raw.wcst_date
            ---------------------------------------------------------------------------------------------------------------------------
            
                participant                     : key               : participant ID
                card_no                         : categorical       : the card shown
                correct_card                    : categorical       : the card that should be clicked of the top four on screen      
                correct_persevering             : categorical       : the card that would be clicked if the participant is persevering
                seq_no                          : numeric           : trial number
                rule                            : categorical       : matching rule  
                card_shape                      : categorical       : current card shape
                card_num

In [42]:
# ----- all categories descriptors -----x
cats_demographics   = ['gender_a', 'handedness_a', 'education_a', 'age_group']
cats_navon          = [('level_of_target', '')]

# ---- add numerical descriptors ----x
num_demographics = ['age_a','income_a', 'computer_hours_a', 'mean_reation_time_ms']
num_nback = [('block_number', ''), ('score', 'mean'), ('score', 'std'), ('status', 'mean'), ('status', 'std'), ('miss', 'mean'), 
            ('miss', 'std'), ('false_alarm', 'mean'), ('false_alarm', 'std'), ('reaction_time_ms', 'mean'), ('reaction_time_ms', 'std')]
num_navon = [('correct', 'mean'), ('correct',  'std'), ('too_slow', 'mean'), ('too_slow',  'std'), 
            ('reaction_time_ms', 'mean'), ('reaction_time_ms',  'std')]
num_corsi = [('highest_span',  'max'), ('status', 'mean'), ('status',  'std')]
num_fitts = [('delta', 'mean'), ('delta',  'std'), ('status', 'mean')]


# ---- user selected menus ---x

ed.summary_table.head()

Unnamed: 0_level_0,nback_status,nback_reaction_time_ms,fitts_mean_deviation,corsi_block_span,navon_level_of_target,navon_perc_correct,navon_reaction_time_ms,wcst_RT,wcst_accuracy,demographics_age_a,demographics_gender_a,demographics_handedness_a,demographics_education_a,demographics_income_a,demographics_computer_hours_a,demographics_age_group,demographics_mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100934.0,0.894444,2218.077778,-5.7,4.0,global,0.166667,664.583333,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,local,0.461538,597.769231,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,none,0.48,622.96,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
103322.0,0.916667,2366.011111,-203.05,6.0,global,0.923077,959.769231,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571
103322.0,0.916667,2366.011111,-203.05,6.0,local,0.916667,999.75,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571


# Fitts Law

In [8]:
def fitts_law_deviation(n=10, data=ed.fitts_summary_stats[[('delta','mean')]]):

    # ---- compute bins ----x
    def compute_fitts_bins(data, n=n):
        rng = np.linspace(min(data[('delta','mean')]), max(data[('delta','mean')]), n)
        data['bin'] = 'na'; data['group'] = 'na'
        for r in range(n-1): 
            data.loc[(data[('delta','mean')] > rng[r]) & (data[('delta','mean')] <= rng[r+1]), 'bin']   = str(round(rng[r+1])) + '-' + str(round(rng[r])) 
            data.loc[(data[('delta','mean')] > rng[r]) & (data[('delta','mean')] <= rng[r+1]), 'group'] = r+1
        return(data)
    data = compute_fitts_bins(data=data)
    
    # ---- plot ----x
    trace  = go.Histogram(x=data[('delta','mean')], marker_color='#4ca3dd')
    layout = go.Layout( title='Mean Fitts Law Deviation', xaxis={'title':'deviation from expected (Fitts Law Predicted) RT'}, yaxis={'title':'Frequency'}, 
                        template='none', width=700, height=500)
    fig    = go.Figure(data=trace, layout=layout)

    return({'data':data, 'figure':fig})


fitts = fitts_law_deviation()
fitts['figure']

In [12]:
data = ed.nback_summary_stats
data.head()
data.describe()

Unnamed: 0_level_0,participant,block_number,trial_counter,score,score,status,status,miss,miss,false_alarm,false_alarm,reaction_time_ms,reaction_time_ms
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,std,mean,std,mean,std,mean,std
count,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0
mean,546111.299213,2.0,26.666667,0.308727,0.460112,0.749694,0.366391,0.198381,0.340992,0.110346,0.232506,2214.854812,855.473684
std,261751.306114,0.817033,4.717141,0.090589,0.0424,0.193614,0.148714,0.134232,0.174419,0.126008,0.178033,640.105743,417.501856
min,100934.0,1.0,20.0,0.05,0.223607,0.15,0.0,0.0,0.0,0.0,0.0,149.366667,0.0
25%,317920.0,1.0,20.0,0.25,0.444262,0.633333,0.305129,0.1,0.305129,0.0,0.0,1975.783333,706.367101
50%,528567.5,2.0,30.0,0.3,0.470162,0.8,0.410391,0.2,0.410391,0.05,0.223607,2301.191667,1029.825048
75%,790956.0,3.0,30.0,0.366667,0.490133,0.9,0.48936,0.3,0.466092,0.2,0.406838,2657.325,1159.9737
max,998593.0,3.0,30.0,0.6,0.512989,1.0,0.512989,0.566667,0.512989,0.566667,0.512989,3000.0,1361.925104


In [13]:
def scatter_plot(data, xvar, yvar, group_var=False, xlab='', ylab='', title='', cols=px.colors.qualitative.Pastel):
    

    if not group_var: 
        traces = [go.Scatter(x=data[xvar], y=data[yvar], mode='markers', marker_color=cols[0])]
        layout = go.Layout( title=title, xaxis={'title':xlab}, yaxis={'title':ylab}, template='none')
    else:
        traces = []; c=0
        for g in np.unique(data[group_var]):
            c += 1
            dt = data.loc[data[group_var]==g,]
            traces.append(go.Scatter(x=dt[xvar], y=dt[yvar], mode='markers', marker_color=cols[c], name=round(g)))
        layout = go.Layout( title=title, xaxis={'title':xlab}, yaxis={'title':ylab}, template='none', legend_title_text='Trend')
    fig = go.Figure(data=traces, layout=layout)
    return fig

scatter_plot(data=data, group_var='block_number', xvar=('reaction_time_ms', 'mean'), yvar=('status', 'mean'), xlab='Reaction Time (ms)', ylab='% Correct', title='Performance by Reaction Time (RT)')


    

In [14]:
# ------------------------------- function: summmary dataset -------------------------------x

def return_summary_data(ed=ed):
    # ----- NBack -----x
    x = ed.nback_summary_stats.groupby('participant').agg({
        ('status', 'mean'): ['mean'],
        ('reaction_time_ms', 'mean'): ['mean']
    })
    x.columns = ['nback_status', 'nback_reaction_time_ms']
    nback = x 

    # ----- Fitts -----x
    x = ed.fitts_summary_stats[[('participant', ''), ('delta','mean')]].set_index('participant')
    x.columns = ['fitts_mean_deviation']
    fitts = x

    # ----- corsi -----x
    x = ed.corsi_summary_stats[[('participant', ''), ('highest_span','max')]].set_index('participant')
    x.columns = ['corsi_block_span']
    corsi = x

    # ----- Navon -----x
    x = ed.navon_summary_stats[[('participant', ''), ('level_of_target',''), ('correct','mean'), ('reaction_time_ms', 'mean')]].set_index('participant')
    x.columns = ['navon_level_of_target', 'navon_perc_correct', 'navon_reaction_time_ms']
    navon = x

    # ----- wcst ----x
    def compute_performance_trial_bins(n_bins=10, wcst_data=ed.raw.wcst_data):
        """Return: DataFrame capturing the performance per n_bins triasl"""

        # ---- add trial number ----x
        xx = []; df = wcst_data
        [xx.append((i%100)+1) for i in range(df.shape[0])]
        df['trial_no'] = xx 

        # ---- status==1 --> correct
        t = np.linspace(0,100,num=n_bins+1).tolist(); c=0

        for tt in t[1:]:
            c +=1
            x = df.loc[df['trial_no'] < tt,].groupby(['participant', 'status']).agg({
            'participant':              ['count'],
            'reaction_time_ms':         ['mean', 'std'],
            'perseverance_error':       ['mean'],
            'not_perseverance_error':   ['mean']
            }).reset_index()
            x['percentages'] = x[('participant', 'count')]/tt
            x['trials']      = str(round(t[c-1])) + '-' + str(round(t[c]))
            x['trials_2']    = t[c]
            if c==1:    data=x
            else:       data=data.append(other=x)

        # if x>0 --> perseverance_error > not_perseverance_error --> main error=perseverance_error
        data['main_error'] = np.where(data['perseverance_error'] - data['not_perseverance_error'] > 0, 'perserverance errors', 'non perserverance errors')

        return(data)

    x = compute_performance_trial_bins(n_bins=1)
    x = x.loc[x['status']==1, [('participant', ''), ('reaction_time_ms', 'mean'), ('percentages', '')]].set_index('participant')
    x.columns = ['wcst_RT', 'wcst_accuracy']
    wcst = x

    # ---- demograpics ----x
    x = ed.demographics.set_index('participant')
    x.columns = ['demographics_' + xx for xx in x.columns]
    demo = x


    # ---- Join ----x
    df = nback
    for d in [nback, fitts, corsi, navon, wcst, demo][1:]:
        df = df.join(d, how='outer')

    # ------ discrete vars ------x
    categorical_vars = ['navon_level_of_target', 'demographics_gender_a','demographics_handedness_a', 'demographics_education_a','demographics_age_group']

    # ------ continuous vars ------x
    continuous_vars  = ['nback_status', 'nback_reaction_time_ms', 'fitts_mean_deviation', 'corsi_block_span', 'navon_perc_correct', 
                        'navon_reaction_time_ms', 'wcst_RT', 'wcst_accuracy', 'demographics_age_a', 'demographics_income_a', 
                        'demographics_computer_hours_a', 'demographics_mean_reation_time_ms']

    # ----- fix datatypes -----x
    df[categorical_vars] = df[categorical_vars].astype('string')
    df[continuous_vars]  = df[continuous_vars].astype('float')

    return df


df = return_summary_data()
df.head()


Unnamed: 0_level_0,nback_status,nback_reaction_time_ms,fitts_mean_deviation,corsi_block_span,navon_level_of_target,navon_perc_correct,navon_reaction_time_ms,wcst_RT,wcst_accuracy,demographics_age_a,demographics_gender_a,demographics_handedness_a,demographics_education_a,demographics_income_a,demographics_computer_hours_a,demographics_age_group,demographics_mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100934.0,0.894444,2218.077778,-5.7,4.0,global,0.166667,664.583333,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,local,0.461538,597.769231,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,none,0.48,622.96,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
103322.0,0.916667,2366.011111,-203.05,6.0,global,0.923077,959.769231,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571
103322.0,0.916667,2366.011111,-203.05,6.0,local,0.916667,999.75,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571


# Plots + Stats 

# ScatterPlot

In [15]:
def scatter_plot(data, xvar, yvar, group_var=False, xlab='', ylab='', title='', cols=px.colors.qualitative.Pastel):

    if not group_var: 
        data = data[[xvar, yvar]].dropna()
        traces = [go.Scatter(x=data[xvar], y=data[yvar], mode='markers', marker_color=cols[0])]
        layout = go.Layout( title=title, xaxis={'title':xlab}, yaxis={'title':ylab}, template='none')
    else:
        data = data[[xvar, yvar, group_var]].dropna()
        traces = []; c=0
        for g in data[group_var].unique():
            c += 1
            dt = data.loc[data[group_var]==g,]
            traces.append(go.Scatter(x=dt[xvar], y=dt[yvar], mode='markers', marker_color=cols[c], name=g))
        layout = go.Layout( title=title, xaxis={'title':xlab}, yaxis={'title':ylab}, template='none', legend_title_text='Trend')
    fig = go.Figure(data=traces, layout=layout)
    return fig

scatter_plot(data=df, group_var='demographics_gender_a', xvar='nback_status', yvar='corsi_block_span', xlab='NBack performance', ylab='Corsi Block Span', title='NBack vs Corsi Performance')

    

In [16]:
scatter_plot(data=df, group_var='demographics_education_a', xvar='wcst_RT', yvar='wcst_accuracy', title='WCST Performance vs WCST RT', cols=px.colors.qualitative.Safe)

In [17]:
# px.colors.qualitative.swatches()

In [18]:
from plotly.colors import n_colors
from plotly.subplots import make_subplots

import plotly.express as px



def distribution_plot(data, xvar, nbinsx=10, opacity=1, group_var=False, xlab='', ylab='', title='', cols=['#A56CC1', '#A6ACEC', '#63F5EF', 'steelblue', 'darkblue']):
    """Distribution of Variable 1"""
    if title=='': 
        if group_var: title = 'Distribution of ' + str(xvar) + ' by ' + str(group_var)
        else: title = 'Distribution of ' + str(xvar)

    if not group_var: 
        data   = data[[xvar]].dropna()
        traces = [go.Histogram(x=data[xvar], marker_color=cols[0], nbinsx=nbinsx, opacity=opacity)]
        layout = go.Layout(title=title, xaxis={'title':xlab}, yaxis={'title':ylab}, template='none')
        fig    = go.Figure(data=traces, layout=layout)
        return fig
    else:
        fig = make_subplots(rows=2, cols=1, subplot_titles=('', ''))
        data   = data[[xvar, group_var]].dropna()
        traces = []; c=0; RTs = []
        for g in data[group_var].unique():
            dt = data.loc[data[group_var]==g,]
            RTs.append(dt[xvar])


        
            c += 1
            fig.add_trace(go.Histogram(x=dt[xvar], nbinsx=nbinsx, marker_color=cols[c], name=g, opacity=opacity), row=2, col=1)

        # ---- sort lists ----x
        # RTs.append(dt[xvar])
        srt = np.argsort([np.mean(r) for r in RTs])
        RT  = [RTs[s] for s in srt]

        # ---- create figure: violin plots ----x
        c=-1
        for nm, rt in zip(data[group_var].unique(), RT):
            c+=1
            fig.add_trace(go.Violin(
                showlegend=False, y=rt, name=nm, box_visible=True,
                meanline_visible=True, fillcolor=cols[c], line_color=cols[-1]), row=1, col=1)
        
        
        fig.update_layout(title_text=title, height=700, template='none')
        return fig

    

    

    


distribution_plot(data=df, nbinsx=50, xvar='wcst_RT', group_var='demographics_education_a')

# Anova 

- Var X vs Groups


# Summary Stats
mean + std per grou[]

In [19]:
# =========================================== ANOVA == REACTION TIME ======================================================x
def ANOVA(data, group_var, value_var):
    # Create ANOVA backbone table
    raw_data = [['Between Groups', '', '', '', '', '', ''], ['Within Groups', '', '', '', '', '', ''], ['Total', '', '', '', '', '', '']] 
    anova_table = pd.DataFrame(raw_data, columns = ['Source of Variation', 'SS', 'df', 'MS', 'F', 'P-value', 'F crit']) 
    anova_table.set_index('Source of Variation', inplace = True)

    # calculate SSTR and update anova table
    x_bar = data[value_var].mean()
    SSTR = data.groupby(group_var).count() * (data.groupby(group_var).mean() - x_bar)**2
    anova_table['SS']['Between Groups'] = SSTR[value_var].sum()

    # calculate SSE and update anova table
    SSE = (data.groupby(group_var).count() - 1) * data.groupby(group_var).std()**2
    anova_table['SS']['Within Groups'] = SSE[value_var].sum()

    # calculate SSTR and update anova table
    SSTR = SSTR[value_var].sum() + SSE[value_var].sum()
    anova_table['SS']['Total'] = SSTR

    # update degree of freedom
    anova_table['df']['Between Groups'] = data[group_var].nunique() - 1
    anova_table['df']['Within Groups'] = data.shape[0] - data[group_var].nunique()
    anova_table['df']['Total'] = data.shape[0] - 1

    # calculate MS
    anova_table['MS'] = anova_table['SS'] / anova_table['df']

    # calculate F 
    F = anova_table['MS']['Between Groups'] / anova_table['MS']['Within Groups']
    anova_table['F']['Between Groups'] = F

    # p-value
    anova_table['P-value']['Between Groups'] = 1 - stats.f.cdf(F, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # F critical 
    alpha = 0.05
    # possible types "right-tailed, left-tailed, two-tailed"
    tail_hypothesis_type = "two-tailed"
    if tail_hypothesis_type == "two-tailed":
        alpha /= 2
    anova_table['F crit']['Between Groups'] = stats.f.ppf(1-alpha, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # Final ANOVA Table
    return(anova_table)

In [20]:
ANOVA(data=df, value_var='wcst_RT', group_var='demographics_education_a')

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,9265103.695754,2,4632551.847877,17.51658,0.0,3.705504
Within Groups,217127142.647837,821,264466.678012,,,
Total,226392246.343591,823,275081.708801,,,


# Summary Stats

Per group compute the:

- mean
- std
- mean RT

for each task:

- wcst
- navon 
- corsi
- nback
- fitts

In [22]:
df.columns

Index(['nback_status', 'nback_reaction_time_ms', 'fitts_mean_deviation',
       'corsi_block_span', 'navon_level_of_target', 'navon_perc_correct',
       'navon_reaction_time_ms', 'wcst_RT', 'wcst_accuracy',
       'demographics_age_a', 'demographics_gender_a',
       'demographics_handedness_a', 'demographics_education_a',
       'demographics_income_a', 'demographics_computer_hours_a',
       'demographics_age_group', 'demographics_mean_reation_time_ms'],
      dtype='object')

In [27]:
def compute_summary_stats(data=df, value_var='wcst_RT', group_var='demographics_education_a'):
    return data.groupby(group_var).agg({
        'wcst_accuracy':            ['mean', 'std'],
        'wcst_RT':                  'mean',
        'navon_perc_correct':       ['mean', 'std'],
        'navon_reaction_time_ms':   'mean',
        'nback_status':             ['mean', 'std'],
        'nback_reaction_time_ms':   'mean',
        'fitts_mean_deviation':     ['mean', 'std'],
        'corsi_block_span':         ['mean', 'std']    
        })

compute_summary_stats()

Unnamed: 0_level_0,wcst_accuracy,wcst_accuracy,wcst_RT,navon_perc_correct,navon_perc_correct,navon_reaction_time_ms,nback_status,nback_status,nback_reaction_time_ms,fitts_mean_deviation,fitts_mean_deviation,corsi_block_span,corsi_block_span
Unnamed: 0_level_1,mean,std,mean,mean,std,mean,mean,std,mean,mean,std,mean,std
demographics_education_a,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
graduate school,0.666383,0.194748,1792.95086,0.586568,0.340555,1382.407996,0.72003,0.184929,2209.18976,-431.330874,444.327909,2.755435,2.761953
high school,0.767254,0.137067,1460.116058,0.76342,0.211709,958.050568,0.801359,0.145566,2337.030935,-224.847482,260.012799,5.179856,2.058041
university,0.6906,0.181455,1690.271606,0.646279,0.304329,1142.875148,0.753364,0.181956,2192.409994,-367.324623,376.164444,3.5025,2.806186


In [7]:
ed.summary_table.columns

Index(['nback_status', 'nback_reaction_time_ms', 'fitts_mean_deviation',
       'corsi_block_span', 'navon_level_of_target', 'navon_perc_correct',
       'navon_reaction_time_ms', 'wcst_RT', 'wcst_accuracy',
       'demographics_age_a', 'demographics_gender_a',
       'demographics_handedness_a', 'demographics_education_a',
       'demographics_income_a', 'demographics_computer_hours_a',
       'demographics_age_group', 'demographics_mean_reation_time_ms'],
      dtype='object')

In [4]:
def compute_summary_stats(data, value_var='wcst_RT', group_var='demographics_education_a'):
        return data.groupby(group_var).agg({
            'wcst_accuracy':            ['mean', 'std'],
            'wcst_RT':                  'mean',
            'navon_perc_correct':       ['mean', 'std'],
            'navon_reaction_time_ms':   'mean',
            'nback_status':             ['mean', 'std'],
            'nback_reaction_time_ms':   'mean',
            'fitts_mean_deviation':     ['mean', 'std'],
            'corsi_block_span':         ['mean', 'std']    
            })

In [63]:
x = compute_summary_stats(data=ed.summary_table)

x


Unnamed: 0,demographics_education_a,wcst_accuracy mean,wcst_accuracy std,wcst_RT mean,navon_perc_correct mean,navon_perc_correct std,navon_reaction_time_ms mean,nback_status mean,nback_status std,nback_reaction_time_ms mean,fitts_mean_deviation mean,fitts_mean_deviation std,corsi_block_span mean,corsi_block_span std
0,graduate school,0.666383,0.194748,1792.95086,0.586568,0.340555,1382.407996,0.72003,0.184929,2209.18976,-431.330874,444.327909,2.755435,2.761953
1,high school,0.767254,0.137067,1460.116058,0.76342,0.211709,958.050568,0.801359,0.145566,2337.030935,-224.847482,260.012799,5.179856,2.058041
2,university,0.6906,0.181455,1690.271606,0.646279,0.304329,1142.875148,0.753364,0.181956,2192.409994,-367.324623,376.164444,3.5025,2.806186


ValueError: too many values to unpack (expected 2)

Unnamed: 0,demographics_education_a,wcst_accuracy mean,wcst_accuracy std,wcst_RT mean,navon_perc_correct mean,navon_perc_correct std,navon_reaction_time_ms mean,nback_status mean,nback_status std,nback_reaction_time_ms mean,fitts_mean_deviation mean,fitts_mean_deviation std,corsi_block_span mean,corsi_block_span std
0,graduate school,0.666383,0.194748,1792.95086,0.586568,0.340555,1382.407996,0.72003,0.184929,2209.18976,-431.330874,444.327909,2.755435,2.761953
1,high school,0.767254,0.137067,1460.116058,0.76342,0.211709,958.050568,0.801359,0.145566,2337.030935,-224.847482,260.012799,5.179856,2.058041
2,university,0.6906,0.181455,1690.271606,0.646279,0.304329,1142.875148,0.753364,0.181956,2192.409994,-367.324623,376.164444,3.5025,2.806186


In [55]:
x.columns.get_level_values(1)

Index(['mean', 'std', 'mean', 'mean', 'std', 'mean', 'mean', 'std', 'mean',
       'mean', 'std', 'mean', 'std'],
      dtype='object')

In [23]:
# [{"name": i, "id": i} for i in x.columns]
x

# pd.MultiIndex.to_flat_in/dex(x)

x.loc[len(x)] = x

KeyError: 3

In [None]:
from plotly.colors import n_colors

def violin_distribution_plot(
    group_var=('participant',''), groups=participants, value_var=('reaction_time_ms', 'mean'), 
    colors=n_colors('rgb(200, 10, 10)', 'rgb(5, 200, 200)', 10, colortype='rgb'),
    title='Reaction Time Distributions', xaxis={'title':'participants'}, yaxis={'title':'Reaction Times'}, template='plotly_white', 
    legend_title_text='Participant', width=700, height=500):
    
    # ---- extract data ----x
    RTs = []
    for g in groups:
        x = data.loc[data[group_var]==g,][value_var]
        RTs.append(x)

    # ---- sort lists ----x
    srt = np.argsort([np.mean(r) for r in RTs])
    RT = [RTs[s] for s in srt]

    # ---- create figure: violin plots ----x
    traces = []
    for rt in range(len(RT)):
        traces.append(go.Violin(y=RT[rt],
                                name=groups[rt],
                                box_visible=True,
                                meanline_visible=True))

    layout = go.Layout(title=title, xaxis=xaxis, yaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)
    fig = go.Figure(data=traces, layout=layout)

    # ---- create figure: distribution plots ----x
    fig_2 = go.Figure()
    for data_line, color, grp in zip(RT, colors, groups):
        fig_2.add_trace(go.Violin(x=data_line, line_color=color, name=grp))


    fig_2.update_traces(orientation='h', side='positive', width=3, points=False)
    fig_2.update_layout(xaxis_showgrid=False, xaxis_zeroline=False, title=title, yaxis=xaxis, xaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)


    return({'data':zip(groups, RTs), 'violin_plot':fig, 'distribution_plot':fig_2})


In [15]:
# ---------------- demographic data 
demo_pie_map = {
    'gender_a':     {'dummy_var':'gender_a',        'labels':['male', 'female', 'other'],                       'colors':['steelblue', 'darkred', 'cyan'],                                          'title':'Gender Distribution',     'name':'gender'},
    'education_a':  {'dummy_var':'education_a',     'labels':['university', 'graduate school', 'high school'],  'colors':['rgb(177, 127, 38)', 'rgb(129, 180, 179)', 'rgb(205, 152, 36)'],  'title':'Education Distribution',   'name':'education'},
    'handedness_a': {'dummy_var':'handedness_a',    'labels':['right', 'left', 'ambidextrous'],                 'colors':px.colors.sequential.RdBu,                                         'title':'Handedness Distribution',  'name':'handedness'},
    'age_group':    {'dummy_var':'age_group',       'labels':np.unique(ed.demographics[['age_group']]).tolist(),'colors':px.colors.sequential.GnBu,                                         'title':'Age Distribution',         'name':'age'}
    }
    
demo_continuous_naming = {
     'age_a':                   {'xlab':'Age',                      'ylab':'Count', 'name':'Age Distribution by '},
     'income_a':                {'xlab':'Income',                   'ylab':'Count', 'name':'Income Distribution by '},
     'computer_hours_a':        {'xlab':'Computer hours',           'ylab':'Count', 'name':'Computer Hours Distribution by '},
     'mean_reation_time_ms':    {'xlab':'RT (reaction time (ms))',  'ylab':'Count', 'name':'RT Distribution by '},
}

In [16]:
# ----- all categories descriptors -----x
cats_demographics   = ['gender_a', 'handedness_a', 'education_a', 'age_group']
cats_navon          = [('level_of_target', '')]

# ---- add numerical descriptors ----x
num_demographics = ['age_a','income_a', 'computer_hours_a', 'mean_reation_time_ms']
num_nback = [('block_number', ''), ('score', 'mean'), ('score', 'std'), ('status', 'mean'), ('status', 'std'), ('miss', 'mean'), 
            ('miss', 'std'), ('false_alarm', 'mean'), ('false_alarm', 'std'), ('reaction_time_ms', 'mean'), ('reaction_time_ms', 'std')]
num_navon = [('correct', 'mean'), ('correct',  'std'), ('too_slow', 'mean'), ('too_slow',  'std'), 
            ('reaction_time_ms', 'mean'), ('reaction_time_ms',  'std')]
num_corsi = [('highest_span',  'max'), ('status', 'mean'), ('status',  'std')]
num_fitts = [('delta', 'mean'), ('delta',  'std'), ('status', 'mean')]


categorical_vars = [
    # ------ demographics -----x
    'gender_a', 'education_a', 'handedness_a', 'age_group',
    # ------ Fitts ------x
    'mean_fitts_deviation',
    # ------ Nback ------x
    'nback_status', 'nback_reaction_time_ms',
    
    ]