# Model Free Analysis

Statistical inquiry into the aggregate behaviour of the *Wisconsin Sorting* & *NBack* Tasks.


---------
```
Zach Wolpe
zachcolinwolpe@gmail.com
21 July 2021
```
---------



# Executive Functions

The additional experiments are provided to gauge executive functions and computer literacy that may distinguish candidates when participating in the WCST & NBack Tasks.

In [18]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
import re
import sys
sys.path.append('../process data/')
import scipy.stats as stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.offline as pyo
import plotly.express as px
from encode_processed_data import encode_data

In [None]:
# with open('../data objects/batch_processing_object.pkl', 'rb') as file2:
#     bp = pickle.load(file2)

# ---- fetch data object ----x
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as file2:
    ed = pickle.load(file2)

   
ed.__dict__.keys()

In [2]:
ed.describe_data()

NameError: name 'ed' is not defined

In [None]:
ed.clean_data_info()

In [None]:
# ----- all categories descriptors -----x
cats_demographics   = ['gender_a', 'handedness_a', 'education_a', 'age_group']
cats_navon          = [('level_of_target', '')]

# ---- add numerical descriptors ----x
num_demographics = ['age_a','income_a', 'computer_hours_a', 'mean_reation_time_ms']
num_nback = [('block_number', ''), ('score', 'mean'), ('score', 'std'), ('status', 'mean'), ('status', 'std'), ('miss', 'mean'), 
            ('miss', 'std'), ('false_alarm', 'mean'), ('false_alarm', 'std'), ('reaction_time_ms', 'mean'), ('reaction_time_ms', 'std')]
num_navon = [('correct', 'mean'), ('correct',  'std'), ('too_slow', 'mean'), ('too_slow',  'std'), 
            ('reaction_time_ms', 'mean'), ('reaction_time_ms',  'std')]
num_corsi = [('highest_span',  'max'), ('status', 'mean'), ('status',  'std')]
num_fitts = [('delta', 'mean'), ('delta',  'std'), ('status', 'mean')]


# ---- user selected menus ---x

# Fitts Law

In [None]:
def fitts_law_deviation(n=10, data=ed.fitts_summary_stats[[('delta','mean')]]):

    # ---- compute bins ----x
    def compute_fitts_bins(data, n=n):
        rng = np.linspace(min(data[('delta','mean')]), max(data[('delta','mean')]), n)
        data['bin'] = 'na'; data['group'] = 'na'
        for r in range(n-1): 
            data.loc[(data[('delta','mean')] > rng[r]) & (data[('delta','mean')] <= rng[r+1]), 'bin']   = str(round(rng[r+1])) + '-' + str(round(rng[r])) 
            data.loc[(data[('delta','mean')] > rng[r]) & (data[('delta','mean')] <= rng[r+1]), 'group'] = r+1
        return(data)
    data = compute_fitts_bins(data=data)
    
    # ---- plot ----x
    trace  = go.Histogram(x=data[('delta','mean')], marker_color='#4ca3dd')
    layout = go.Layout( title='Mean Fitts Law Deviation', xaxis={'title':'deviation from expected (Fitts Law Predicted) RT'}, yaxis={'title':'Frequency'}, 
                        template='none', width=700, height=500)
    fig    = go.Figure(data=trace, layout=layout)

    return({'data':data, 'figure':fig})


fitts = fitts_law_deviation()
fitts['figure']

In [None]:
fitts['data'].head()

# Corsi Block Span

In [None]:
data = ed.corsi_summary_stats
data.head()

In [None]:
  # ---- plot ----x
data = ed.corsi_summary_stats
trace = go.Histogram(x=data[('highest_span','max')], marker_color='#c43078')
layout = go.Layout( title='Max corsi block span distribution', xaxis={'title':'Max Corsi Block Span'}, yaxis={'title':'Frequency'}, 
template='none', width=700, height=500)
fig    = go.Figure(data=trace, layout=layout)
fig.show()

#   # ---- plot: % Correct (NA) ----x
# trace = go.Histogram(x=data[('status','mean')], marker_color='#18d4e4')
# layout = go.Layout( title='Max corsi block span distribution', xaxis={'title':'Max Corsi Block Span'}, yaxis={'title':'Frequency'}, 
# template='none', width=700, height=500)
# fig    = go.Figure(data=trace, layout=layout)
# fig.show()


# Navon


In [30]:
data = ed.navon_summary_stats
data.head()

Unnamed: 0_level_0,participant,level_of_target,correct,correct,too_slow,too_slow,reaction_time_ms,reaction_time_ms
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
0,100934.0,global,0.166667,0.389249,0.0,0.0,664.583333,221.98954
1,100934.0,local,0.461538,0.518875,0.0,0.0,597.769231,294.353856
2,100934.0,none,0.48,0.509902,0.0,0.0,622.96,343.110949
3,103322.0,global,0.923077,0.27735,0.0,0.0,959.769231,358.604739
4,103322.0,local,0.916667,0.288675,0.0,0.0,999.75,221.561453


In [31]:
fig = go.Figure()
cols = {'global':'#0988c4', 'local':'#d60e91', 'none':'#992277'}
traces = []
for t in ['global', 'local']:
    sub = data.loc[data.level_of_target==t,]
    fig.add_trace(go.Histogram(x=sub[('correct','mean')], name=t, marker_color=cols[t]))

fig.update_layout(template='none', title_text='Distribution of performance in Navon Task', xaxis_title_text='% Correct', yaxis_title_text='Freqency')

In [32]:
fig = go.Figure()
cols = {'global':'#0988c4', 'local':'#d60e91', 'none':'#992277'}
traces = []
for t in ['global', 'local', 'none']:
    sub = data.loc[data.level_of_target==t,]
    fig.add_trace(go.Histogram(x=sub[('reaction_time_ms','mean')], name=t, marker_color=cols[t]))

fig.update_layout(template='none', title_text='Distribution of Reaction Time in Navon Task', xaxis_title_text='Reaction Time', yaxis_title_text='Freqency')

# Nback

In [38]:
#         self.demographics           : dataframe
data = ed.nback_summary_stats
data.head()

Unnamed: 0_level_0,participant,block_number,trial_counter,score,score,status,status,miss,miss,false_alarm,false_alarm,reaction_time_ms,reaction_time_ms
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,std,mean,std,mean,std,mean,std
0,100934.0,1,20,0.3,0.470162,0.85,0.366348,0.25,0.444262,0.05,0.223607,2155.4,1185.69808
1,100934.0,2,30,0.3,0.466092,0.9,0.305129,0.266667,0.449776,0.033333,0.182574,2175.133333,1191.591626
2,100934.0,3,30,0.2,0.406838,0.933333,0.253708,0.2,0.406838,0.0,0.0,2323.7,1142.043364
3,103322.0,1,20,0.25,0.444262,0.85,0.366348,0.2,0.410391,0.05,0.223607,2305.1,1090.350542
4,103322.0,2,30,0.3,0.466092,0.933333,0.253708,0.233333,0.430183,0.066667,0.253708,2425.966667,1059.981668


In [39]:

cols = {1:'#0988c4', 2:'#d60e91', 3:'#992277'}

def navon_plots(var=('score','mean'), xaxis_title_text='mean score', yaxis_title_text='Freqency', data=data, cols={1:'#0988c4', 2:'#d60e91', 3:'#992277'}):

    fig = go.Figure()
    traces = []
    for b in data.block_number.unique():
        sub = data.loc[data.block_number==b,]
        fig.add_trace(go.Histogram(x=sub[var], name='block no. ' + str(round(b)), marker_color=cols[b]))

    fig.update_layout(template='none', title_text='Nback Task: Distribution of ' + xaxis_title_text, xaxis_title_text='mean score', yaxis_title_text='freqency')
    fig.show()


In [40]:
data.columns
cols = [(           'score',  'mean'),
(           'score',   'std'),
(          'status',  'mean'),
(          'status',   'std'),
(            'miss',  'mean'),
(            'miss',   'std'),
(     'false_alarm',  'mean'),
(     'false_alarm',   'std'),
('reaction_time_ms',  'mean'),
('reaction_time_ms',   'std')]
for c in cols: 
    if c[1] == 'mean': cols={1:'#11ddee', 2:'#000083', 3:'#f3ac56'}
    else: cols={1:'#11ddee', 2:'#000083', 3:'#dddd77'}
    navon_plots(var=c, xaxis_title_text=c[1] + c[0], cols=cols)

In [41]:
def navon_plots(var='score', xaxis_title_text='mean score', yaxis_title_text='Freqency', data=data, cols={1:'#0988c4', 2:'#d60e91', 3:'#992277'}):

    fig = make_subplots(rows=2, cols=1)
    for b in data.block_number.unique():
        sub = data.loc[data.block_number==b,]
        fig.add_trace(go.Histogram(x=sub[(var, 'mean')], name='block no. ' + str(round(b)), marker_color=cols[b]), row=1, col=1)
        fig.add_trace(go.Histogram(x=sub[(var, 'std')], name='block no. ' + str(round(b)), marker_color=cols[b]), row=2, col=1)

    fig.update_layout(template='none', title_text='Nback Task: Distribution of ' + xaxis_title_text, xaxis_title_text='mean score', yaxis_title_text='freqency')
    fig.show()


navon_plots()

In [29]:
colors = ['#A56CC1', '#A6ACEC', '#63F5EF']

var = 'score'
fig = make_subplots(rows=2, cols=1, subplot_titles=(var+' mean', var+' std'))
hist_data_means = []; hist_data_std = []
group_labels = []
for i in [3,2,1]:
    group_labels.append('block ' + str(i))
    hist_data_means.append(data.loc[data['block_number']==i, (var, 'mean')])
    hist_data_std.append(data.loc[data['block_number']==i, (var, 'std')])


fig2 = ff.create_distplot(hist_data_means, group_labels, colors=colors, show_rug=False, bin_size=0.015)
fig3 = ff.create_distplot(hist_data_std, group_labels, colors=colors, show_rug=False, bin_size=0.015)


# ---- add subplots ----x
for i in [2,1,0]: 
    fig.add_trace(go.Histogram(fig2['data'][i], legendgroup=1), row=1, col=1)
    fig.add_trace(go.Histogram(fig3['data'][i], showlegend=False), row=2, col=1)
for j in [3,4,5]: 
    fig.add_trace(go.Scatter(fig2['data'][j]), row=1, col=1)
    fig.add_trace(go.Scatter(fig3['data'][j]), row=2, col=1)


# Add title
fig.update_layout(title_text='Nback Task Distribution', template='none')
fig.show()

In [43]:
def navon_plots(var='score', data=data, cols=['#A56CC1', '#A6ACEC', '#63F5EF']):

    fig = make_subplots(rows=2, cols=1, subplot_titles=(var+' mean', var+' std'))
    hist_data_means = []; hist_data_std = []
    group_labels = []
    for i in [3,2,1]:
        group_labels.append('block ' + str(i))
        hist_data_means.append(data.loc[data['block_number']==i, (var, 'mean')])
        hist_data_std.append(data.loc[data['block_number']==i, (var, 'std')])


    fig2 = ff.create_distplot(hist_data_means, group_labels, colors=colors, show_rug=False, bin_size=0.015)
    fig3 = ff.create_distplot(hist_data_std, group_labels, colors=colors, show_rug=False, bin_size=0.015)


    # ---- add subplots ----x
    for i in [2,1,0]: 
        fig.add_trace(go.Histogram(fig2['data'][i], legendgroup=1), row=1, col=1)
        fig.add_trace(go.Histogram(fig3['data'][i], showlegend=False), row=2, col=1)
    for j in [3,4,5]: 
        fig.add_trace(go.Scatter(fig2['data'][j]), row=1, col=1)
        fig.add_trace(go.Scatter(fig3['data'][j]), row=2, col=1)


    # Add title
    fig.update_layout(title_text='Nback Task Distribution', template='none')
    fig.show()

In [46]:
['#0988c4', '#d60e91', '#992277']

['score', 'status', 'miss', 'false_alarm','reaction_time_ms']
navon_plots('status')

In [47]:
data.columns


MultiIndex([(     'participant',      ''),
            (    'block_number',      ''),
            (   'trial_counter', 'count'),
            (           'score',  'mean'),
            (           'score',   'std'),
            (          'status',  'mean'),
            (          'status',   'std'),
            (            'miss',  'mean'),
            (            'miss',   'std'),
            (     'false_alarm',  'mean'),
            (     'false_alarm',   'std'),
            ('reaction_time_ms',  'mean'),
            ('reaction_time_ms',   'std')],
           )

In [22]:
fig2['data']

(Histogram({
     'autobinx': False,
     'histnorm': 'probability density',
     'legendgroup': 'block 3',
     'marker': {'color': '#A56CC1'},
     'name': 'block 3',
     'opacity': 0.7,
     'x': array([0.2       , 0.23333333, 0.36666667, ..., 0.2       , 0.36666667,
                 0.36666667]),
     'xaxis': 'x',
     'xbins': {'end': 0.5666666666666667, 'size': 0.015, 'start': 0.13333333333333333},
     'yaxis': 'y'
 }),
 Histogram({
     'autobinx': False,
     'histnorm': 'probability density',
     'legendgroup': 'block 2',
     'marker': {'color': '#A6ACEC'},
     'name': 'block 2',
     'opacity': 0.7,
     'x': array([0.3       , 0.3       , 0.36666667, ..., 0.43333333, 0.33333333,
                 0.33333333]),
     'xaxis': 'x',
     'xbins': {'end': 0.5666666666666667, 'size': 0.015, 'start': 0.13333333333333333},
     'yaxis': 'y'
 }),
 Histogram({
     'autobinx': False,
     'histnorm': 'probability density',
     'legendgroup': 'block 1',
     'marker': {'color': '

In [None]:


# distplot
hist_data = [df['V_1'].values, df['V_2'].values]
group_labels = ['Group 1', 'Group 2']
fig2 = ff.create_distplot(hist_data, group_labels)


# rug / margin plot to immitate ff.create_distplot
df['rug 1'] = 1.1
df['rug 2'] = 1
fig.add_trace(go.Scatter(x=df['V_1'], y = df['rug 1'],
                       mode = 'markers',
                       marker=dict(color = 'blue', symbol='line-ns-open')
                        ), row=2, col=2)

fig.add_trace(go.Scatter(x=df['V_2'], y = df['rug 2'],
                       mode = 'markers',
                       marker=dict(color = 'red', symbol='line-ns-open')
                        ), row=2, col=2)

# some manual adjustments on the rugplot
fig.update_yaxes(range=[0.95,1.15], tickfont=dict(color='rgba(0,0,0,0)', size=14), row=2, col=2)
fig.update_layout(showlegend=False)

fig.show()




In [6]:
def compute_performance_trial_bins(n_bins=10, wcst_data=ed.raw.wcst_data):
    """Return: DataFrame capturing the performance per n_bins triasl"""

    # ---- add trial number ----x
    xx = []; df = wcst_data
    [xx.append((i%100)+1) for i in range(df.shape[0])]
    df['trial_no'] = xx 

    # ---- status==1 --> correct
    t = np.linspace(0,100,num=n_bins+1).tolist(); c=0

    for tt in t[1:]:
        c +=1
        x = df.loc[df['trial_no'] < tt,].groupby(['participant', 'status']).agg({
        'participant':              ['count'],
        'reaction_time_ms':         ['mean', 'std'],
        'perseverance_error':       ['mean'],
        'not_perseverance_error':   ['mean']
        }).reset_index()
        x['percentages'] = x[('participant', 'count')]/tt
        x['trials']      = str(round(t[c-1])) + '-' + str(round(t[c]))
        x['trials_2']    = t[c]
        if c==1:    data=x
        else:       data=data.append(other=x)

    # if x>0 --> perseverance_error > not_perseverance_error --> main error=perseverance_error
    data['main_error'] = np.where(data['perseverance_error'] - data['not_perseverance_error'] > 0, 'perserverance errors', 'non perserverance errors')

    return(data)

In [7]:
data = compute_performance_trial_bins()
data.head()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,100934.0,1,9,2913.666667,1901.24617,0.0,0.0,0.9,0-10,10.0,non perserverance errors
1,103322.0,1,8,1524.125,714.072113,0.0,0.0,0.8,0-10,10.0,non perserverance errors
2,103322.0,2,1,7644.0,,0.0,1.0,0.1,0-10,10.0,non perserverance errors
3,107700.0,1,7,1640.857143,456.098465,0.0,0.0,0.7,0-10,10.0,non perserverance errors
4,107700.0,2,2,3401.5,1221.173411,0.0,1.0,0.2,0-10,10.0,non perserverance errors


In [10]:
# ---- random sample of n participants ----x
def random_participant_sample(n=10, data=data):
    participants = np.random.choice(data[('participant', '')].unique(), n)
    sub = data.loc[data[('participant', '')].isin(participants) & (data['status']==1),:]
    return(sub)

In [11]:
sub = random_participant_sample()
sub.head()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,103322.0,1,8,1524.125,714.072113,0.0,0.0,0.8,0-10,10.0,non perserverance errors
25,140183.0,1,5,4463.4,2293.294421,0.0,0.0,0.5,0-10,10.0,non perserverance errors
122,316448.0,1,6,4197.0,2955.674475,0.0,0.0,0.6,0-10,10.0,non perserverance errors
155,379824.0,1,6,4170.666667,3086.93276,0.0,0.0,0.6,0-10,10.0,non perserverance errors
291,612204.0,1,7,2601.142857,2632.966478,0.0,0.0,0.7,0-10,10.0,non perserverance errors


# Todo

### Visuals | Groups

- Function: WCST Performance plot
- Function: Heatmap plots
- Function: Pie Chart (Group)
- Function: QQ-plots
- Function: ANOVA
- Function: Summary stats (Groups)

### Groups

- Function: Select Groups
- DASH: Select Groups



In [13]:
# ---- random sample of n participants ----x
participants = np.random.choice(data[('participant', '')].unique(), 10)
sub = data.loc[data[('participant', '')].isin(participants) & (data['status']==1),:]

In [16]:

# ---- random sample of n participants ----x
def wcst_performance_plot(data=sub, group_var=('participant', ''), mean_plot=False,
            colours={'perserverance errors':'darkred', 'non perserverance errors': 'steelblue'},
            title='WCST Performance', xaxis={'title':'trails'}, yaxis={'title':'% Correct'}, template='none', legend_title_text='Participant', width=900, height=500):
    
    groups = data[group_var].unique()
    traces = []
    for g in groups:
        df    = data.loc[(data[group_var] == g), ['trials_2', 'percentages', 'main_error', 'reaction_time_ms']]
        df['participant'] = g
        trace = go.Scatter(x=df.trials_2, y=df.percentages, mode='lines+markers', name='{}'.format(g),
                line=dict(color='black'), 
                marker=dict(
                    size=df[('reaction_time_ms','mean')]/100,
                    color=[colours[i] for i in df.main_error],
                    opacity=0.75,
                    line=dict(color='white')))
        traces.append(trace)
    
    if mean_plot:
        s2 = data
        s2['RT'] = s2[('reaction_time_ms','mean')]
        s2 = s2.groupby('trials_2').agg({
            'RT':           ['mean', 'std'],
            'percentages':  ['mean', 'std']}).reset_index()

        df = s2
        g  = 'aggregate'
        df['participant'] = g
        trace = go.Scatter(x=df.trials_2, y=df[('percentages','mean')], mode='lines+markers', name='{}'.format(g),
                line=dict(color='black'), 
                marker=dict(
                    size=df[('RT','mean')]/100,
                    color='darkred',
                    opacity=0.75,
                    line=dict(color='white')))
        traces.append(trace)

    layout  = go.Layout(title=title, xaxis=xaxis, yaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)
    fig     = go.Figure(data=traces, layout=layout)
    return(fig)

wcst_fig = wcst_performance_plot(mean_plot=True)
wcst_fig

In [23]:

# ANOVA
# https://towardsdatascience.com/anova-test-with-python-cfbf4013328b


# ----- QQ-plots ------x
fig = make_subplots(rows=3, cols=3)
c=0; r=1
for p in sub[('participant','')].unique()[:9]:
    c+=1
    if c==4:c=1; r+=1
    qq = stats.probplot(sub[sub[('participant', '')]==p]['percentages'], dist="norm")
    x = np.array([qq[0][0][0], qq[0][0][-1]])
    fig.add_scatter(x=qq[0][0], y=qq[0][1], mode='markers', row=r, col=c)
    fig.add_scatter(x=x, y=qq[1][1] + qq[1][0]*x, mode='lines', row=r, col=c)
    fig.layout.update(showlegend=False, template='none', title='QQ probability plots')
fig.show()

In [27]:
from plotly.colors import n_colors

def violin_distribution_plot(
    group_var=('participant',''), groups=participants, value_var=('reaction_time_ms', 'mean'), 
    colors=n_colors('rgb(200, 10, 10)', 'rgb(5, 200, 200)', 10, colortype='rgb'),
    title='Reaction Time Distributions', xaxis={'title':'participants'}, yaxis={'title':'Reaction Times'}, template='plotly_white', 
    legend_title_text='Participant', width=700, height=500):
    
    # ---- extract data ----x
    RTs = []
    for g in groups:
        x = data.loc[data[group_var]==g,][value_var]
        RTs.append(x)

    # ---- sort lists ----x
    srt = np.argsort([np.mean(r) for r in RTs])
    RT = [RTs[s] for s in srt]

    # ---- create figure: violin plots ----x
    traces = []
    for rt in range(len(RT)):
        traces.append(go.Violin(y=RT[rt],
                                name=groups[rt],
                                box_visible=True,
                                meanline_visible=True))

    layout = go.Layout(title=title, xaxis=xaxis, yaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)
    fig = go.Figure(data=traces, layout=layout)

    # ---- create figure: distribution plots ----x
    fig_2 = go.Figure()
    for data_line, color, grp in zip(RT, colors, groups):
        fig_2.add_trace(go.Violin(x=data_line, line_color=color, name=grp))


    fig_2.update_traces(orientation='h', side='positive', width=3, points=False)
    fig_2.update_layout(xaxis_showgrid=False, xaxis_zeroline=False, title=title, yaxis=xaxis, xaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)


    return({'data':zip(groups, RTs), 'violin_plot':fig, 'distribution_plot':fig_2})


In [28]:
# ---- Reaction Time Plots ----x
RTs = violin_distribution_plot()
RTs['violin_plot']

In [29]:
RTs['distribution_plot']

In [30]:
 performance = violin_distribution_plot(group_var=('participant',''), groups=participants, value_var='percentages', title='Performance Distributions', 
                                        xaxis={'title':'participants'}, yaxis={'title':'% Correct'})
performance['violin_plot']

In [31]:
performance['distribution_plot']

In [32]:
data.tail()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
665,996407.0,1,64,1189.859375,482.186484,0.0,0.0,0.64,90-100,100.0,non perserverance errors
666,996407.0,2,35,1165.885714,468.93313,0.114286,0.885714,0.35,90-100,100.0,non perserverance errors
667,998593.0,1,64,2898.1875,1283.695631,0.0,0.0,0.64,90-100,100.0,non perserverance errors
668,998593.0,2,31,3669.064516,1744.578381,0.032258,0.967742,0.31,90-100,100.0,non perserverance errors
669,998593.0,3,4,8994.75,2010.5,0.25,0.75,0.04,90-100,100.0,non perserverance errors


In [33]:


fig = px.density_heatmap(x=data[('reaction_time_ms','mean')], y=data['percentages'], title='Performance by Mean of Reaction Time',  
    labels={
        'x': 'Std Reaction Time',
        'y': '% Correct'}
        )
fig.show()



fig = px.density_heatmap(x=data[('reaction_time_ms','std')], y=data['percentages'], title='Performance by Std of Reaction Time',  
    labels={
        'x': 'Std Reaction Time',
        'y': '% Correct'}
        )
fig.show()

# ANOVA Tests

Test whether or not the mean differences in
- Performance: % correct actions
- Reaction Times: speed of response

between groups differs significantly.

In [34]:
# =========================================== ANOVA == REACTION TIME ======================================================x
def ANOVA(dataframe=sub, group_var=('participant', ''), value_var='percentages'):
    # Create ANOVA backbone table
    raw_data = [['Between Groups', '', '', '', '', '', ''], ['Within Groups', '', '', '', '', '', ''], ['Total', '', '', '', '', '', '']] 
    anova_table = pd.DataFrame(raw_data, columns = ['Source of Variation', 'SS', 'df', 'MS', 'F', 'P-value', 'F crit']) 
    anova_table.set_index('Source of Variation', inplace = True)

    # calculate SSTR and update anova table
    x_bar = sub[value_var].mean()
    SSTR = sub.groupby(group_var).count() * (sub.groupby(group_var).mean() - x_bar)**2
    anova_table['SS']['Between Groups'] = SSTR[value_var].sum()

    # calculate SSE and update anova table
    SSE = (sub.groupby(group_var).count() - 1) * sub.groupby(group_var).std()**2
    anova_table['SS']['Within Groups'] = SSE[value_var].sum()

    # calculate SSTR and update anova table
    SSTR = SSTR[value_var].sum() + SSE[value_var].sum()
    anova_table['SS']['Total'] = SSTR

    # update degree of freedom
    anova_table['df']['Between Groups'] = sub[group_var].nunique() - 1
    anova_table['df']['Within Groups'] = sub.shape[0] - sub[group_var].nunique()
    anova_table['df']['Total'] = sub.shape[0] - 1

    # calculate MS
    anova_table['MS'] = anova_table['SS'] / anova_table['df']

    # calculate F 
    F = anova_table['MS']['Between Groups'] / anova_table['MS']['Within Groups']
    anova_table['F']['Between Groups'] = F

    # p-value
    anova_table['P-value']['Between Groups'] = 1 - stats.f.cdf(F, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # F critical 
    alpha = 0.05
    # possible types "right-tailed, left-tailed, two-tailed"
    tail_hypothesis_type = "two-tailed"
    if tail_hypothesis_type == "two-tailed":
        alpha /= 2
    anova_table['F crit']['Between Groups'] = stats.f.ppf(1-alpha, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # Final ANOVA Table
    return(anova_table)

In [35]:
# -------- ANOVA: performance differences -----------x
ANOVA(dataframe=sub, group_var=('participant', ''), value_var='percentages')

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,1.957704,9,0.217523,33.041704,0.0,2.260442
Within Groups,0.585912,89,0.006583,,,
Total,2.543616,98,0.025955,,,


In [36]:
# -------- ANOVA: RT differences -----------x
ANOVA(dataframe=sub, group_var=('participant', ''), value_var=('reaction_time_ms', 'mean'))

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,50791142.852179,9,5643460.316909,56.45756,0.0,2.260442
Within Groups,8896381.040156,89,99959.33753,,,
Total,59687523.892335,98,609056.366248,,,


# Final Datasets

These curated datasets are now joined, pruned & transformed to produce the `final` set of datasets - on which we will conduct our anaylsis.


# Structure

#### wcst


In [185]:
# ed.demographics.head()
# ed.nback_summary_stats.head()
# ed.navon_summary_stats.head()
# ed.corsi_summary_stats.head()
# ed.fitts_summary_stats.head()

Unnamed: 0_level_0,participant,delta,delta,status,status
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
0,100934.0,-5.7,186.60713,1.0,0.0
1,103322.0,-203.05,174.3084,1.0,0.0
2,107700.0,-342.65,345.637059,1.1,0.447214
3,117200.0,-85.05,156.117188,1.0,0.0
4,117306.0,-141.8,167.808036,1.0,0.0


In [152]:
# --- demographics dataset ---x
wcst = ed.raw.wcst_data[['participant', 'card_no', 'seq_no', 'rule', 'card_shape', 'card_number', 'card_colour', 'reaction_time_ms', 'status',
                                    'card_selected', 'error','perseverance_error', 'not_perseverance_error']]
wcst.set_index('participant').join(ed.demographics.set_index('participant'))


Unnamed: 0_level_0,card_no,seq_no,rule,card_shape,card_number,card_colour,reaction_time_ms,status,card_selected,error,perseverance_error,not_perseverance_error,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
100934.0,52,1,shape,triangle,1,yellow,4567,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,59,2,shape,triangle,3,red,4661,1,3,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,23,3,shape,cross,2,red,1319,1,2,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,4,shape,triangle,1,yellow,2336,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,5,shape,triangle,1,yellow,6634,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998593.0,61,2,color,triangle,4,blue,2489,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,3,color,circle,4,green,2193,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,4,color,circle,4,green,2310,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,23,5,color,cross,2,red,1430,1,2,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
