# Model Free Analysis: WCST

This is a testing notebook: building out the first set of functionality for the *_summary_plots_and_figures.py_* class.

Statistical inquiry into the aggregate behaviour of the *Wisconsin Sorting* & *NBack* Tasks.


---------
```
Zach Wolpe
zachcolinwolpe@gmail.com
29 July 2021
```
---------



# Executive Functions

The additional experiments are provided to gauge executive functions and computer literacy that may distinguish candidates when participating in the WCST & NBack Tasks.

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
import re
import sys
sys.path.append('../process data/')
import scipy.stats as stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.offline as pyo
import plotly.express as px
from encode_processed_data import encode_data

In [4]:
# with open('../data objects/batch_processing_object.pkl', 'rb') as file2:
#     bp = pickle.load(file2)

# ---- fetch data object ----x
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as file2:
    ed = pickle.load(file2)

   
ed.__dict__.keys()

dict_keys(['raw', 'summary_table', 'fitts_summary_stats', 'corsi_summary_stats', 'navon_summary_stats', 'nback_summary_stats', 'demographics_plot', 'demographics'])

In [5]:
# ed.describe_data()
ed.summary_table.head()

Unnamed: 0_level_0,nback_status,nback_reaction_time_ms,fitts_mean_deviation,corsi_block_span,navon_level_of_target,navon_perc_correct,navon_reaction_time_ms,wcst_RT,wcst_accuracy,demographics_age_a,demographics_gender_a,demographics_handedness_a,demographics_education_a,demographics_income_a,demographics_computer_hours_a,demographics_age_group,demographics_mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100934.0,0.894444,2218.077778,-5.7,4.0,global,0.166667,664.583333,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,local,0.461538,597.769231,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,none,0.48,622.96,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
103322.0,0.916667,2366.011111,-203.05,6.0,global,0.923077,959.769231,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571
103322.0,0.916667,2366.011111,-203.05,6.0,local,0.916667,999.75,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571


In [6]:
ed.clean_data_info()



                WCST - Wisconsin Card Sorting Task                                                  DataFrame: ed.raw.wcst_date
            ---------------------------------------------------------------------------------------------------------------------------
            
                participant                     : key               : participant ID
                card_no                         : categorical       : the card shown
                correct_card                    : categorical       : the card that should be clicked of the top four on screen      
                correct_persevering             : categorical       : the card that would be clicked if the participant is persevering
                seq_no                          : numeric           : trial number
                rule                            : categorical       : matching rule  
                card_shape                      : categorical       : current card shape
                card_num

In [7]:
# ----- all categories descriptors -----x
cats_demographics   = ['gender_a', 'handedness_a', 'education_a', 'age_group']
cats_navon          = [('level_of_target', '')]

# ---- add numerical descriptors ----x
num_demographics = ['age_a','income_a', 'computer_hours_a', 'mean_reation_time_ms']
num_nback = [('block_number', ''), ('score', 'mean'), ('score', 'std'), ('status', 'mean'), ('status', 'std'), ('miss', 'mean'), 
            ('miss', 'std'), ('false_alarm', 'mean'), ('false_alarm', 'std'), ('reaction_time_ms', 'mean'), ('reaction_time_ms', 'std')]
num_navon = [('correct', 'mean'), ('correct',  'std'), ('too_slow', 'mean'), ('too_slow',  'std'), 
            ('reaction_time_ms', 'mean'), ('reaction_time_ms',  'std')]
num_corsi = [('highest_span',  'max'), ('status', 'mean'), ('status',  'std')]
num_fitts = [('delta', 'mean'), ('delta',  'std'), ('status', 'mean')]


# ---- user selected menus ---x

In [8]:
def compute_performance_trial_bins(n_bins=10, wcst_data=ed.raw.wcst_data):
    """Return: DataFrame capturing the performance per n_bins triasl"""

    # ---- add trial number ----x
    xx = []; df = wcst_data
    [xx.append((i%100)+1) for i in range(df.shape[0])]
    df['trial_no'] = xx 

    # ---- status==1 --> correct
    t = np.linspace(0,100,num=n_bins+1).tolist(); c=0

    for tt in t[1:]:
        c +=1
        x = df.loc[df['trial_no'] < tt,].groupby(['participant', 'status']).agg({
        'participant':              ['count'],
        'reaction_time_ms':         ['mean', 'std'],
        'perseverance_error':       ['mean'],
        'not_perseverance_error':   ['mean']
        }).reset_index()
        x['percentages'] = x[('participant', 'count')]/tt
        x['trials']      = str(round(t[c-1])) + '-' + str(round(t[c]))
        x['trials_2']    = t[c]
        if c==1:    data=x
        else:       data=data.append(other=x)

    # if x>0 --> perseverance_error > not_perseverance_error --> main error=perseverance_error
    data['main_error'] = np.where(data['perseverance_error'] - data['not_perseverance_error'] > 0, 'perserverance errors', 'non perserverance errors')

    return(data)

In [9]:
data = compute_performance_trial_bins()
data.head()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,100934.0,1,9,2913.666667,1901.24617,0.0,0.0,0.9,0-10,10.0,non perserverance errors
1,103322.0,1,8,1524.125,714.072113,0.0,0.0,0.8,0-10,10.0,non perserverance errors
2,103322.0,2,1,7644.0,,0.0,1.0,0.1,0-10,10.0,non perserverance errors
3,107700.0,1,7,1640.857143,456.098465,0.0,0.0,0.7,0-10,10.0,non perserverance errors
4,107700.0,2,2,3401.5,1221.173411,0.0,1.0,0.2,0-10,10.0,non perserverance errors


In [10]:
# ---- random sample of n participants ----x
def random_participant_sample(n=10, data=data):
    participants = np.random.choice(data[('participant', '')].unique(), n)
    sub = data.loc[data[('participant', '')].isin(participants) & (data['status']==1),:]
    return(sub)

In [14]:
sub = random_participant_sample()
sub.head()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
23,139330.0,1,6,1063.0,197.034007,0.0,0.0,0.6,0-10,10.0,non perserverance errors
147,364516.0,1,7,2897.857143,1760.671598,0.0,0.0,0.7,0-10,10.0,non perserverance errors
188,438401.0,1,9,1249.666667,471.590924,0.0,0.0,0.9,0-10,10.0,non perserverance errors
307,652890.0,1,8,1763.625,670.194841,0.0,0.0,0.8,0-10,10.0,non perserverance errors
313,663372.0,1,9,1183.888889,205.293354,0.0,0.0,0.9,0-10,10.0,non perserverance errors


In [12]:
sub = random_participant_sample()
sub.head()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
71,223984.0,1,8,1267.125,159.131067,0.0,0.0,0.8,0-10,10.0,non perserverance errors
102,279127.0,1,8,3664.125,2716.807603,0.0,0.0,0.8,0-10,10.0,non perserverance errors
106,291694.0,1,5,2247.0,670.792815,0.0,0.0,0.5,0-10,10.0,non perserverance errors
136,352526.0,1,9,1769.0,667.616656,0.0,0.0,0.9,0-10,10.0,non perserverance errors
164,402320.0,1,8,2784.25,1947.078379,0.0,0.0,0.8,0-10,10.0,non perserverance errors


# Todo

### Visuals | Groups

- Function: WCST Performance plot
- Function: Heatmap plots
- Function: Pie Chart (Group)
- Function: QQ-plots
- Function: ANOVA
- Function: Summary stats (Groups)

### Groups

- Function: Select Groups
- DASH: Select Groups



In [10]:
# ---- random sample of n participants ----x
participants = np.random.choice(data[('participant', '')].unique(), 10)
sub = data.loc[data[('participant', '')].isin(participants) & (data['status']==1),:]

In [11]:

# ---- random sample of n participants ----x
def wcst_performance_plot(data=sub, group_var=('participant', ''), mean_plot=False,
            colours={'perserverance errors':'darkred', 'non perserverance errors': 'steelblue'},
            title='WCST Performance', xaxis={'title':'trails'}, yaxis={'title':'% Correct'}, template='none', legend_title_text='Participant', width=900, height=500):
    
    groups = data[group_var].unique()
    traces = []
    for g in groups:
        df    = data.loc[(data[group_var] == g), ['trials_2', 'percentages', 'main_error', 'reaction_time_ms']]
        df['participant'] = g
        trace = go.Scatter(x=df.trials_2, y=df.percentages, mode='lines+markers', name='{}'.format(g),
                line=dict(color='black'), 
                marker=dict(
                    size=df[('reaction_time_ms','mean')]/100,
                    color=[colours[i] for i in df.main_error],
                    opacity=0.75,
                    line=dict(color='white')))
        traces.append(trace)
    
    if mean_plot:
        s2 = data
        s2['RT'] = s2[('reaction_time_ms','mean')]
        s2 = s2.groupby('trials_2').agg({
            'RT':           ['mean', 'std'],
            'percentages':  ['mean', 'std']}).reset_index()

        df = s2
        g  = 'aggregate'
        df['participant'] = g
        trace = go.Scatter(x=df.trials_2, y=df[('percentages','mean')], mode='lines+markers', name='{}'.format(g),
                line=dict(color='black'), 
                marker=dict(
                    size=df[('RT','mean')]/100,
                    color='darkred',
                    opacity=0.75,
                    line=dict(color='white')))
        traces.append(trace)

    layout  = go.Layout(title=title, xaxis=xaxis, yaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)
    fig     = go.Figure(data=traces, layout=layout)
    return(fig)

wcst_fig = wcst_performance_plot(mean_plot=True)
wcst_fig

In [12]:

# ANOVA
# https://towardsdatascience.com/anova-test-with-python-cfbf4013328b


# ----- QQ-plots ------x
fig = make_subplots(rows=3, cols=3)
c=0; r=1
for p in sub[('participant','')].unique()[:9]:
    c+=1
    if c==4:c=1; r+=1
    qq = stats.probplot(sub[sub[('participant', '')]==p]['percentages'], dist="norm")
    x = np.array([qq[0][0][0], qq[0][0][-1]])
    fig.add_scatter(x=qq[0][0], y=qq[0][1], mode='markers', row=r, col=c)
    fig.add_scatter(x=x, y=qq[1][1] + qq[1][0]*x, mode='lines', row=r, col=c)
    fig.layout.update(showlegend=False, template='none', title='QQ probability plots')
fig.show()

In [13]:
from plotly.colors import n_colors

def violin_distribution_plot(
    group_var=('participant',''), groups=participants, value_var=('reaction_time_ms', 'mean'), 
    colors=n_colors('rgb(200, 10, 10)', 'rgb(5, 200, 200)', 10, colortype='rgb'),
    title='Reaction Time Distributions', xaxis={'title':'participants'}, yaxis={'title':'Reaction Times'}, template='plotly_white', 
    legend_title_text='Participant', width=700, height=500):
    
    # ---- extract data ----x
    RTs = []
    for g in groups:
        x = data.loc[data[group_var]==g,][value_var]
        RTs.append(x)

    # ---- sort lists ----x
    srt = np.argsort([np.mean(r) for r in RTs])
    RT = [RTs[s] for s in srt]

    # ---- create figure: violin plots ----x
    traces = []
    for rt in range(len(RT)):
        traces.append(go.Violin(y=RT[rt],
                                name=groups[rt],
                                box_visible=True,
                                meanline_visible=True))

    layout = go.Layout(title=title, xaxis=xaxis, yaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)
    fig = go.Figure(data=traces, layout=layout)

    # ---- create figure: distribution plots ----x
    fig_2 = go.Figure()
    for data_line, color, grp in zip(RT, colors, groups):
        fig_2.add_trace(go.Violin(x=data_line, line_color=color, name=grp))


    fig_2.update_traces(orientation='h', side='positive', width=3, points=False)
    fig_2.update_layout(xaxis_showgrid=False, xaxis_zeroline=False, title=title, yaxis=xaxis, xaxis=yaxis, template=template, legend_title_text=legend_title_text, width=width, height=height)


    return({'data':zip(groups, RTs), 'violin_plot':fig, 'distribution_plot':fig_2})


In [14]:
# ---- Reaction Time Plots ----x
RTs = violin_distribution_plot()
RTs['violin_plot']

In [15]:
RTs['distribution_plot']

In [16]:
 performance = violin_distribution_plot(group_var=('participant',''), groups=participants, value_var='percentages', title='Performance Distributions', 
                                        xaxis={'title':'participants'}, yaxis={'title':'% Correct'})
performance['violin_plot']

In [17]:
performance['distribution_plot']

In [18]:
data.tail()

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
665,996407.0,1,64,1189.859375,482.186484,0.0,0.0,0.64,90-100,100.0,non perserverance errors
666,996407.0,2,35,1165.885714,468.93313,0.114286,0.885714,0.35,90-100,100.0,non perserverance errors
667,998593.0,1,64,2898.1875,1283.695631,0.0,0.0,0.64,90-100,100.0,non perserverance errors
668,998593.0,2,31,3669.064516,1744.578381,0.032258,0.967742,0.31,90-100,100.0,non perserverance errors
669,998593.0,3,4,8994.75,2010.5,0.25,0.75,0.04,90-100,100.0,non perserverance errors


In [19]:


fig = px.density_heatmap(x=data[('reaction_time_ms','mean')], y=data['percentages'], title='Performance by Mean of Reaction Time',  
    labels={
        'x': 'Std Reaction Time',
        'y': '% Correct'}
        )
fig.show()



fig = px.density_heatmap(x=data[('reaction_time_ms','std')], y=data['percentages'], title='Performance by Std of Reaction Time',  
    labels={
        'x': 'Std Reaction Time',
        'y': '% Correct'}
        )
fig.show()

# ANOVA Tests

Test whether or not the mean differences in
- Performance: % correct actions
- Reaction Times: speed of response

between groups differs significantly.

In [20]:
# =========================================== ANOVA == REACTION TIME ======================================================x
def ANOVA(dataframe=sub, group_var=('participant', ''), value_var='percentages'):
    # Create ANOVA backbone table
    raw_data = [['Between Groups', '', '', '', '', '', ''], ['Within Groups', '', '', '', '', '', ''], ['Total', '', '', '', '', '', '']] 
    anova_table = pd.DataFrame(raw_data, columns = ['Source of Variation', 'SS', 'df', 'MS', 'F', 'P-value', 'F crit']) 
    anova_table.set_index('Source of Variation', inplace = True)

    # calculate SSTR and update anova table
    x_bar = sub[value_var].mean()
    SSTR = sub.groupby(group_var).count() * (sub.groupby(group_var).mean() - x_bar)**2
    anova_table['SS']['Between Groups'] = SSTR[value_var].sum()

    # calculate SSE and update anova table
    SSE = (sub.groupby(group_var).count() - 1) * sub.groupby(group_var).std()**2
    anova_table['SS']['Within Groups'] = SSE[value_var].sum()

    # calculate SSTR and update anova table
    SSTR = SSTR[value_var].sum() + SSE[value_var].sum()
    anova_table['SS']['Total'] = SSTR

    # update degree of freedom
    anova_table['df']['Between Groups'] = sub[group_var].nunique() - 1
    anova_table['df']['Within Groups'] = sub.shape[0] - sub[group_var].nunique()
    anova_table['df']['Total'] = sub.shape[0] - 1

    # calculate MS
    anova_table['MS'] = anova_table['SS'] / anova_table['df']

    # calculate F 
    F = anova_table['MS']['Between Groups'] / anova_table['MS']['Within Groups']
    anova_table['F']['Between Groups'] = F

    # p-value
    anova_table['P-value']['Between Groups'] = 1 - stats.f.cdf(F, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # F critical 
    alpha = 0.05
    # possible types "right-tailed, left-tailed, two-tailed"
    tail_hypothesis_type = "two-tailed"
    if tail_hypothesis_type == "two-tailed":
        alpha /= 2
    anova_table['F crit']['Between Groups'] = stats.f.ppf(1-alpha, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

    # Final ANOVA Table
    return(anova_table)

In [21]:
# -------- ANOVA: performance differences -----------x
ANOVA(dataframe=sub, group_var=('participant', ''), value_var='percentages')

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,1.826394,9,0.202933,26.842767,0.0,2.262158
Within Groups,0.665284,88,0.00756,,,
Total,2.491679,97,0.025687,,,


In [22]:
# -------- ANOVA: RT differences -----------x
ANOVA(dataframe=sub, group_var=('participant', ''), value_var=('reaction_time_ms', 'mean'))

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,31798694.776304,9,3533188.308478,78.87207,0.0,2.262158
Within Groups,3942087.108442,88,44796.444414,,,
Total,35740781.884746,97,368461.668915,,,


# Final Datasets

These curated datasets are now joined, pruned & transformed to produce the `final` set of datasets - on which we will conduct our anaylsis.


# Structure

#### wcst


In [23]:
# ed.demographics.head()
# ed.nback_summary_stats.head()
# ed.navon_summary_stats.head()
# ed.corsi_summary_stats.head()
# ed.fitts_summary_stats.head()

In [24]:
# --- demographics dataset ---x
wcst = ed.raw.wcst_data[['participant', 'card_no', 'seq_no', 'rule', 'card_shape', 'card_number', 'card_colour', 'reaction_time_ms', 'status',
                                    'card_selected', 'error','perseverance_error', 'not_perseverance_error']]
wcst.set_index('participant').join(ed.demographics.set_index('participant'))


Unnamed: 0_level_0,card_no,seq_no,rule,card_shape,card_number,card_colour,reaction_time_ms,status,card_selected,error,perseverance_error,not_perseverance_error,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
100934.0,52,1,shape,triangle,1,yellow,4567,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,59,2,shape,triangle,3,red,4661,1,3,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,23,3,shape,cross,2,red,1319,1,2,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,4,shape,triangle,1,yellow,2336,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,5,shape,triangle,1,yellow,6634,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998593.0,61,2,color,triangle,4,blue,2489,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,3,color,circle,4,green,2193,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,4,color,circle,4,green,2310,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,23,5,color,cross,2,red,1430,1,2,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286


In [5]:
from summary_plots_and_figures import summary_plots_and_figures    

# ---- fetch data object ----x
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as file2:
    ed = pickle.load(file2)
spf = summary_plots_and_figures(ed)
spd = ed.summary_table
spd.head()

Unnamed: 0_level_0,nback_status,nback_reaction_time_ms,fitts_mean_deviation,corsi_block_span,navon_level_of_target,navon_perc_correct,navon_reaction_time_ms,wcst_RT,wcst_accuracy,demographics_age_a,demographics_gender_a,demographics_handedness_a,demographics_education_a,demographics_income_a,demographics_computer_hours_a,demographics_age_group,demographics_mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100934.0,0.894444,2218.077778,-5.7,4.0,global,0.166667,664.583333,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,local,0.461538,597.769231,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,0.894444,2218.077778,-5.7,4.0,none,0.48,622.96,1661.590361,0.83,28.0,male,right,university,7.0,25.0,25-34,11453.571429
103322.0,0.916667,2366.011111,-203.05,6.0,global,0.923077,959.769231,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571
103322.0,0.916667,2366.011111,-203.05,6.0,local,0.916667,999.75,1363.674419,0.86,51.0,male,right,university,6.0,82.0,45-54,5294.428571


In [6]:
spf.compute_wcst_performance_trial_bins(10)
spf.wcst_performance

Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials,trials_2,main_error
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,100934.0,1,9,2913.666667,1901.246170,0.000000,0.000000,0.90,0-10,10.0,non perserverance errors
1,103322.0,1,8,1524.125000,714.072113,0.000000,0.000000,0.80,0-10,10.0,non perserverance errors
2,103322.0,2,1,7644.000000,,0.000000,1.000000,0.10,0-10,10.0,non perserverance errors
3,107700.0,1,7,1640.857143,456.098465,0.000000,0.000000,0.70,0-10,10.0,non perserverance errors
4,107700.0,2,2,3401.500000,1221.173411,0.000000,1.000000,0.20,0-10,10.0,non perserverance errors
...,...,...,...,...,...,...,...,...,...,...,...
665,996407.0,1,64,1189.859375,482.186484,0.000000,0.000000,0.64,90-100,100.0,non perserverance errors
666,996407.0,2,35,1165.885714,468.933130,0.114286,0.885714,0.35,90-100,100.0,non perserverance errors
667,998593.0,1,64,2898.187500,1283.695631,0.000000,0.000000,0.64,90-100,100.0,non perserverance errors
668,998593.0,2,31,3669.064516,1744.578381,0.032258,0.967742,0.31,90-100,100.0,non perserverance errors


In [66]:
# def create_performance_bins(nbins=10, data=ed.summary_table)
nbins=10; data=ed.summary_table



n_steps=10
a=aa[0]; b=bb[0]
a,b

def create_performance_groupings(n_steps=10, spd=ed.summary_table):
    aa = ['nback_group', 'fitts_group','corsi_group','navon_group','wcst_group']
    bb = ['nback_status', 'fitts_mean_deviation', 'corsi_block_span', 'navon_perc_correct', 'wcst_accuracy']

    for a, b in zip(aa, bb):
        # ---- groups
        srt=min(spd[b]); stp=max(spd[b]); 
        steps = np.linspace(start=srt, stop=stp, num=n_steps)
        grps  = [str(np.round(steps[i],2)) + '-' + str(np.round(steps[i+1],2)) for i in range(len(steps)-1)]

        spd[a] = 'Na'
        for s in range(n_steps-1):
            spd.loc[(spd[b]>steps[s]) & (spd[b]<=steps[s+1]),a] = grps[s]
    self.summary_table = spd






In [70]:
spd

Unnamed: 0_level_0,nback_status,nback_reaction_time_ms,fitts_mean_deviation,corsi_block_span,navon_level_of_target,navon_perc_correct,navon_reaction_time_ms,wcst_RT,wcst_accuracy,demographics_age_a,...,demographics_education_a,demographics_income_a,demographics_computer_hours_a,demographics_age_group,demographics_mean_reation_time_ms,nback_group,fitts_group,corsi_group,navon_group,wcst_group
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100934.0,0.894444,2218.077778,-5.7,4.0,global,0.166667,664.583333,1661.590361,0.83,28.0,...,university,7.0,25.0,25-34,11453.571429,0.84-0.92,-137.21-43.7,3.0-4.0,0.11-0.22,0.81-0.9
100934.0,0.894444,2218.077778,-5.7,4.0,local,0.461538,597.769231,1661.590361,0.83,28.0,...,university,7.0,25.0,25-34,11453.571429,0.84-0.92,-137.21-43.7,3.0-4.0,0.44-0.56,0.81-0.9
100934.0,0.894444,2218.077778,-5.7,4.0,none,0.48,622.96,1661.590361,0.83,28.0,...,university,7.0,25.0,25-34,11453.571429,0.84-0.92,-137.21-43.7,3.0-4.0,0.44-0.56,0.81-0.9
103322.0,0.916667,2366.011111,-203.05,6.0,global,0.923077,959.769231,1363.674419,0.86,51.0,...,university,6.0,82.0,45-54,5294.428571,0.84-0.92,-318.11--137.21,5.0-6.0,0.89-1.0,0.81-0.9
103322.0,0.916667,2366.011111,-203.05,6.0,local,0.916667,999.75,1363.674419,0.86,51.0,...,university,6.0,82.0,45-54,5294.428571,0.84-0.92,-318.11--137.21,5.0-6.0,0.89-1.0,0.81-0.9
103322.0,0.916667,2366.011111,-203.05,6.0,none,0.88,1377.44,1363.674419,0.86,51.0,...,university,6.0,82.0,45-54,5294.428571,0.84-0.92,-318.11--137.21,5.0-6.0,0.78-0.89,0.81-0.9
107700.0,0.95,2137.211111,-342.65,6.0,global,0.916667,723.916667,1340.134146,0.82,35.0,...,university,4.0,4.0,25-34,4175.714286,0.92-1.0,-499.02--318.11,5.0-6.0,0.89-1.0,0.81-0.9
107700.0,0.95,2137.211111,-342.65,6.0,local,0.923077,698.0,1340.134146,0.82,35.0,...,university,4.0,4.0,25-34,4175.714286,0.92-1.0,-499.02--318.11,5.0-6.0,0.89-1.0,0.81-0.9
107700.0,0.95,2137.211111,-342.65,6.0,none,0.96,768.52,1340.134146,0.82,35.0,...,university,4.0,4.0,25-34,4175.714286,0.92-1.0,-499.02--318.11,5.0-6.0,0.89-1.0,0.81-0.9
117200.0,0.766667,2359.627778,-85.05,4.0,global,0.333333,859.083333,1074.269231,0.78,39.0,...,university,3.0,34.0,35-44,3637.142857,0.76-0.84,-137.21-43.7,3.0-4.0,0.22-0.33,0.71-0.81
