# Model Free Analysis

Statistical inquiry into the aggregate behaviour of the *Wisconsin Card Sorting* & *NBack* Tasks.


---------
```
Zach Wolpe
zachcolinwolpe@gmail.com
17 July 2021
```
---------



# Executive Functions

The additional experiments are provided to gauge executive functions and computer literacy that may distinguish candidates when participating in the WCST & NBack Tasks.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
import re
import sys
sys.path.append('../process data/')
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.express as px
from encode_processed_data import encode_data

In [2]:
# ---- fetch data object ----x
# The un-encoded alternative lives at '../data objects/batch_processing_object.pkl';
# this notebook works from the variant that already carries the encodings.
# NOTE: pickle.load executes arbitrary code on load -- only open trusted files.
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as pickle_handle:
    ed = pickle.load(pickle_handle)

# List the attribute names stored on the loaded object.
vars(ed).keys()

dict_keys(['raw', 'fitts_summary_stats', 'corsi_summary_stats', 'navon_summary_stats', 'nback_summary_stats', 'demographics_plot', 'demographics'])

In [3]:
# Print the data object's own listing of its attributes (paths, metadata, per-task dataframes).
ed.describe_data()



        ------------------------------------------------------------------
            self.path            : raw data loc
            self.metadata        : mturk metadata
            self.mapping         : reference table
            self.data_times      : reference times table
            self.participants    : list of participant identifiers
            self.parti_code      : list of participant codes
            self.n               : total number of samples
            self.wcst_paths      : paths to wcst  raw data
            self.nback_paths     : paths to nback raw data
            self.corsi_paths     : paths to corsi raw data
            self.fitts_paths     : paths to fitts raw data
            self.navon_paths     : paths to navon raw data
            self.wcst_data       : wcst  dataframe
            self.nback_data      : nback dataframe
            self.corsi_data      : corsi dataframe
            self.fitts_data      : fitts dataframe
            self.navon_data    

In [4]:
# Print the per-column data dictionary (column name : type : meaning) for the cleaned datasets.
ed.clean_data_info()



                WCST - Wisconsin Card Sorting Task                                                  DataFrame: ed.raw.wcst_date
            ---------------------------------------------------------------------------------------------------------------------------
            
                participant                     : key               : participant ID
                card_no                         : categorical       : the card shown
                correct_card                    : categorical       : the card that should be clicked of the top four on screen      
                correct_persevering             : categorical       : the card that would be clicked if the participant is persevering
                seq_no                          : numeric           : trial number
                rule                            : categorical       : matching rule  
                card_shape                      : categorical       : current card shape
                card_num

In [8]:
# Preview the Navon summary: mean/std of correct, too_slow and reaction_time_ms
# per participant and level_of_target.
ed.navon_summary_stats.head()

Unnamed: 0_level_0,participant,level_of_target,correct,correct,too_slow,too_slow,reaction_time_ms,reaction_time_ms
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
0,100934.0,global,0.166667,0.389249,0.0,0.0,664.583333,221.98954
1,100934.0,local,0.461538,0.518875,0.0,0.0,597.769231,294.353856
2,100934.0,none,0.48,0.509902,0.0,0.0,622.96,343.110949
3,103322.0,global,0.923077,0.27735,0.0,0.0,959.769231,358.604739
4,103322.0,local,0.916667,0.288675,0.0,0.0,999.75,221.561453


In [9]:
# Preview the Corsi summary: max highest_span / n_items and mean/std of status per participant.
ed.corsi_summary_stats.head()

Unnamed: 0_level_0,participant,highest_span,n_items,status,status
Unnamed: 0_level_1,Unnamed: 1_level_1,max,max,mean,std
0,100934.0,4,5,0.428571,0.534522
1,103322.0,6,7,0.625,0.517549
2,107700.0,6,7,0.625,0.517549
3,117200.0,4,5,0.5,0.547723
4,117306.0,5,6,0.571429,0.534522


In [10]:
# Preview the Fitts summary: mean/std of delta and status per participant.
ed.fitts_summary_stats.head()

Unnamed: 0_level_0,participant,delta,delta,status,status
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
0,100934.0,-5.7,186.60713,1.0,0.0
1,103322.0,-203.05,174.3084,1.0,0.0
2,107700.0,-342.65,345.637059,1.1,0.447214
3,117200.0,-85.05,156.117188,1.0,0.0
4,117306.0,-141.8,167.808036,1.0,0.0


In [11]:
# Preview the participant-level demographics table.
ed.demographics.head()

Unnamed: 0,participant,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
0,816404.0,28.0,female,right,university,6.0,20.0,25-34,5502.0
1,221478.0,25.0,female,right,graduate school,7.0,60.0,18-24,12353.428571
2,192208.0,29.0,male,right,university,6.0,5.0,25-34,8302.285714
3,803102.0,47.0,male,right,university,7.0,74.0,45-54,10340.857143
4,844810.0,32.0,female,right,university,6.0,22.0,25-34,6612.428571


# Final Datasets

These curated datasets are now joined, pruned & transformed to produce the `final` set of datasets - on which we will conduct our analysis.


# Structure

#### wcst


In [12]:
# Preview the demographics table once more (repeats an earlier cell) ahead of the joins below.
ed.demographics.head()

Unnamed: 0,participant,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
0,816404.0,28.0,female,right,university,6.0,20.0,25-34,5502.0
1,221478.0,25.0,female,right,graduate school,7.0,60.0,18-24,12353.428571
2,192208.0,29.0,male,right,university,6.0,5.0,25-34,8302.285714
3,803102.0,47.0,male,right,university,7.0,74.0,45-54,10340.857143
4,844810.0,32.0,female,right,university,6.0,22.0,25-34,6612.428571


In [165]:
"""Describe the available data associated with the class"""
# Data dictionary for the curated datasets: one section per DataFrame, listing each
# column as `name : type : meaning`. The "DataFrame:" label on the right of every
# section header names the attribute on `ed` that holds that table.
message = """

    ===========================================================================================================================
        WCST - Wisconsin Card Sorting Task                                                  DataFrame: ed.raw.wcst_data
    ---------------------------------------------------------------------------------------------------------------------------
    
        participant                     : key               : participant ID
        card_no                         : categorical       : the card shown
        correct_card                    : categorical       : the card that should be clicked of the top four on screen      
        correct_persevering             : categorical       : the card that would be clicked if the participant is persevering
        seq_no                          : numeric           : trial number
        rule                            : categorical       : matching rule  
        card_shape                      : categorical       : current card shape
        card_number                     : categorical       : current card number
        card_colour                     : categorical       : current card colour
        reaction_time_ms                : numeric           : reaction time (ms)
        status                          : categorical       : 1=correct, 2=wrong card, 3=too slow
        card_selected                   : categorical       : card chosen
        error                           : binary            : 1=error, 0=no error
        perseverance_error              : binary            : 1=perseverance error,        0=otherwise
        not_perseverance_error          : binary            : 1=not a perseveration error, 0=otherwise

    ---------------------------------------------------------------------------------------------------------------------------    
        Demographic                                                                         DataFrame: ed.demographics
    ---------------------------------------------------------------------------------------------------------------------------

        participant                     : key 
        age_a                           : numeric
        gender_a                        : categorical 
        handedness_a                    : categorical
        education_a                     : categorical
        income_a                        : categorical 
        computer_hours_a                : numeric
        age_group                       : categorical
        mean_reation_time_ms            : numeric

    ---------------------------------------------------------------------------------------------------------------------------    
        N-Back                                                                              DataFrame: ed.nback_summary_stats
    ---------------------------------------------------------------------------------------------------------------------------

        participant                     : key 
        block_number                    : numeric       : trial block number 
        trial_counter   - count         : numeric       : number of trials in the block 
        score           - mean          : probability   : score of current trial (1=correct, 0=wrong)
                        - std           : probability       
        status          - mean          : probability   : whether the response given was a correct match (1=correct, 0=wrong)
                        - std           : probability   
        miss            - mean          : probability   : whether the response given was a miss (1=miss, 0=otherwise)
                        - std           : probability     
        false_alarm     - mean          : probability   : 1=participant clicked but there was no-match, 0=otherwise
                        - std           : probability    
        reaction_time_ms- mean          : numeric       
                        - std           : numeric  

    ---------------------------------------------------------------------------------------------------------------------------    
        Navon                                                                              DataFrame: ed.navon_summary_stats
    ---------------------------------------------------------------------------------------------------------------------------

        participant                     : key 
        level_of_target                 : categorical   : type of signal (global/local/none)
        correct         - mean          : probability   : correct action
                        - std           : probability
        too_slow        - mean          : probability   : acted too slow
                        - std           : probability
        reaction_time_ms- mean
                        - std

    ---------------------------------------------------------------------------------------------------------------------------    
        Corsi Block Span                                                                    DataFrame: ed.corsi_summary_stats
    ---------------------------------------------------------------------------------------------------------------------------

        participant                     : key 
        highest_span    - max           : numeric       : highest corsi block span
        n_items         - max           : numeric       : (max) number of items to be remembered
        status          - mean          : probability   : current trial (1=correct, 0=wrong)
                        - std           : probability 

    ---------------------------------------------------------------------------------------------------------------------------    
        Fitts Law                                                                          DataFrame: ed.fitts_summary_stats
    ---------------------------------------------------------------------------------------------------------------------------

        participant                     : key 
        delta           - mean          : numeric       : average deviation in expected (fitts law) performance
                        - std           : numeric       : std dev in expected (fitts law) performance
        status          - mean          : numeric       : status (1=correct, 2=error, 3=too slow)
                        - std           : numeric     
                        
    ===========================================================================================================================
"""
# Uncomment to display the data dictionary in the cell output.
# print(message)


In [185]:
# ed.demographics.head()
# ed.nback_summary_stats.head()
# ed.navon_summary_stats.head()
# ed.corsi_summary_stats.head()
# ed.fitts_summary_stats.head()

Unnamed: 0_level_0,participant,delta,delta,status,status
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
0,100934.0,-5.7,186.60713,1.0,0.0
1,103322.0,-203.05,174.3084,1.0,0.0
2,107700.0,-342.65,345.637059,1.1,0.447214
3,117200.0,-85.05,156.117188,1.0,0.0
4,117306.0,-141.8,167.808036,1.0,0.0


In [152]:
# --- WCST trials enriched with participant demographics ---x
# Columns kept from the raw WCST table before joining.
wcst_columns = [
    'participant', 'card_no', 'seq_no', 'rule', 'card_shape', 'card_number',
    'card_colour', 'reaction_time_ms', 'status', 'card_selected', 'error',
    'perseverance_error', 'not_perseverance_error',
]
wcst = ed.raw.wcst_data[wcst_columns]

# Join on the participant index; the result is only displayed, not stored.
wcst.set_index('participant').join(
    ed.demographics.set_index('participant')
)


Unnamed: 0_level_0,card_no,seq_no,rule,card_shape,card_number,card_colour,reaction_time_ms,status,card_selected,error,perseverance_error,not_perseverance_error,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
100934.0,52,1,shape,triangle,1,yellow,4567,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,59,2,shape,triangle,3,red,4661,1,3,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,23,3,shape,cross,2,red,1319,1,2,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,4,shape,triangle,1,yellow,2336,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,5,shape,triangle,1,yellow,6634,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998593.0,61,2,color,triangle,4,blue,2489,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,3,color,circle,4,green,2193,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,4,color,circle,4,green,2310,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,23,5,color,cross,2,red,1430,1,2,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286


In [169]:
# --- demographics dataset ---x
# ed.nback_summary_stats
# ed.fitts_summary_stats

In [61]:
# ---- Scatter Plots ----x

# --- data ---x
# WCST trial records: the selected columns of the raw table.
df1 = ed.raw.wcst_data[['participant', 'seq_no', 'card_no', 'rule', 'card_shape', 'card_number',
                        'card_colour', 'reaction_time_ms',
                        'status', 'card_selected', 'error','perseverance_error', 'not_perseverance_error']]
# Participant demographics (every column except mean_reation_time_ms).
df2 = ed.demographics[[ 'participant', 'age_a','gender_a','handedness_a','education_a', 'income_a', 
                        'computer_hours_a','age_group']]
# Join on the participant index so every WCST row carries its participant's demographics.
df  = df1.set_index('participant').join(df2.set_index('participant'))


# --- scatter plot ---x
# x holds computer_hours_a and y holds age_a, so the axis titles are given in that order.
trace_1 = go.Scatter(x=df.computer_hours_a, y=df.age_a, mode='markers', marker=dict(size=12, color='steelblue', symbol='pentagon', line={'width':2}))
data    = [trace_1]
layout  = go.Layout(title='Age vs computer hours', xaxis={'title':'computer hours'}, yaxis={'title':'age'}, hovermode='closest')
fig     = go.Figure(data=data, layout=layout)
# Writes the figure to an HTML file; the returned filename is the cell output.
pyo.plot(fig, filename='temp-plot.html')

'temp-plot.html'

In [66]:
# ---- gender / education by trial number (marker plot) ----x
# Both traces share the x axis (seq_no, the trial number) and plot a categorical
# demographic on y. Each trace is named and coloured so the legend can tell them apart.
trace_1 = go.Scatter(x=df.seq_no, y=df.gender_a, mode='markers', name='gender_a', marker=dict(size=5, color='darkblue', symbol='circle'))
trace_2 = go.Scatter(x=df.seq_no, y=df.education_a, mode='markers', name='education_a', marker=dict(size=5, color='firebrick', symbol='circle'))

data    = [trace_1, trace_2]
# Axis titles describe the data actually plotted (x = seq_no, y = category labels).
layout  = go.Layout(title='Gender and education by trial number', xaxis={'title':'seq_no (trial number)'}, yaxis={'title':'category'})
fig = go.Figure(data=data, layout=layout)
# No filename is given, so this writes to the default 'temp-plot.html' -- the same
# file the scatter-plot cell writes to.
pyo.plot(fig)


'temp-plot.html'