# Model Free Analysis

Statistical inquiry into the aggregate behaviour of the *Wisconsin Sorting* & *NBack* Tasks.


---------
```
Zach Wolpe
zachcolinwolpe@gmail.com
17 July 2021
```
---------



# Executive Functions

The additional experiments are provided to gauge executive functions and computer literacy that may distinguish candidates when participating in the WCST & NBack Tasks.

In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
import re
import sys
sys.path.append('../process data/')
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.express as px
from encode_processed_data import encode_data

In [12]:
# with open('../data objects/batch_processing_object.pkl', 'rb') as file2:
#     bp = pickle.load(file2)

# ---- fetch data object ----x
with open('../data objects/batch_processing_object_with_encodings.pkl', 'rb') as file2:
    ed = pickle.load(file2)

   
ed.__dict__.keys()

dict_keys(['raw', 'fitts_summary_stats', 'corsi_summary_stats', 'navon_summary_stats', 'nback_summary_stats', 'demographics_plot', 'demographics'])

In [13]:
ed.describe_data()



        ------------------------------------------------------------------
            self.path            : raw data loc
            self.metadata        : mturk metadata
            self.mapping         : reference table
            self.data_times      : reference times table
            self.participants    : list of participant identifiers
            self.parti_code      : list of participant codes
            self.n               : total number of samples
            self.wcst_paths      : paths to wcst  raw data
            self.nback_paths     : paths to nback raw data
            self.corsi_paths     : paths to corsi raw data
            self.fitts_paths     : paths to fitts raw data
            self.navon_paths     : paths to navon raw data
            self.wcst_data       : wcst  dataframe
            self.nback_data      : nback dataframe
            self.corsi_data      : corsi dataframe
            self.fitts_data      : fitts dataframe
            self.navon_data    

In [14]:
ed.clean_data_info()



                WCST - Wisconsin Card Sorting Task                                                  DataFrame: ed.raw.wcst_date
            ---------------------------------------------------------------------------------------------------------------------------
            
                participant                     : key               : participant ID
                card_no                         : categorical       : the card shown
                correct_card                    : categorical       : the card that should be clicked of the top four on screen      
                correct_persevering             : categorical       : the card that would be clicked if the participant is persevering
                seq_no                          : numeric           : trial number
                rule                            : categorical       : matching rule  
                card_shape                      : categorical       : current card shape
                card_num

In [15]:
# ----- all categories descriptors -----x
cats_demographics   = ['gender_a', 'handedness_a', 'education_a', 'age_group']
cats_navon          = [('level_of_target', '')]

# ---- add numerical descriptors ----x
num_demographics = ['age_a','income_a', 'computer_hours_a', 'mean_reation_time_ms']
num_nback = [('block_number', ''), ('score', 'mean'), ('score', 'std'), ('status', 'mean'), ('status', 'std'), ('miss', 'mean'), 
            ('miss', 'std'), ('false_alarm', 'mean'), ('false_alarm', 'std'), ('reaction_time_ms', 'mean'), ('reaction_time_ms', 'std')]
num_navon = [('correct', 'mean'), ('correct',  'std'), ('too_slow', 'mean'), ('too_slow',  'std'), 
            ('reaction_time_ms', 'mean'), ('reaction_time_ms',  'std')]
num_corsi = [('highest_span',  'max'), ('status', 'mean'), ('status',  'std')]
num_fitts = [('delta', 'mean'), ('delta',  'std'), ('status', 'mean')]


# ---- user selected menus ---x

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

In [99]:
# ---- base plots ----x

# x < 25
# 25 <= x < 50
# 50 <= x < 75
# 75 <= x 

# df.iloc[df.index <25, :]

# ---- add trial number ----x
xx = []; df = ed.raw.wcst_data
[xx.append((i%100)+1) for i in range(df.shape[0])]
df['trial_no'] = xx 

# ---- status==1 --> correct


n_groups=4; n_groups=10
t = np.linspace(0,100,num=n_groups+1).tolist(); c=0

for tt in t[1:]:
    c +=1
    x = df.loc[df['trial_no'] < tt,].groupby(['participant', 'status']).agg({
    'participant':              ['count'],
    'reaction_time_ms':         ['mean', 'std'],
    'perseverance_error':       ['mean'],
    'not_perseverance_error':   ['mean']
    }).reset_index()
    x['percentages'] = x[('participant', 'count')]/tt
    x['trials'] = str(round(t[c-1])) + '-' + str(round(t[c]))
    if c==1:    data=x
    else:       data=data.append(other=x)



Unnamed: 0_level_0,participant,status,participant,reaction_time_ms,reaction_time_ms,perseverance_error,not_perseverance_error,percentages,trials
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,mean,mean,Unnamed: 8_level_1,Unnamed: 9_level_1
0,100934.0,1,21,2435.857143,1537.558951,0.0,0.0,0.84,0-25
1,100934.0,2,3,1512.0,129.630243,0.0,1.0,0.12,0-25
2,103322.0,1,20,1599.5,716.376518,0.0,0.0,0.8,0-25
3,103322.0,2,4,2821.25,3216.095083,0.0,1.0,0.16,0-25
4,107700.0,1,18,1506.111111,414.343405,0.0,0.0,0.72,0-25
5,107700.0,2,6,2084.666667,1157.700076,0.0,1.0,0.24,0-25
6,117200.0,1,16,1210.625,340.999682,0.0,0.0,0.64,0-25
7,117200.0,2,8,1460.0,447.697602,0.0,1.0,0.32,0-25
8,117306.0,1,21,2164.380952,1272.244885,0.0,0.0,0.84,0-25
9,117306.0,2,3,1896.0,426.240542,0.0,1.0,0.12,0-25


25

In [84]:
data

NameError: name 'data' is not defined

In [118]:
x

Unnamed: 0_level_0,participant,status,status,reaction_time_ms,reaction_time_ms,perseverance_error,perseverance_error,not_perseverance_error,not_perseverance_error
Unnamed: 0_level_1,count,count,std,mean,std,mean,std,mean,std
participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
100934.0,3,3,0.0,1512.0,129.630243,0,0.0,1,0.0
103322.0,4,4,0.0,2821.25,3216.095083,0,0.0,1,0.0
107700.0,6,6,0.0,2084.666667,1157.700076,0,0.0,1,0.0
117200.0,8,8,0.0,1460.0,447.697602,0,0.0,1,0.0
117306.0,3,3,0.0,1896.0,426.240542,0,0.0,1,0.0
120307.0,15,15,0.0,2162.666667,1492.494062,0,0.0,1,0.0
122240.0,3,3,0.0,1666.333333,789.652033,0,0.0,1,0.0
127180.0,11,11,0.0,3076.181818,1670.831878,0,0.0,1,0.0
130202.0,7,7,0.0,1976.714286,1196.215381,0,0.0,1,0.0
130501.0,12,12,0.0,2757.75,2447.820445,0,0.0,1,0.0


# Final Datasets

These curated datasets are now joined, pruned & transformed to produce the `final` set of datasets - on which we will conduct our anaylsis.


# Structure

#### wcst


In [12]:
ed.demographics.head()

Unnamed: 0,participant,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
0,816404.0,28.0,female,right,university,6.0,20.0,25-34,5502.0
1,221478.0,25.0,female,right,graduate school,7.0,60.0,18-24,12353.428571
2,192208.0,29.0,male,right,university,6.0,5.0,25-34,8302.285714
3,803102.0,47.0,male,right,university,7.0,74.0,45-54,10340.857143
4,844810.0,32.0,female,right,university,6.0,22.0,25-34,6612.428571


In [185]:
# ed.demographics.head()
# ed.nback_summary_stats.head()
# ed.navon_summary_stats.head()
# ed.corsi_summary_stats.head()
# ed.fitts_summary_stats.head()

Unnamed: 0_level_0,participant,delta,delta,status,status
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
0,100934.0,-5.7,186.60713,1.0,0.0
1,103322.0,-203.05,174.3084,1.0,0.0
2,107700.0,-342.65,345.637059,1.1,0.447214
3,117200.0,-85.05,156.117188,1.0,0.0
4,117306.0,-141.8,167.808036,1.0,0.0


In [152]:
# --- demographics dataset ---x
wcst = ed.raw.wcst_data[['participant', 'card_no', 'seq_no', 'rule', 'card_shape', 'card_number', 'card_colour', 'reaction_time_ms', 'status',
                                    'card_selected', 'error','perseverance_error', 'not_perseverance_error']]
wcst.set_index('participant').join(ed.demographics.set_index('participant'))


Unnamed: 0_level_0,card_no,seq_no,rule,card_shape,card_number,card_colour,reaction_time_ms,status,card_selected,error,perseverance_error,not_perseverance_error,age_a,gender_a,handedness_a,education_a,income_a,computer_hours_a,age_group,mean_reation_time_ms
participant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
100934.0,52,1,shape,triangle,1,yellow,4567,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,59,2,shape,triangle,3,red,4661,1,3,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,23,3,shape,cross,2,red,1319,1,2,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,4,shape,triangle,1,yellow,2336,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
100934.0,52,5,shape,triangle,1,yellow,6634,1,1,0,0,0,28.0,male,right,university,7.0,25.0,25-34,11453.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998593.0,61,2,color,triangle,4,blue,2489,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,3,color,circle,4,green,2193,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,14,4,color,circle,4,green,2310,1,4,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286
998593.0,23,5,color,cross,2,red,1430,1,2,0,0,0,25.0,female,right,graduate school,8.0,72.0,18-24,9284.714286


In [169]:
# --- demographics dataset ---x
# ed.nback_summary_stats
# ed.fitts_summary_stats

In [61]:
# ---- Scatter Plots ----x

# --- data ---x
df1 = ed.raw.wcst_data[['participant', 'seq_no', 'card_no', 'rule', 'card_shape', 'card_number',
                        'card_colour', 'reaction_time_ms',
                        'status', 'card_selected', 'error','perseverance_error', 'not_perseverance_error']]
df2 = ed.demographics[[ 'participant', 'age_a','gender_a','handedness_a','education_a', 'income_a', 
                        'computer_hours_a','age_group']]
df  = df1.set_index('participant').join(df2.set_index('participant'))


# --- scatter plot ---x
trace_1 = go.Scatter(x=df.computer_hours_a, y=df.age_a, mode='markers', marker=dict(size=12, color='steelblue', symbol='pentagon', line={'width':2}))
data    = [trace_1]
layout  = go.Layout(title='title', xaxis={'title':'age'}, yaxis={'title':'computer hours'}, hovermode='closest')
fig     = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='temp-plot.html')

'temp-plot.html'

In [66]:
# ---- line plot ----x
trace_1 = go.Scatter(x=df.seq_no, y=df.gender_a, mode='markers', marker=dict(size=5, color='darkblue', symbol='circle'))
trace_2 = go.Scatter(x=df.seq_no, y=df.education_a, mode='markers', marker=dict(size=5, color='darkblue', symbol='circle'))

data    = [trace_1, trace_2]
layout  = go.Layout(title='R Time', xaxis={'title':'age'}, yaxis={'title':'computer hours'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)


'temp-plot.html'