# Process the WCST data

Transform the raw WCST data into a format suitable for final processing.

----
```
author:     Zach Wolpe
email:      zachcolinwolpe@gmail.com
date:       21 January 2022
```
----

In [10]:
import sys
sys.path.append('../process data')
from dependencies import *
# import nbformatx

# Encode Choices

In [81]:
# encode choice cards --------*
# The four key (choice) cards: card id plus the colour, shape and number shown on it.
choice_cards = pd.DataFrame(
    {
        'card':   [1, 2, 3, 4],
        'colour': ['red', 'green', 'blue', 'yellow'],
        'shape':  ['circle', 'triangle', 'cross', 'star'],
        'number': [1, 2, 3, 4],
    }
)

# encode choice cards --------*
# Same table, but with the attribute column spelled 'color' instead of 'colour'.
choice_cards_2 = choice_cards.rename(columns={'colour': 'color'})
choice_cards_2

Unnamed: 0,card,color,shape,number
0,1,red,circle,1
1,2,green,triangle,2
2,3,blue,cross,3
3,4,yellow,star,4


# Correct WCST

- Account for `no rule match`
- rebalance choice data

In [82]:
# create correct sequence ------------------------------------------------------------------------------------------------------------------*
# Run-length encoded rule schedule: seq_correct[i] is the correct rule for seq_no[i] consecutive trials.
seq_correct = ['shape','number','color','shape','number','color','number','shape','color','shape','number','shape','color','shape','color']
seq_no      = [10,9,8,8,7,5,6,7,5,5,7,6,5,6,6]

# Expand the schedule into one rule label per trial.
seq_final   = [
    rule
    for rule, run_length in zip(seq_correct, seq_no)
    for _ in range(run_length)
]
# create correct sequence ------------------------------------------------------------------------------------------------------------------*

In [83]:
# load data -----*
# Read the pickled raw trial table, then narrow it down in two steps: the first selection
# still includes 'correct_card', the second drops it and fixes the final column order.
loc         = '/Users/zachwolpe/Documents/Production/Dynocog/Python Implementation/final instance/model-free analysis/final_dataframes'
wcst_raw    = (
    pd.read_pickle(loc + '/wcst_raw_data.pkl')
    [['participant', 'rule', 'card_shape', 'card_number', 'card_colour', 'status','correct_card','card_selected']]
    [['participant', 'status', 'rule', 'card_shape', 'card_number', 'card_colour', 'card_selected']] # correct_card
)

In [84]:
# compute pseudo rule used
# For every trial, label which attribute of the selected choice card matches the stimulus card.
# Exactly ONE label is appended per row so that `matching_rule` always has the same length as
# `wcst_raw`. Independent `if` checks would append several labels when the selected card
# matches on more than one attribute, and the column assignment below would then fail.
matching_rule = []
for idx in tqdm(wcst_raw.index):

    sub = wcst_raw.loc[idx,]

    # extract info about card selected
    card_spec = choice_cards.loc[choice_cards.card==sub['card_selected'],]
    col = card_spec['colour'].values[0]
    shp = card_spec['shape'].values[0]
    nmb = card_spec['number'].values[0]

    if sub.status == 1:
        # status == 1 rows get the placeholder label 'cxx' rather than an attribute name
        matching_rule.append('cxx')

    # find matching rule: first match wins, checked in the order shape -> number -> color
    elif sub['card_shape'] == shp:
        matching_rule.append('shape')
    elif sub['card_number'] == nmb:
        matching_rule.append('number')
    elif sub['card_colour'] == col:
        matching_rule.append('color')

    else:
        # NO MATCH!
        matching_rule.append('no rule')

wcst_raw['matching_rule'] = matching_rule


100%|██████████| 27400/27400 [00:07<00:00, 3844.68it/s]


In [85]:
# encode action space --------------------------------------------------------------------------------------------------------*
# `rule_correct` is the fixed rule schedule (`seq_final`) repeated once per unique participant.
# The list is assigned positionally, so this only lines up if every participant contributes
# exactly len(seq_final) rows, stored contiguously and in trial order.
wcst_raw['rule_correct']    = len(wcst_raw.participant.unique())*seq_final # correct encoding
wcst_raw['rule_used']       = None
rules                       = ['color', 'shape', 'number']

# Seeded generator: the random re-assignment below is written to disk later in the notebook,
# so it has to be identical on every Restart & Run All.
rule_rng                    = np.random.default_rng(2022)


rules_used = []
for idx in tqdm(wcst_raw.index):
    sub = wcst_raw.loc[idx,]

    # no change! the matched attribute differs from the scheduled rule, keep it as the rule used
    if sub.matching_rule != sub.rule_correct:
        rules_used.append(sub.matching_rule)

    # rule change required: the matched attribute equals the scheduled rule,
    # so draw one of the two other rules at random
    elif sub.matching_rule != 'no rule':
        sub_rules = [i for i in rules if i != sub.rule_correct]
        rules_used.append(rule_rng.choice(sub_rules))

    # no rule applied! (only reachable if 'no rule' ever appears in rule_correct)
    else:
        rules_used.append(None)


# status==2: random sample
wcst_raw['rule_used'] = rules_used

# status==1: correct rule 
wcst_raw.loc[wcst_raw.status==1, 'rule_used'] = wcst_raw.loc[wcst_raw.status==1, 'rule_correct'] 

# status==3: too slow
wcst_raw.loc[wcst_raw.status==3, 'rule_used'] = 'time out'
# encode action space --------------------------------------------------------------------------------------------------------*


100%|██████████| 27400/27400 [00:02<00:00, 12181.19it/s]


In [93]:
# Sanity check: list the distinct labels present in the `rule_used` column.
wcst_raw['rule_used'].unique()

array(['shape', 'color', 'number', 'time out', 'no rule'], dtype=object)

In [66]:
# DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED 
# Exact Correct Card: STORED ------------------------------*
# given the 'rule' locate the correct card.
# correct_card = []
# for idx in tqdm(wcst_raw.index):

#     sub     = wcst_raw.loc[idx,]
#     rule    = sub['rule']

#     if rule == 'color':     ix = 'card_colour'
#     if rule == 'shape':     ix = 'card_shape'
#     if rule == 'number':    ix = 'card_number'

#     card_indx = 1
#     for r in choice_cards_2[rule]:

#         # if match
#         if r == sub[ix]: correct_card.append(card_indx)
#         card_indx += 1


# wcst_raw['correct_card_indx'] = correct_card
# wcst_raw
# DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED 

In [65]:
# DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED 
##CORRECT MAPPING!
# numbs           = [10,9,8,8,7,5,6,7,5,5,7,6,5,6,6]
# rules           = ['number', 'shape', 'number', 'number', 'shape', 'number', 'shape', 'number', 'number', 'number', 'shape', 'number', 'number', 'number', 'number']
# altered_map     = ['number', 'shape', 'number', 'color', 'shape', 'color', 'shape', 'number', 'color', 'number', 'shape', 'number', 'number', 'number', 'color']
# correct_rules   = []
# adj_corr_rules  = []
# for n,r,adj in zip(numbs, rules, altered_map): 
#     correct_rules.append(n*[r])
#     adj_corr_rules.append(n*[adj])

# # flatten
# correct_rules   = [i for sublist in correct_rules  for i in sublist]
# adj_corr_rules  = [i for sublist in adj_corr_rules for i in sublist]
# # correct_rules
# adj_corr_rules
# DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED 

In [94]:
# Rt (correct action) --------* 
# reward is 1 on trials with status == 1 and 0 otherwise
wcst_raw['reward'] = (wcst_raw.status == 1).astype(int)

# encode actions -------------*
# NOTE(review): the 'na' key never matches any `rule_used` value produced above
# ('time out' / 'no rule' / None), so those rows keep action = None. Confirm whether
# they were meant to be encoded as 0.
encoding =  {'shape':1, 'number':2, 'color':3, 'na':0}
wcst_raw['action'] = None
for k, code in encoding.items():
    # single .loc assignment: chained indexing (df['action'][mask] = ...) may write to a
    # temporary copy and is a silent no-op under pandas Copy-on-Write
    wcst_raw.loc[wcst_raw.rule_used == k, 'action'] = code

# # check ----------------------*
# explicit copy so that adding `n_t` does not write into a slice of wcst_raw
wcst = wcst_raw[['participant', 'reward', 'status', 'action', 'rule', 'rule_correct', 'rule_used']].copy()

# trial counter: row index modulo 100
# assumes a default 0..N-1 integer index and 100 trials per participant — TODO confirm
wcst['n_t'] = wcst.index.to_numpy() % 100

Unnamed: 0,participant,reward,status,action,rule,rule_correct,rule_used,n_t
0,816404.0,1,1,1,shape,shape,shape,0
1,816404.0,1,1,1,shape,shape,shape,1
2,816404.0,1,1,1,shape,shape,shape,2
3,816404.0,1,1,1,shape,shape,shape,3
4,816404.0,1,1,1,shape,shape,shape,4
...,...,...,...,...,...,...,...,...
27395,684712.0,1,1,3,color,color,color,95
27396,684712.0,1,1,3,color,color,color,96
27397,684712.0,1,1,3,color,color,color,97
27398,684712.0,1,1,3,color,color,color,98


`note`: `matching_rule` has changed to `rule_used`, which may affect downstream analysis.

In [97]:
# save -------------------------------------*
# Write the processed trial table twice: once as a pickle and once as a CSV.
loc = '/Users/zachwolpe/Documents/Production/Dynocog/Python Implementation/final instance/model-free analysis/final_dataframes/'
pickle_path = loc + 'wcst.pkl'
csv_path    = loc + 'wcst.csv'
wcst.to_pickle(pickle_path)
wcst.to_csv(csv_path)
# save -------------------------------------*