# Binarization, Median-splits, Z-norming
`4.trial_bin_split_znorm`

Binarize, apply median splits, mean-centering, and other secondary trial-level variant variables.

### Setup: Modules, Directories, & Data Import

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
gaze_dir = os.path.join('..','derivatives','03.gaze-import')
bins_dir = os.path.join('..','derivatives','04.binarized')
try: os.mkdir(bins_dir)
except OSError as e:
    print(e)

[WinError 183] Cannot create a file when that file already exists: '..\\derivatives\\04.binarized'


In [3]:
file = os.path.join(gaze_dir,'econdec-full_task-all_eye.csv')
data = pd.read_csv(file)[[
    'study','subjnum','block','trial','domain',
    'estimation','trueprob','choicert','esttaskrt','outcomert',
    'stockchosen','stockvalue','stockpic','bondpic','facepic',
    'bondmem','stockmem','facemem','oscillations','dwell-time','osc-rate'
]]

## Calculate/Transform
* 'trueprob-norm'
* 'val-estdiff',
* 'abs-estdiff',
* 'b4choiceprob',
* 'optimalchoicewas',
* 'abs-stockvalue',
* 'facemem-clean-reverse','val-estdiff-valid',

In [4]:
def normalize_framing(study,trueprob):
    if study == 2: trueprob_norm = 1 - trueprob
    else: trueprob_norm = trueprob
    return(trueprob_norm)
data['trueprob-norm'] = np.vectorize(normalize_framing)(data['study'],data['trueprob'])

In [5]:
data.estimation = data.estimation.str.strip('\"\'\\|[]{};:,<.>/?!@#$%^&*()_+=-`~AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz')
data.estimation = data.estimation.astype(float).astype(int)
data['val-estdiff'] = (data['estimation']/100) - data['trueprob-norm']
data['abs-estdiff'] = abs(data['val-estdiff'])

In [6]:
data['b4choiceprob'] = data['trueprob-norm']

In [7]:
def optimal_choice(b4choiceprob,stockchosen):
    if b4choiceprob == 0.5: return(None)
    else: return( int(stockchosen == int(b4choiceprob > 0.5)) )

np.vectorize(optimal_choice)(data['b4choiceprob'],data['stockchosen'])

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

In [182]:
for i in range(len(data[['trueprob-norm','stockchosen']])):
    
    if data['block'][i] == data['block'][i-1] or data['trial'][i] == 1:
        b4choiceprob = 0.5
    else:
        b4choiceprob = data['trueprob-norm'][i-1]
        
    stockchosen = data['stockchosen'][i]
    
    if b4choiceprob == 0.5: print(None)
    else: print( int(stockchosen == int(b4choiceprob > 0.5)) )

KeyError: -1

## Exclude
* Invalid:
    * 'abs-estdiff-valid',

* 3SD outliers:
    * 'choicert-3sd-12',
    * 'esttaskrt-3sd-123',
    * 'esttaskrt-3sd-12',
    * 'esttaskrt-3sd-123',
    * 'outcomert-3sd-12',
    * 'outcomert-3sd-123',

## Binarize
- 'binary-domain',
- 'bin-abs-stockvalue',

In [52]:
data['binary-domain'] = np.vectorize(lambda domain: 1 if domain == 'GAIN' else -1)(data['domain'])

## Z-normalize:
* 'val-estdiff-valid-mc-12',
* 'val-estdiff-valid-mc-123',
* 'abs-estdiff-valid-mc-12',
* 'abs-estdiff-valid-mc-123',
* 'choicert-mc-12',
* 'choicert-mc-123',
* 'esttaskrt-mc-12',
* 'esttaskrt-mc-123',
* 'outcomert-mc-12',
* 'outcomert-mc-123',
* 'bin-abs-stockvalue-mc-12',
* 'bin-abs-stockvalue-mc-123',
* 'facemem-mc-123',

## Median Splits
* 'facemem-split-123',
* 'choice-split-12',
* 'choice-split-123',
* 'outcome-split-12',
* 'outcome-split-123',

### ChoiceRT

#### choicert-median-12

In [None]:
data[data['study'] != 3]['choicert'].median()

#### choicert-median-123

In [None]:
data.groupby(['study']).median()['choicert']

### OutcomeRT

#### outcomert-median-12

In [None]:
data[data['study'] != 3]['outcomert'].median()

#### outcomert-median-123

In [None]:
data.groupby(['study']).median()['outcomert']

### FaceMem

#### clean and reverse-scale

In [None]:
data['facemem-clean-reverse'] = (pd.to_numeric(data['facemem'], errors='coerce') * -1) + 5

#### median-split not necessary. Mean-center?

In [None]:
data[data['study'] != 3]['facemem-clean-reverse'].median()

In [None]:
data.groupby(['study']).median()['facemem-clean-reverse']

# IGNORE?:
* 'facemem-median-123',
* 'choice-median-12',
* 'choice-median-123',
* 'outcome-median-12',
* 'outcome-median-123'

### Output

In [None]:
file = os.path.join(bins_dir,'econdec-all_task-all.csv')
data.to_csv(file, index=False)