# Binarization, Median-splits, Z-norming
`4.trial_bin_split_znorm`

Binarize, median-split, and mean-center trial-level measures, and derive other secondary trial-level variables.

### Setup: Modules, Directories, & Data Import

In [None]:
from pathlib import Path
from datetime import datetime

import pandas as pd
import numpy as np

In [None]:
# Today's date as a six-digit stamp (two-digit year, month, day); it is
# embedded in the input and output file names below.
date = format(datetime.today(), '%y%m%d')

In [None]:
from config import derivatives_dir

# Input directory (gaze-import step) and output directory for this notebook.
gaze_dir = derivatives_dir / '03.gaze-import'
bins_dir = derivatives_dir / '04.binarized'
# Create the output directory if it is missing. exist_ok=True replaces the
# separate exists()/mkdir() pair, so there is no window between the check and
# the creation, and re-running the cell is a no-op.
bins_dir.mkdir(exist_ok=True)

In [None]:
# Load the date-stamped CSV from the gaze-import directory. The name uses
# today's stamp, so the file must have been written under the same date.
file = gaze_dir.joinpath('econdec-full_task-all_eye_' + date + '.csv')
data = pd.read_csv(file)

In [None]:
# Columns kept from the imported data, in output order. The commented-out
# names are not selected here (presumably derived variables added at a later
# stage -- confirm before re-enabling any of them).
columns = [
    'study', 'subjnum', 'block', 'trial', 'domain',
    # 'binary-domain',
    'estimation', 'trueprob',
    # 'val-estdiff','val-estdiff-valid','val-estdiff-valid-mc-12','val-estdiff-valid-mc-123',
    # 'abs-estdiff','abs-estdiff-valid','abs-estdiff-valid-mc-12','abs-estdiff-valid-mc-123',
    'choicert',
    # 'choicert-3sd-12','esttaskrt-3sd-123','choicert-mc-12','choicert-mc-123',
    'esttaskrt',
    # 'esttaskrt-3sd-12','esttaskrt-3sd-123','esttaskrt-mc-12','esttaskrt-mc-123',
    'outcomert',
    # 'outcomert-3sd-12','outcomert-3sd-123','outcomert-mc-12','esttaskrt-mc-123',
    'stockchosen',
    # 'waschoiceoptimal','optimalchoicewas',
    'stockvalue',
    # 'abs-stockvalue','bin-abs-stockvalue',
    # 'bin-abs-stockvalue-mc-12','bin-abs-stockvalue-mc-123','b4choiceprob',
    'stockpic', 'bondpic', 'facepic',
    'bondmem', 'stockmem', 'facemem',
    # 'facemem-clean-reverse','facemem-mc-123','facemem-split-123','facemem-median-123',
    # 'choice-split-12','choice-split-123','outcome-split-12','outcome-split-123',
    # 'choice-median-12','choice-median-123','outcome-median-12','outcome-median-123'
    'oscillations', 'dwell-time', 'osc-rate',
]

In [None]:
# Keep only the selected columns. The explicit copy makes `data` an
# independent frame, so the column assignments in later cells do not trigger
# SettingWithCopyWarning.
data = data[columns].copy()

# Relatively simple measures

Validate and calculate Valenced & Absolute Estimated Difference:

In [None]:
# Signed difference: estimation (non-numeric entries coerced to NaN) divided
# by 100, minus trueprob. Note trueprob is converted without coercion, so a
# non-numeric value there raises.
data['val-estdiff-valid'] = (
    pd.to_numeric(data['estimation'], errors='coerce')
    .div(100)
    .sub(pd.to_numeric(data['trueprob']))
)
# Magnitude of the signed difference.
data['abs-estdiff-valid'] = data['val-estdiff-valid'].abs()

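Turn `trueprob` into `trueprob-good` and `trueprob-bad` based on study label: for study 2, `trueprob-good` is `1 - trueprob`; for all other studies it is `trueprob` unchanged. `trueprob-bad` is always `1 - trueprob-good`.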

In [None]:
def clean_trueprob(row):
    """Return the row's ``trueprob``, inverted for study 2.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['study']`` and ``row['trueprob']``.

    Returns
    -------
    ``1 - row['trueprob']`` when ``row['study'] == 2``; otherwise
    ``row['trueprob']`` unchanged.
    """
    is_study_two = row['study'] == 2
    probability = row['trueprob']
    return (1 - probability) if is_study_two else probability

In [None]:
# Vectorized equivalent of applying clean_trueprob to every row: keep trueprob
# where study is not 2, otherwise use 1 - trueprob. This avoids a Python-level
# call per row and also works when the frame is empty.
data['trueprob-good'] = data['trueprob'].where(data['study'] != 2, 1 - data['trueprob'])
# The complementary probability.
data['trueprob-bad'] = 1 - data['trueprob-good']
# The raw column is superseded by the two derived columns.
data.drop(columns='trueprob', inplace=True)

## Calculate Optimal Choice

## Median Splits

### ChoiceRT

#### choicert-median-12

In [None]:
# Median choicert over all rows whose study is not 3.
data.loc[data['study'] != 3, 'choicert'].median()

#### choicert-median-123

In [None]:
# Median choicert per study. Selecting the column before aggregating avoids
# computing medians for every column, which raises TypeError on non-numeric
# columns in pandas >= 2.0.
data.groupby('study')['choicert'].median()

### OutcomeRT

#### outcomert-median-12

In [None]:
# Median outcomert over all rows whose study is not 3.
data.loc[data['study'] != 3, 'outcomert'].median()

#### outcomert-median-123

In [None]:
# Median outcomert per study. Selecting the column before aggregating avoids
# computing medians for every column, which raises TypeError on non-numeric
# columns in pandas >= 2.0.
data.groupby('study')['outcomert'].median()

### FaceMem

#### clean and reverse-scale

In [None]:
# Coerce facemem to numeric (non-numeric entries become NaN) and reverse the
# scale by subtracting each value from 5.
data['facemem-clean-reverse'] = 5 - pd.to_numeric(data['facemem'], errors='coerce')

#### median-split not necessary. Mean-center?

In [None]:
# Median reverse-scaled facemem over all rows whose study is not 3.
data.loc[data['study'] != 3, 'facemem-clean-reverse'].median()

In [None]:
# Median reverse-scaled facemem per study. Selecting the column before
# aggregating avoids computing medians for every column, which raises
# TypeError on non-numeric columns in pandas >= 2.0.
data.groupby('study')['facemem-clean-reverse'].median()

### Output

In [None]:
# Show 11 randomly drawn rows as a visual check before writing the output.
data.sample(n=11)

In [None]:
# Date-stamped output file, named with the same '<name>_<date>.csv' pattern as
# the input file. The previous "'.' + date + 'csv'" put the dot before the
# date, so the file was written without a '.csv' extension.
file = bins_dir / ('econdec-all_task-all_' + date + '.csv')
# Write without the index so it is not read back as an extra unnamed column.
data.to_csv(file, index=False)