# Bulk Data Processing
This notebook is for processing and saving data across multiple subjects in bulk.

# Setup
Run this cell first to import everything and set up the notebook.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import logging
import os
import pickle

import numpy as np
from foraging import utils

import utils.beliefs
import utils.data

# Notebook-wide configuration: random seed and data locations
# (paths are relative to the notebook's working directory).
SEED = 42
SAVE_DIR = '../data/analysis'
EXPERIMENTS_DIR = '../data/experiments'

# Matplotlib logs a lot below WARNING; only let warnings and above through.
mlogger = logging.getLogger('matplotlib')
mlogger.setLevel(logging.WARNING)

# Load experiment data
Load the MATLAB data files into a pandas DataFrame and apply the exclusion criteria.

In [2]:
# Build the dataframe from everything under EXPERIMENTS_DIR and pass it
# straight through the exclusion criteria, so `df` only ever refers to the
# filtered data.
df = utils.data.exclusion_criteria(
    utils.data.make_dataframe(EXPERIMENTS_DIR)
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,session id,_session,schedule,box,push times,same-box push intervals,reward outcomes,reward intervals,box rank,normalized pushes,consecutive push intervals,push # by box,stay/switch
subject,session,block,push #,stimulus type,shape,kappa,week day,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
dylan,1,2,1,probability,1,0.1,M,20211206,20,15.0,1,8.465,8.465,False,30.614,0,0.564333,8.465,1,False
dylan,1,2,2,probability,1,0.1,M,20211206,20,15.0,1,9.468,1.003,False,11.345,0,0.066867,1.003,2,False
dylan,1,2,3,probability,1,0.1,M,20211206,20,15.0,1,11.667,2.199,False,16.31,0,0.1466,2.199,3,False
dylan,1,2,4,probability,1,0.1,M,20211206,20,21.0,2,17.125,17.125,False,4.837,1,0.815476,5.458,1,True
dylan,1,2,5,probability,1,0.1,M,20211206,20,21.0,2,19.904,2.779,False,13.391,1,0.132333,2.779,2,False


# Compute beliefs without color
Compute the posterior for all blocks where the color cue was uninformative (`kappa = 0`).

In [3]:
# Integer values 1..29; forwarded to compute_posteriors and stored later
# under 'schedule candidates' in the saved results.
supp = np.arange(1, 30)

# Restrict to rows with kappa == 0.
df_nocol = utils.data.filter_df(df, {'kappa': 0})

# process_blocks returns a pair: the computed results and a second value
# kept here as `err_beliefs` (presumably per-block errors -- confirm in utils.data).
posteriors, err_beliefs = utils.data.process_blocks(
    df_nocol,
    utils.beliefs.compute_posteriors,
    supp,
    use_tqdm=True,
)

100%|██████████| 26/26 [00:00<00:00, 113.46it/s]


In [4]:
def func(df, index):
    """Compute latent beliefs over time for one block.

    Looks up the posterior previously stored under ``index`` in ``posteriors``
    and forwards it together with the fixed ``dt`` and ``padding_time`` values.
    """
    return utils.beliefs.compute_latent_beliefs_over_time(
        df, index, posteriors[index], dt=0.5, padding_time=0.5
    )


latent_beliefs_time, err_latent_time = utils.data.process_blocks(
    df_nocol, func, use_tqdm=True
)

100%|██████████| 26/26 [00:00<00:00, 531.53it/s]


In [5]:
def func(df, index):
    """Compute joint beliefs for one block from the posterior stored under ``index``."""
    return utils.beliefs.compute_joint_beliefs(df, index, posteriors[index])


joint_beliefs, err_joint = utils.data.process_blocks(
    df_nocol, func, use_tqdm=True
)

100%|██████████| 26/26 [00:01<00:00, 24.95it/s]


In [6]:
def func(df, index):
    """Compute reward beliefs for one block from the posterior stored under ``index``."""
    return utils.beliefs.compute_reward_beliefs(df, index, posteriors[index])


reward_beliefs, err_reward = utils.data.process_blocks(
    df_nocol, func, use_tqdm=True
)

100%|██████████| 26/26 [00:01<00:00, 22.19it/s]



## Perfect model
Compute the exact reward probabilities under a perfect model of the boxes (one where the schedules are known exactly)

In [7]:
# Apply compute_reward_probabilities to the kappa == 0 blocks; no posterior
# is passed in for this computation.
reward_probabilities, err_reward_probabilities = utils.data.process_blocks(
    df_nocol,
    utils.beliefs.compute_reward_probabilities,
    use_tqdm=True,
)

100%|██████████| 26/26 [00:00<00:00, 50.98it/s]


# Save data

In [8]:
# Persist every computed quantity, the second return value of each
# process_blocks call (stored under 'error'), and the parameters used, in a
# single pickle file under SAVE_DIR.

# Create the output directory if it is missing so a fresh checkout does not
# fail with FileNotFoundError when opening the file for writing.
os.makedirs(SAVE_DIR, exist_ok=True)

with open(os.path.join(SAVE_DIR, 'bulk_beliefs.pkl'), 'wb') as f:
    pickle.dump({
        'data': {
            'posteriors': posteriors,
            'latent_beliefs_over_time': latent_beliefs_time,
            'joint_beliefs': joint_beliefs,
            'reward_beliefs': reward_beliefs,
            'reward probabilities': reward_probabilities
        },
        'error': {
            'err_schedule_beliefs': err_beliefs,
            # Previously computed but never saved; added as a new key so
            # existing readers of this file are unaffected.
            'err_latent_beliefs_over_time': err_latent_time,
            'err_joint_beliefs': err_joint,
            'err_reward_beliefs': err_reward,
            'err_reward probabilities': err_reward_probabilities,
        },
        'schedule candidates': supp,
        # NOTE: these must match the dt / padding_time values passed to
        # compute_latent_beliefs_over_time earlier in the notebook.
        'dt': 0.5,
        'padding_time': 0.5
    }, f)