In [1]:
# ready for use 061518
# Analysis Step 3

import numpy as np
import pandas as pd
import pickle
import feather
import fnmatch
import os
import re

In [2]:
def load_obj(name):
    """Unpickle and return the object stored in ``<name>.pkl``.

    ``name`` is the path without the ``.pkl`` extension; the extension is
    appended here. NOTE: unpickling executes code embedded in the file, so
    only use this on pickle files you created yourself.
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [3]:
def save_obj(obj, name):
    """Pickle ``obj`` into ``<name>.pkl`` using the highest pickle protocol.

    ``name`` is the path without the ``.pkl`` extension; an existing file at
    that path is overwritten.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
import csv

def write_list_to_file(missing_list, missing_filename):
    """Write every entry of ``missing_list`` on its own line of ``missing_filename``.

    Each entry is converted with ``str`` and followed by a newline. No
    delimiters or quoting are added, and an existing file is overwritten.
    """
    with open(missing_filename, "w") as outfile:
        outfile.writelines(str(item) + "\n" for item in missing_list)

In [7]:
# Subjects and blocks iterated over by the processing cells of this notebook.
sub_list = [33]
block_list = list(range(1, 6))  # blocks 1 through 5

In [8]:

# Per-trial pass: record the fraction of missing pupil samples for every trial
# and write a linearly interpolated trace for trials that are complete enough.
for sub in sub_list:
    mydata = load_obj("eye_data_prepared_sub%d" % sub)
    for b in block_list:
        block_data = mydata["eye_data_sub%d_block%d" % (sub, b)]['all_trial_data']
        missing_trials = []

        for trial_key, trial_data in block_data.items():
            # Samples that cannot be parsed as numbers become NaN and count as missing.
            pupil_num = pd.to_numeric(trial_data['LEFT_PUPIL_SIZE'], errors='coerce')
            n_missing = pupil_num.isnull().sum()
            n_samples = len(pupil_num)
            missing_fraction = n_missing / n_samples

            # The log holds the key and its fraction as two consecutive entries,
            # so they end up on two consecutive lines of the output file.
            missing_trials += [trial_key, missing_fraction]

            # Trials with more than 40% missing samples are logged but not exported.
            if not (missing_fraction <= .40):
                continue

            # TODO (open question from the original author): should NaNs at the
            # beginning (and end?) be excluded instead of filled?
            # limit_direction='both' lets the linear interpolation also fill
            # NaNs at the beginning and end of the trace.
            pupil_filled = pupil_num.interpolate(limit_direction='both')
            feather_name = "pupil_int_%s.feather" % (trial_key)
            # For reading back: df = feather.read_dataframe(path)
            feather.write_dataframe(pupil_filled.to_frame(), feather_name)

        missing_filename = "missing_trials_sub%d_block%d.csv" % (sub, b)
        write_list_to_file(missing_trials, missing_filename)

In [441]:
# Next step (outside this notebook): run the R scripts that apply the Hampel filter to the trial feather files written above.

In [9]:
# for baseline
def _interpolate_and_save(raw_pupil, filename, max_missing=.40):
    """Coerce a pupil trace to numeric, interpolate it if usable, and save it.

    Parameters
    ----------
    raw_pupil : the 'LEFT_PUPIL_SIZE' values of one baseline period; entries
        that cannot be parsed as numbers are treated as missing.
    filename : feather file to write when the trace is usable.
    max_missing : largest fraction of missing samples (inclusive) for which
        the trace is still interpolated and written.

    Returns
    -------
    The fraction of missing samples. The feather file is written only when
    this fraction is <= ``max_missing``.
    """
    pupil = pd.to_numeric(raw_pupil, errors='coerce')
    percent = pupil.isnull().sum() / len(pupil)  # missing samples / total samples
    if percent <= max_missing:
        # Linear interpolation of missing pupil values; limit_direction='both'
        # also fills NaNs at the beginning and end of the trace.
        pupil_int = pupil.interpolate(limit_direction='both')
        # For reading back: df = feather.read_dataframe(path)
        feather.write_dataframe(pupil_int.to_frame(), filename)
    return percent


for sub in sub_list:
    data_file = "eye_data_prepared_sub%d" % sub
    mydata = load_obj(data_file)
    for b in block_list:
        block_file = "eye_data_sub%d_block%d" % (sub, b)
        # Log of alternating entries: label, fraction missing, label, fraction missing, ...
        missing_trials = []

        # Block-level "first" baseline: a single trace per block.
        first_baseline_data = mydata[block_file]['first_baseline_data']
        k = "baseline_first_trial_sub%d_block%d" % (sub, b)
        filename = "pupil_int_first_baseline_sub%d_block%d.feather" % (sub, b)
        percent = _interpolate_and_save(first_baseline_data['LEFT_PUPIL_SIZE'], filename)
        missing_trials.extend([k, percent])

        # Per-trial baselines: one trace per key of 'trials_baseline_data'.
        trials_baseline_data = mydata[block_file]['trials_baseline_data']
        for k, baseline_data in trials_baseline_data.items():
            filename = "pupil_int_%s.feather" % (k)
            percent = _interpolate_and_save(baseline_data['LEFT_PUPIL_SIZE'], filename)
            missing_trials.extend([k, percent])

        missing_filename = "missing_baseline_trials_sub%d_block%d.csv" % (sub, b)
        write_list_to_file(missing_trials, missing_filename)

In [440]:
# Next step (outside this notebook): run the R scripts that apply the Hampel filter to the baseline feather files written above.

In [6]:
# read hampel files back into python

def _baseline_corrected(bt_file, at_file):
    """Read one baseline/trial feather pair and return the normalized record.

    The trial data are divided by the median of the baseline data
    (``DataFrame.median()``, i.e. one median per column). The returned dict
    holds both file names, the baseline median, the divided trial data and
    the mean of the divided trial data.
    """
    bt_data = feather.read_dataframe(bt_file)  # pandas DataFrame
    at_data = feather.read_dataframe(at_file)
    bt_median = bt_data.median()
    at_div = at_data / bt_median
    return {
        "bt_file": bt_file,
        "at_file": at_file,
        "bt_median": bt_median,
        "at_div": at_div,
        "at_mean": at_div.mean(),
    }


for sub in sub_list:
    data_dict = {}
    for block in block_list:
        hamp_baseline_file = 'hampel_pupil_int_baseline_trial*sub%d_block%d.feather' % (sub, block)
        hamp_trial_file = 'hampel_pupil_int_trial*sub%d_block%d.feather' % (sub, block)
        hamp_first_base_file = 'hampel_pupil_int_first_baseline_sub%d_block%d.feather' % (sub, block)

        # List the working directory once and filter it for both patterns.
        dir_entries = os.listdir('.')
        base_file_list = fnmatch.filter(dir_entries, hamp_baseline_file)  # excludes first baseline
        trial_file_list = fnmatch.filter(dir_entries, hamp_trial_file)

        dict_block_name = 'eye_output_sub%d_block%d' % (sub, block)
        data_dict[dict_block_name] = {}

        trial_number_list = []
        for at in trial_file_list:
            # The trial number is the first run of digits in the file name
            # (the fixed prefix 'hampel_pupil_int_trial' contains none).
            trial_id = re.findall(r'\d+', at)[0]
            trial_id2 = 'trial' + trial_id + '_'  # trailing '_' so 'trial1_' does not match 'trial10_'
            trial_id22 = 'trial' + trial_id
            trial_number_list.append(int(trial_id))
            for bt in base_file_list:
                if trial_id2 not in bt:
                    continue
                # Trial normalized by its own baseline; a trial without a
                # matching baseline file gets no entry here.
                data_dict[dict_block_name][trial_id22] = _baseline_corrected(bt, at)

        if not trial_number_list:
            # min() on an empty list would raise ValueError; leave this block's
            # entry empty and carry on with the remaining blocks.
            print('No hampel trial files found for sub%d block%d; block left empty' % (sub, block))
            continue

        # The lowest-numbered trial that has a hampel file is normalized by the
        # block's first baseline, replacing any entry stored for it above.
        # NOTE(review): if the real first trial was dropped upstream (too much
        # missing data, so no feather file), a later trial is treated as the
        # first one here -- confirm this is intended.
        first_trial = min(trial_number_list)
        trial_id22 = 'trial' + str(first_trial)
        hamp_first_trial_file = 'hampel_pupil_int_%s_sub%d_block%d.feather' % (trial_id22, sub, block)
        data_dict[dict_block_name][trial_id22] = _baseline_corrected(hamp_first_base_file, hamp_first_trial_file)

    dict_filename = 'eye_data_COMPLETE_sub%d' % sub
    save_obj(data_dict, dict_filename)
                    
                    
                    
            
                    
                
            
        
                

            