In [124]:
from pprint import pprint
from datetime import datetime
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
from utils import *
from utils import markdown
from scipy import stats
from collections import defaultdict
from itertools import product

def data_iterator():
    """Yield one record per (participant, experiment) pair found under 'data/Processed'.

    The nested dictionary is loaded with ``load_nested_dict`` (entries listed in
    ``IGNORE_NO_EYETRACKING`` are passed as the ``ignore`` argument).

    Yields:
        tuple: ``((participant_index, participant), (experiment_index, experiment_label), data)``
        where the indices are the enumeration positions within their respective
        levels and ``experiment_label`` is the experiment key with its first three
        characters removed.
    """
    processed = load_nested_dict('data/Processed', ignore=IGNORE_NO_EYETRACKING)
    for participant_idx, participant_entry in enumerate(processed.items()):
        participant, experiments = participant_entry
        for experiment_idx, experiment_entry in enumerate(experiments.items()):
            experiment_key, experiment_data = experiment_entry
            # drop the three-character prefix of the experiment key
            experiment_label = experiment_key[3:]
            yield (participant_idx, participant), (experiment_idx, experiment_label), experiment_data

def compute_eyetracking_intervals(gaze_only=True):
    """Collect, for every participant/experiment/task, the intervals spent looking at the task window.

    For each record produced by ``data_iterator`` and each task in its ``'tasks'``
    mapping (visited in sorted order), the eye-tracking samples are tested against
    the task window from ``ALL_WINDOW_PROPERTIES`` and turned into time intervals
    with ``compute_time_intervals``. Interval times are expressed relative to the
    record's ``'start_time'``.

    Args:
        gaze_only: when True, a sample only counts as "in the window" if its
            ``'gaze'`` flag is also set; when False the flag is ignored.

    Returns:
        pandas.DataFrame with columns ``participant``, ``experiment``, ``task``,
        ``t1`` (interval start) and ``t2`` (interval end), one row per interval.
        If no records are available the frame is empty but still has these columns.
    """
    columns = ['participant', 'experiment', 'task', 't1', 't2']

    def _compute_eyetracking_intervals(data, task, start_time, finish_time):
        data = data['eye_tracking']
        window_properties = ALL_WINDOW_PROPERTIES[task]
        # flag samples whose (x, y) falls inside the task window
        intaskbox = in_box(data['x'].to_numpy(), data['y'].to_numpy(), window_properties['position'], window_properties['size'])
        if gaze_only:
            # additionally require the sample's gaze flag to be set
            gaze = data['gaze'].to_numpy().astype(bool)
            intaskbox = np.logical_and(intaskbox, gaze).astype(int)
        return compute_time_intervals(intaskbox, data['timestamp'].to_numpy(), start_time, finish_time, pad='next').intervals

    def _gen():
        for (_, participant), (_, experiment), _data2 in data_iterator():
            start_time, finish_time = _data2['start_time'], _data2['finish_time']
            for task in sorted(_data2['tasks']):
                # shift to experiment-relative time
                wi = _compute_eyetracking_intervals(_data2, task, start_time, finish_time) - start_time
                n = wi.shape[0]
                yield np.full(n, participant), np.full(n, experiment), np.full(n, task), wi[:, 0], wi[:, 1]

    chunks = list(_gen())
    if not chunks:
        # zip(*[]) would yield nothing and silently drop every column
        return pd.DataFrame(columns=columns)
    data = [np.concatenate(x, axis=0) for x in zip(*chunks)]
    return pd.DataFrame(dict(zip(columns, data)))


# Build the interval table for every participant/experiment/task. Passing False for
# gaze_only skips the 'gaze' flag filter, so every sample inside a task window counts.
df = compute_eyetracking_intervals(False)

def get_transition_sequenced_binned(df, bin_size=0.2):
    """Encode a set of task intervals as one character per fixed-width time bin.

    Time is shifted so that the earliest ``t1`` is 0 and then cut into bins of
    width ``bin_size``. For each bin the time every task occupies it is summed and
    the bin is labelled with the capitalised first letter of the task with the
    largest share (ties go to the task that appears first in time order). Bins
    that no interval touches are labelled ``'N'``.

    Args:
        df: DataFrame with columns ``task``, ``t1`` (interval start) and ``t2``
            (interval end). It is not modified.
        bin_size: width of a bin, in the same units as ``t1``/``t2``; must be > 0.

    Returns:
        str: one character per bin; ``""`` when ``df`` has no rows.

    Raises:
        ValueError: if ``bin_size`` is not a positive number.
    """
    if bin_size is None or bin_size <= 0:
        raise ValueError(f"bin_size must be a positive number, got {bin_size!r}")

    intervals = df[['task', 't1', 't2']]
    if intervals.empty:
        # nothing to bin; min()/max() would be NaN and int(NaN) raises
        return ""
    intervals = intervals.sort_values('t1').reset_index(drop=True)

    # shift so the first interval starts at t = 0 (removes the empty lead-in)
    origin = intervals['t1'].min()
    starts = intervals['t1'].to_numpy(dtype=float) - origin
    ends = intervals['t2'].to_numpy(dtype=float) - origin
    task_names = intervals['task'].to_numpy()

    tasks = list(pd.unique(intervals['task']))
    column_of = {task: col for col, task in enumerate(tasks)}
    n_bins = int(ends.max() / bin_size) + 1

    # One column per task plus a trailing "none" column holding a tiny constant,
    # so that bins with no task time resolve to 'N' rather than to the first task.
    # Accumulating in a plain array avoids chained DataFrame assignment, which
    # does not write through under pandas Copy-on-Write.
    occupancy = np.zeros((n_bins, len(tasks) + 1))
    occupancy[:, -1] = 1e-10

    for task, t1, t2 in zip(task_names, starts, ends):
        col = column_of[task]
        first = int(t1 // bin_size)
        last = int(t2 // bin_size)
        if first == last:
            # interval lies entirely inside one bin
            occupancy[first, col] += t2 - t1
        else:
            # partial first bin, partial last bin, full bins in between
            occupancy[first, col] += (first + 1) * bin_size - t1
            occupancy[last, col] += t2 - last * bin_size
            occupancy[first + 1:last, col] += bin_size

    labels = [name[0].capitalize() for name in tasks + ['n']]
    # argmax returns the first maximal column, matching DataFrame.idxmax(axis=1)
    return "".join(labels[col] for col in occupancy.argmax(axis=1))

# Computes the transition sequence for a given participant & experiment.
# The sequence may (bin_size > 0) or may not (bin_size is None) be temporally binned.
def get_transition_sequence(df, bin_size=0.2):
    """Return the task sequence for one participant/experiment as a string of task initials.

    Args:
        df: DataFrame with columns ``task``, ``t1`` (interval start) and ``t2``
            (interval end). It is not modified.
        bin_size: if a number, the sequence is temporally binned via
            ``get_transition_sequenced_binned`` (one character per bin). If None,
            one character is emitted per interval, in order of ``t1``.

    Returns:
        str: capitalised first letters of the task names.
    """
    if bin_size is not None:
        return get_transition_sequenced_binned(df, bin_size=bin_size)

    # Intervals are assumed not to overlap, so ordering by start time gives the
    # sequence. The sort is required because compute_eyetracking_intervals emits
    # rows task by task rather than chronologically; a stable sort keeps the
    # incoming order for rows that are already sorted.
    ordered = df[['task', 't1', 't2']].sort_values('t1', kind='stable')
    return "".join(name[0].capitalize() for name in ordered['task'])
   
    

        system  tracking      fuel             n
0     0.049967  0.000000  0.000000  1.000000e-10
1     0.000000  0.066769  0.000000  1.000000e-10
2     0.000000  0.100000  0.000000  1.000000e-10
3     0.000000  0.099680  0.000000  1.000000e-10
4     0.000000  0.000000  0.067002  1.000000e-10
...        ...       ...       ...           ...
1685  0.000000  0.000000  0.100000  1.000000e-10
1686  0.000000  0.000000  0.100000  1.000000e-10
1687  0.000000  0.000000  0.100000  1.000000e-10
1688  0.000000  0.000000  0.100000  1.000000e-10
1689  0.000000  0.000000  0.028403  1.000000e-10

[1690 rows x 4 columns]
STTTFFFFFSSTTFNNFFTTSSSSSSSSSTFFFFTTFFSSTTTSSFFFFFFFFFFFFFFFFFFFFTTTTTTTTSSFFFFFFFFFFFFSSSSSSSSSTTTSSTTTTNFFFFFFFFFSSFFFFFFFFFFTTFFFFFFFFFNNTTTFFFTTTFFFFFTTTTSSSTTFFFTTTTSSTTFFFFFFFFFFFFFFFFFFTFFFFSSTTTSTTTTTTTTTTTFFFFFFFFFFFFFFFFFFFFFFFFFTSSSSSSTTTSSSTTTSSNTTTTSFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFTTTTTTSSTTTSSSSSSSSTTTNNTTTTFFFFFFFTTTSTTTFFFTSSSSSSSSSSFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

In [97]:

# Scratch setup for checking the binning logic in isolation: a 10-row table of
# zeros (one row per bin) with one column per task letter — presumably the
# initials of the tracking / system / fuel tasks.
bins = 10
tasks = ['T', 'S', 'F']
df_bc = pd.DataFrame(data = np.zeros((bins, len(tasks))), columns=tasks)

def contribution(task, t1, t2, bin_size):
    """Add the time interval [t1, t2] to the per-bin totals of ``task`` in the global ``df_bc``.

    The interval is split across bins of width ``bin_size``: the first and last
    bins receive the partial overlap and every bin strictly in between receives a
    full ``bin_size``. The first and last bin indices are printed as a debugging aid.

    Args:
        task: column label in ``df_bc`` to accumulate into.
        t1: interval start.
        t2: interval end.
        bin_size: width of each bin (same units as ``t1``/``t2``).

    Returns:
        None. ``df_bc`` is updated in place.
    """
    start_index = int(t1 // bin_size)
    end_index = int(t2 // bin_size)
    print(start_index, end_index)
    # Write through a single positional indexer: the chained form
    # df_bc[task].iloc[i] += x updates a temporary under pandas Copy-on-Write.
    column = df_bc.columns.get_loc(task)
    if start_index == end_index:
        # interval lies entirely inside one bin
        df_bc.iloc[start_index, column] += t2 - t1
    else:
        # start contribution
        df_bc.iloc[start_index, column] += ((start_index + 1) * bin_size) - t1
        # end contribution
        df_bc.iloc[end_index, column] += t2 - (end_index * bin_size)
        # intermediate contribution (if any)
        df_bc.iloc[start_index + 1:end_index, column] += bin_size

# Sample usage: one interval that spans several bins of width 5.1
t1 = 7
t2 = 15.4
bin_size = 5.1

contribution("F", t1, t2, bin_size)  # prints "1 3" (first/last bin); adds time to bins 1, 2 and 3 of column "F"

# show the accumulated per-bin totals
print(df_bc)

1 3
     T    S    F
0  0.0  0.0  0.0
1  0.0  0.0  3.2
2  0.0  0.0  5.1
3  0.0  0.0  0.1
4  0.0  0.0  0.0
5  0.0  0.0  0.0
6  0.0  0.0  0.0
7  0.0  0.0  0.0
8  0.0  0.0  0.0
9  0.0  0.0  0.0
