In [1]:
import pickle
import datetime
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
from scipy.spatial.transform import Rotation as R
import seaborn as sns
from pympler import asizeof
from scipy import interpolate
from ahrs.filters import EKF
from ahrs import QuaternionArray
from scipy.stats import zscore
#%matplotlib inline
%matplotlib widget

# Processing Survey Data

In [2]:
FOLDER = '/Volumes/Secondary/PhDStudy_Results'

In [4]:
def printSurveys(session):
    surveys = session['surveys']
    for s in surveys:
        print(s)
        for k in surveys[s]:
            print(f'\t{k}[{type(surveys[s][k])}]: {surveys[s][k]}') 

            
def z_score_dicts(dict_list, keys):
    """
    This function z-scores the values of specified keys across all dictionaries 
    and appends them to the dictionaries with a 'z_' prefix.
    
    Args:
        dict_list (list): List of dictionaries.
        keys (list): List of keys to z-score.

    Returns:
        list: List of dictionaries with z-scored values appended.
    """
    for key in keys:
        print(key)
        # Get the values for the key from all dictionaries where it exists, along with their indices
        values, indices = zip(*[(d[key], i) for i, d in enumerate(dict_list) if key in d])
        print(values)
        # Calculate the z-scores
        z_scores = zscore(values)

        # Append the z-scores to the appropriate dictionaries
        for z, i in zip(z_scores, indices):
            dict_list[i]['z_' + key] = z
                
    return dict_list

            
def session_times(session):
    ''' given a session, send back ((start_time, end_time), (transition_start, noticed), unnoticed_timedelta, extra_timedelta, duration_timedelta) '''
    #session started
    start_time = session['timings'][session['timings']['event']=='START_TEST'].index.values[0]
    
    #additional time after noticing the light but before starting the survey.  For the second phase of each activity there was an option to continue playing tetris/flow activity
    extra_time = session['timings'][session['timings']['event']=='START_SURVEY'].index.values[-1]-session['timings'][session['timings']['event']=='NOTICED'].index.values[0]
    
    #timing of the start of the LED change
    transition_start = session['timings'][session['timings']['event']=='START_TRANSITION'].index.values[0]
    #time of noticing
    noticed = session['timings'][session['timings']['event']=='NOTICED'].index.values[-1]
    
    #session ends when noticed...
    end_time = noticed
    #...unless they actually took extra time (i.e. more than 30 sec). then set the end_time to when they start the survey instead.
    if datetime.timedelta(seconds=extra_time.astype('timedelta64[s]') / np.timedelta64(1, 's')) > datetime.timedelta(seconds=30):
        end_time = session['timings'][session['timings']['event']=='START_SURVEY'].index.values[-1]
        
    #calc duration and unnoticed time
    duration = end_time - start_time 
    unnoticed = noticed - transition_start
    
    return (start_time, end_time), (transition_start, noticed), unnoticed.astype("timedelta64[s]"), extra_time.astype("timedelta64[s]"), duration.astype("timedelta64[s]")

            
def get_p_data(user, folder):
    with open(folder + '/P' + str(user) + '_Processed.pickle', 'rb') as handle:
        return pickle.load(handle)

def individual_flow_analysis(surveys, durs):
    print(len(surveys))
    keys = ['startSurveyTime', 'durGuess', 'durTimeConfidence', 'guessTime', 'timeAtGuess', 'guessTimeConfidence', 
            'timeExp', 'focus', 'effort', 'deepest', 'flow', 'durFlow', 'durToFlow', 'percentFlow', 'flowDesc', 
            'FSS1', 'FSS2', 'FSS3', 'FSS4', 'FSS5', 'FSS6', 'FSS7', 'FSS8', 'FSS9', 
            'flowQ1', 'flowQ2', 'flowQ3', 'flowQ4', 'shallowFQ', 'deepFQ', 'BIT_score', 'BIT_std']
    
    surveys = [{key: d[key] for key in set(keys) & set(d.keys())} for d in surveys]
    
    for survey in surveys:
        try:
            survey['FSS_avg'] = (survey['FSS1'] + survey['FSS2'] + survey['FSS3'] + survey['FSS4'] + survey['FSS5'] + survey['FSS6'] + survey['FSS7'] + survey['FSS8'] + survey['FSS9']) / 9.0
        except:
            pass
        
    surveys = z_score_dicts(surveys, ['timeExp', 'focus', 'effort', 'deepest', 'durFlow', 'percentFlow', 'durTimeConfidence', 'guessTimeConfidence',
                                     'FSS1', 'FSS2', 'FSS3', 'FSS4', 'FSS5', 'FSS6', 'FSS7', 'FSS8', 'FSS9', 'FSS_avg',
                                     'flowQ1', 'flowQ2', 'flowQ3', 'flowQ4', 'shallowFQ', 'deepFQ', 'BIT_score', 'BIT_std'])
    #discussion of individual across conditions: let's z-score everything
    print('[1] LAB-TETRIS')
    for s in surveys[1]:
        print(f'\t{s}: {surveys[1][s]}')
    print('[2] LAB-FLOWACT')
    for s in surveys[2]:
        print(f'\t{s}: {surveys[2][s]}')
    print('[3] HOME-TETRIS')
    for s in surveys[3]:
        print(f'\t{s}: {surveys[3][s]}')
    print('[4] HOME-FLOWACT')
    for s in surveys[4]:
        print(f'\t{s}: {surveys[4][s]}')
    print('[5] HOME-FLOWLONG')
    for s in surveys[5]:
        print(f'\t{s}: {surveys[5][s]}')
    # tetris vs flow.  DIFF tetris/flow in lab; DIFF tetris/flow at home. Average.
    
    
    #home vs lab.  DIFF tetris/tetris, home/home.  Average.
    
    #home vs home unobserved. DIFF for flow activity.
    
    #flow activity vs all others.
    
    
    #RELATIONSHIP betwen FSS-9 and shallow/deep FQ and percent flow, deepest flow
    
    
    #two-way ANOVA; Z_Score before or not?  Justification for both.  Nothing individual.
    
    
    
    
def individual_time_analysis(surveys, durs):
    pass

def individual_state_analysis(surveys):
    #select only the keys we care about
    keys = ['nowAlertness', 'nowStress', 'nowEmotion', 'nowEmoIntensity', 'reactionTimesMs']
    surveys = [{key: d[key] for key in set(keys) & set(d.keys())} for d in surveys]
    
    # tetris vs flow.  DIFF tetris/flow in lab; DIFF tetris/flow at home. Average.
    
    
    #home vs lab.  DIFF tetris/tetris, home/home.  Average.
    
    #home vs home unobserved. DIFF for flow activity.
    
    #flow activity vs all others.
    
    
    #RELATIONSHIP betwen FSS-9 and shallow/deep FQ and percent flow, deepest flow
    
    
    for s in surveys:
        print(s)

def analyze_p_surveys(user, folder=FOLDER):
    
    #grab all data + a baseline assessment of personality and thriving + survey for session 3
    s_data, baseline, final_session = get_p_data(user, folder)
    #BASELINE has info about enjoyment of video games and tetris; it also has TAS, BIT, and FQ baselines
    
    print('-- last survey')
    print(final_session)
    ## Organize it into useful sections
    
    #labels for final surveys after activities.  If we're analyzing session flows, it's these surveys + final_session
    final_activity_titles = ['LabMid2Survey', 'LabFinalSurvey','HomeMidSurvey','HomeFinalSurvey']
    
    #if we're analyzing time estimation and duration to notice, we include these with the ones above
    time_guess_titles = ['LabMidActivitySurvey','HomeMidActivitySurvey']
    
    #if we're looking at all data over the course of the study (alertness/stress/emotion/reaction time) these are included with the above
    #these also include 'lastSeenTimeStartSurvey' -- the last clock value they should've seen before they started the task.
    pre_survey_titles = ['LabMid1Survey', 'HomeStartSurvey']
    
    final_activity_surveys, time_guess_surveys, user_state_surveys, durations = [], [], [], []
    
    for session in s_data:
        
        durs = {}
        try:
            _, _, unnoticed_dur, extra_dur, task_dur = session_times(session)
            durs['unnoticed'] = unnoticed_dur
            durs['extra'] = extra_dur
            durs['task'] = task_dur
        except:
            pass
        
        for survey in session['surveys']:
            print(survey)
            if survey in final_activity_titles:
                final_activity_surveys.append(session['surveys'][survey])
                time_guess_surveys.append(session['surveys'][survey])
                user_state_surveys.append(session['surveys'][survey])
                durations.append(durs)
            elif survey in time_guess_titles:
                time_guess_surveys.append(session['surveys'][survey])
                durations.append(durs)
            elif survey in pre_survey_titles:
                user_state_surveys.append(session['surveys'][survey])
    
    #BIT, flow ratings, overall session time estimates.  BIT and FQ can be compared to baseline
    individual_flow_analysis([baseline, *final_activity_surveys, final_session], durations)
    
    #duration estimates and time to notice light
    individual_time_analysis([*time_guess_surveys, final_session], durations)
    
    #alertness, stress, emotion, reaction_times
    individual_state_analysis([*user_state_surveys, final_session])
                    
                    

analyze_p_surveys(1)

-- last survey
{}
LabStartSurvey
LabMid1Survey
LabMidActivitySurvey
LabMid2Survey
LabMidActivitySurvey


KeyError: 'surveys'

In [6]:
user = 1
with open(FOLDER + '/P' + str(user) + '_Processed.pickle', 'rb') as handle:
    session_data, entrance, exit = pickle.load(handle)

In [10]:
TIMEZONE_OFFSET = 4
def process_ui_data(data, user):
    '''process the ui data from the session_data into a DF of important timestamps and a dictionary of survey data'''
    
    SURVEY_NAMES = ['LabStartSurvey', 
                    'LabMid1Survey', 
                    'LabMidActivitySurvey', 
                    'LabMid2Survey', 
                    'LabFinalSurvey',
                    'HomeStartSurvey', 
                    'HomeMidSurvey', 
                    'HomeMidActivitySurvey',  
                    'HomeFinalSurvey']          
                    
    # Define lists for two dataframes
    timestamps = []
    surveys = {}
    current_survey = 'UNKNOWN'
    # Traverse through data
    for row in data:
        if row[2] in ['START_TEST', 'STOP_TEST', 'START_TRANSITION', 'FINISHED_TRANSITION', 'NOTICED']:
            timestamps.append([pd.to_datetime(row[0])-datetime.timedelta(hours=TIMEZONE_OFFSET), row[2]])
        elif row[2] == 'SURVEY':
            if 'empaticaStartTime' == row[3]:
                timestamps.append([pd.to_datetime(row[4]+row[5]), 'EMPATICA_START_TIME'])
            elif 'empaticaEndTime' == row[3]:
                timestamps.append([pd.to_datetime(row[4]+row[5]), 'EMPATICA_END_TIME'])
            elif 'recordingStartTime' == row[3]:
                timestamps.append([pd.to_datetime(row[4]+row[5]), 'RECORDING_START_TIME'])
            elif row[4] in SURVEY_NAMES:
                timestamps.append([pd.to_datetime(int(row[3]), unit='ms')-datetime.timedelta(hours=TIMEZONE_OFFSET), 'START_SURVEY'])
                current_survey = row[4]
                surveys[current_survey]={}
            else:
                try:
                    surveys[current_survey][row[3]] = (row[4] if len(row)==5 else row[4:])
                except:
                    print(f'GOT unknown survey: {row}')
    
    # Create dataframes
    df_times = pd.DataFrame(timestamps, columns=['timestamp', 'event'])
    df_times['timestamp'] = pd.to_datetime(df_times['timestamp'], utc=True)
    df_times['timestamp'] = df_times['timestamp'].dt.tz_localize(None)
    df_times.set_index('timestamp', inplace=True)

    for s in surveys:
        print('\t>> processing ' + s)
        surveys[s] = codeBIT(surveys[s])
        surveys[s] = codeFlowDesc(surveys[s])
        print(surveys[s])
        surveys[s] = transformFlowPathPlot(surveys[s], folder, 'P' + str(user) + s)
        
    return df_times, surveys

df_times, survey_results = process_ui_data(session_data[4]['ui'], user)
            

IndexError: list index out of range

In [134]:
#PROCESS SURVEYS

# we want to z-score FlowQs per question, as is typically done
# we also want to z-score FSS.  We'll do a global FSS.

# LOOT AT -- individual: tetris vs flow
# LOOK AT -- individual: tetris diff home vs lab, flow diff home vs lab, flow diff home1 vs home2
# LOOK AT -- for a bunch of participants same as above

#flow experienced?  Errors in time/duration
#certainty of time.






Unnamed: 0_level_0,x,y,z,imu_tick_ms
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-04-14 14:10:07.736000+00:00,8.687500,-4.726562,-0.425781,36
2023-04-14 14:10:07.761000+00:00,8.609375,-4.726562,-0.425781,59
2023-04-14 14:10:07.784500+00:00,8.535156,-4.609375,-0.425781,53
2023-04-14 14:10:07.800500+00:00,8.535156,-4.687500,-0.386719,62
2023-04-14 14:10:07.820500+00:00,8.574219,-4.687500,-0.425781,86
...,...,...,...,...
2023-04-14 14:14:48.977500+00:00,9.414062,-2.617188,-0.042969,86
2023-04-14 14:14:48.995000+00:00,9.414062,-2.617188,-0.042969,5
2023-04-14 14:14:49.039000+00:00,9.378906,-2.578125,-0.003906,179
2023-04-14 14:14:49.058500+00:00,9.378906,-2.617188,-0.042969,99


In [127]:
for session in session_data:
    if session['session_type'] in ['LAB_TETRIS', 'TETRIS_CONTD', 'LAB_FLOW', 'FLOW_CONTD', 'HOME_TETRIS', 'HOME_FLOW']:
        
        print(session['session_type'])
        taskbounds, transitionbounds, unnoticed_dur, extra_dur, task_dur = session_times(session)
        
        print(f'UNNOTICED TIME: {unnoticed_dur}')
        print(f'    EXTRA TIME: {extra_dur}')
        print(f'      DURATION: {task_dur}')
    
        for k in session['surveys']:
            survey = session['surveys'][k]
            if (k == 'LabStartSurvey'):
                print('IGNORE Lab Start Survey (had people do it online')
                print('May need to move results from P1-3 to online CSV')
        
            else:
                print(survey)

LAB_TETRIS
UNNOTICED TIME: 54 seconds
    EXTRA TIME: 17 seconds
      DURATION: 780 seconds
{'nowAlertness': 5, 'nowStress': 5, 'nowEmotion': 3, 'nowEmoIntensity': 2, 'freeEmotion': ['Focused', ' mildly annoyed at controls', ' feel adrenaline pumping'], 'freeFood': 'Coffee a few hours ago', 'freeAdditional': '', 'reactionTimesMs': [376, 1259, 324, 323, 509, 323, 341, 410, 326, 381, 323, 323, 324], 'LastSeenTimeStartSurvey': datetime.datetime(2023, 4, 14, 14, 14, 48)}
{'duration': 7, 'actualTimeAtDuration': datetime.datetime(2023, 4, 14, 14, 28, 5)}
TETRIS_CONTD
UNNOTICED TIME: 19 seconds
    EXTRA TIME: 280 seconds
      DURATION: 993 seconds
{'startSurveyTime': datetime.datetime(2023, 4, 14, 14, 40, 10), 'durGuess': 25, 'timeAtDurGuess': datetime.datetime(2023, 4, 14, 14, 41, 14), 'durTimeConfidence': 2, 'guessTime': datetime.datetime(2023, 4, 14, 14, 45), 'timeAtGuess': datetime.datetime(2023, 4, 14, 14, 40, 35), 'guessTimeConfidence': 2, 'flowPath': '', 'flowCanvasSize': ['null', '

# Processing Time Series Data

In [None]:
def session_times(session):
    ''' given a session, send back ((start_time, end_time), (transition_start, noticed), unnoticed_timedelta, extra_timedelta, duration_timedelta) '''
    #session started
    start_time = session['timings'][session['timings']['event']=='START_TEST'].index.values[0]
    
    #additional time after noticing the light but before starting the survey.  For the second phase of each activity there was an option to continue playing tetris/flow activity
    extra_time = session['timings'][session['timings']['event']=='START_SURVEY'].index.values[-1]-session['timings'][session['timings']['event']=='NOTICED'].index.values[0]
    
    #timing of the start of the LED change
    transition_start = session['timings'][session['timings']['event']=='START_TRANSITION'].index.values[0]
    #time of noticing
    noticed = session['timings'][session['timings']['event']=='NOTICED'].index.values[-1]
    
    #session ends when noticed...
    end_time = noticed
    #...unless they actually took extra time (i.e. more than 30 sec). then set the end_time to when they start the survey instead.
    if datetime.timedelta(seconds=extra_time.astype('timedelta64[s]') / np.timedelta64(1, 's')) > datetime.timedelta(seconds=30):
        end_time = session['timings'][session['timings']['event']=='START_SURVEY'].index.values[-1]
        
    #calc duration and unnoticed time
    duration = end_time - start_time 
    unnoticed = noticed - transition_start
    
    return (start_time, end_time), (transition_start, noticed), unnoticed.astype("timedelta64[s]"), extra_time.astype("timedelta64[s]"), duration.astype("timedelta64[s]")


def chop_df(df, taskbounds):
    """
    This function filters a dataframe based on the provided start and end times.
    The dataframe should have timestamps as the index.
    
    Args:
        df (pd.DataFrame): Input dataframe with timestamps as the index.
        taskbounds: Tuple of (start_time, end_time), where each is an (np.datetime64)
        
    Returns:
        pd.DataFrame: A new dataframe with only the data between the provided start and end times.
    """
    start_time, end_time = taskbounds

    start_time = pd.to_datetime(start_time).tz_localize('UTC')
    end_time = pd.to_datetime(end_time).tz_localize('UTC')

    # Filter the dataframe
    df_filtered = df[(df.index >= start_time) & (df.index <= end_time)]

    new_length = len(df_filtered)
    chopped_beginning = df[df.index < start_time].shape[0]
    chopped_end = df[df.index > end_time].shape[0]

    # Calculate and print the number of samples chopped off the beginning and end of the dataframe
    print(f"Filtered dataframe length: {new_length}")
    print(f"Number of samples chopped off from the beginning: {chopped_beginning}")
    print(f"Number of samples chopped off from the end: {chopped_end}")

    return df_filtered
   

def pull_activity_data(session, df_name, timebounds=None):
    ''' given a session, return the data that falls between the start and end times of the task (started the task to the notice event, or the survey if they took extra time) if timebounds==None.
    if timebounds are passed, pull the data in between those timebounds.  df_name can take on values 'blinks','acc','gyro','quaternions','thermal','watch_temp','watch_lux' '''
    
    if timebounds == None:
        timebounds, _, _, _, _ = session_times(session)
    
    return chop_df(session[df_name], taskbounds)