In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Folder that holds one sub-folder of recordings per pretest participant,
# resolved against the current working directory of the kernel.
experiment_data_folders_path = os.path.join(
    os.path.join(os.getcwd(), "Data"),
    "Pretest",
)
print(experiment_data_folders_path)

/home/jovyan/Masterarbeit/Data/Pretest


In [5]:
# for files with a single column of data
def read_lines_from_file_into_df(file_path, column_names):
    """Load a one-value-per-line text file as a single-row DataFrame.

    Every line of the file becomes one cell of the row (in file order) with
    surrounding whitespace removed; the values are kept as strings.

    Args:
        file_path: Path of the text file to read.
        column_names: Column labels for the resulting row; expected to hold
            one label per line of the file.

    Returns:
        A pandas DataFrame with exactly one row.
    """
    with open(file_path, "r") as source_file:
        # Iterating the handle yields the same lines readlines() would.
        row_values = list(map(str.strip, source_file))

    # Wrapping the values in a list makes them one row instead of one column.
    return pd.DataFrame([row_values], columns=column_names)

# for files with multiple columns of data
def get_df_from_file(file_path, column_names):
    """Parse a header-less, tab-separated file into a DataFrame.

    Args:
        file_path: Path of the file (or any input pandas.read_csv accepts).
        column_names: Labels assigned to the columns, in file order.

    Returns:
        The parsed DataFrame, with dtypes as inferred by pandas.
    """
    read_options = {"sep": "\t", "header": None, "names": column_names}
    return pd.read_csv(file_path, **read_options)

# for files with a single data point
def read_first_line_from_file(file_path):
    """Return the first line of a text file without surrounding whitespace.

    Only the first line is read. The line terminator and any other
    leading/trailing whitespace are stripped, so the value can be used
    directly, e.g. as part of a file name.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The stripped first line as a string.

    Raises:
        IndexError: If the file is empty (the exception type is unchanged
            from the previous list-indexing implementation).
    """
    with open(file_path, "r") as f:
        first_line = f.readline()

    # readline() returns '' only at end of file, i.e. when there is no line at all.
    if not first_line:
        raise IndexError(f"File '{file_path}' is empty; expected at least one line.")

    return first_line.strip()

def get_movement_timestamps_df_from_interaction_events_df(interaction_events_df):
    """Pair grab and release timestamps by their order of occurrence.

    The n-th 'Grab' event is placed in the same row as the n-th 'Release'
    event; no other matching (e.g. by interactor or item) is performed.

    Args:
        interaction_events_df: DataFrame with 'EventType' and 'Timestamp'
            columns.

    Returns:
        DataFrame with the columns 'GrabTimestamp' and 'ReleaseTimestamp'.
        If the two event counts differ, the shorter column is padded with NaN.
    """
    def timestamps_for(event_type):
        # Re-number from 0 so the two series line up row by row.
        is_event = interaction_events_df['EventType'] == event_type
        return interaction_events_df[is_event].reset_index(drop=True)['Timestamp']

    return pd.DataFrame({
        'GrabTimestamp': timestamps_for('Grab'),
        'ReleaseTimestamp': timestamps_for('Release'),
    })

def calculate_distance_and_speed(grab_timestamp, release_timestamp, movement_df):
    """Measure path length and mean speed between a grab and its release.

    Args:
        grab_timestamp: Start of the interval (inclusive).
        release_timestamp: End of the interval (inclusive).
        movement_df: DataFrame with 'Timestamp', 'PositionX', 'PositionY'
            and 'PositionZ' columns.

    Returns:
        Tuple (total_distance, average_speed). total_distance is the sum of
        the Euclidean distances between consecutive samples inside the
        interval; average_speed is that distance divided by
        (release_timestamp - grab_timestamp), or 0 when that duration is not
        positive.
    """
    # Keep only the samples recorded while the item was held (both ends inclusive).
    in_window = movement_df['Timestamp'].between(grab_timestamp, release_timestamp)
    samples = movement_df[in_window]

    # Per-axis displacement from one sample to the next; the first entry is NaN.
    step_x = samples['PositionX'].diff()
    step_y = samples['PositionY'].diff()
    step_z = samples['PositionZ'].diff()
    step_lengths = np.sqrt(step_x**2 + step_y**2 + step_z**2)

    # Series.sum() skips the leading NaN step.
    total_distance = step_lengths.sum()

    elapsed = release_timestamp - grab_timestamp
    if elapsed > 0:
        average_speed = total_distance / elapsed
    else:
        average_speed = 0

    return total_distance, average_speed

def get_grip_pressure(grab_timestamp, release_timestamp, movement_df, tolerance=0.5):
    """Return the mean grip pressure around one grab/release interval.

    The averaging window starts `tolerance` before the grab, because grip
    pressure can already rise before the item counts as fully grabbed, and
    ends at the release (both ends inclusive).

    Args:
        grab_timestamp: Timestamp of the grab event.
        release_timestamp: Timestamp of the release event.
        movement_df: DataFrame with 'Timestamp' and 'GripPressure' columns.
        tolerance: Lead time (seconds) subtracted from grab_timestamp to
            start the window early. Defaults to 0.5, the previously
            hard-coded value.

    Returns:
        Mean of 'GripPressure' over the window; NaN if no sample falls
        inside it.
    """
    window_start = grab_timestamp - tolerance
    in_window = (movement_df['Timestamp'] >= window_start) & (movement_df['Timestamp'] <= release_timestamp)

    return movement_df.loc[in_window, 'GripPressure'].mean()

def get_round_type(grab_timestamp, round_data):
    """Look up the type of the round in which a grab happened.

    Rows are checked in their stored order and the first one whose
    'RoundEndTimestamp' is at or after the grab wins.

    Args:
        grab_timestamp: Timestamp of the grab event.
        round_data: DataFrame with 'RoundEndTimestamp' and 'RoundType' columns.

    Returns:
        The 'RoundType' value of the first matching row, or None when every
        round ended before the grab.
    """
    matching_types = (
        round_row['RoundType']
        for _, round_row in round_data.iterrows()
        if grab_timestamp <= round_row['RoundEndTimestamp']
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matching_types, None)

def get_mistakes_for_rounds(round_data_df, round_type):
    """List the number of mistakes made in each round of one type.

    A round's mistakes are its 'TotalItems' minus its 'CorrectItems'.

    Args:
        round_data_df: DataFrame with 'RoundType', 'TotalItems' and
            'CorrectItems' columns.
        round_type: Value of 'RoundType' selecting the rounds to include.

    Returns:
        List with one mistake count per matching round, in row order; empty
        if no round has the requested type.
    """
    selected_rounds = round_data_df[round_data_df['RoundType'] == round_type]

    def mistakes_at(position):
        # Positional access, so the original index labels do not matter.
        round_row = selected_rounds.iloc[position]
        return round_row['TotalItems'] - round_row['CorrectItems']

    return [mistakes_at(position) for position in range(len(selected_rounds))]

In [7]:
IS_BETWEEN_SUBJECTS = False

# Column layout of SurveyAnswers.txt: one answer per line, in this order.
SURVEY_COLUMN_NAMES = [
    'GeneralCheck',
    'LowCLMentalDemand', 'LowCLPhysicalDemand', 'LowCLTemporalDemand',
    'LowCLPerformance', 'LowCLEffort', 'LowCLFrustration',
    'HighCLMentalDemand', 'HighCLPhysicalDemand', 'HighCLTemporalDemand',
    'HighCLPerformance', 'HighCLEffort', 'HighCLFrustration',
    'LeftHandConsciousMovement', 'RightHandConsciousMovement',
    'Presence1', 'Presence2', 'Presence3', 'Presence4', 'Presence5', 'Presence6',
    'ExperienceVR1', 'ExperienceVR2', 'ExperienceVR3',
    'NauseaNausea', 'NauseaDizziness',
    'VestibularDisorientation', 'VestibularImbalance',
    'OculomotorFatigue', 'OculomotorDiscomfort',
    'Gender', 'Education', 'EmployementStatus', 'Age',
]
# Answers that stay text; every other survey column is converted to int32.
SURVEY_TEXT_COLUMNS = ['GeneralCheck', 'Gender', 'Education', 'EmployementStatus']
SURVEY_INT_COLUMNS = [name for name in SURVEY_COLUMN_NAMES if name not in SURVEY_TEXT_COLUMNS]

# Item suffixes shared by the LowCL*/HighCL* questionnaire columns.
NASA_TLX_ITEMS = ['MentalDemand', 'PhysicalDemand', 'TemporalDemand',
                  'Performance', 'Effort', 'Frustration']
PRESENCE_COLUMNS = ['Presence1', 'Presence2', 'Presence3', 'Presence4', 'Presence5', 'Presence6']
EXPERIENCE_VR_COLUMNS = ['ExperienceVR1', 'ExperienceVR2', 'ExperienceVR3']


def _decimal_comma_to_float(values):
    """Convert a Series to float, accepting strings that use a decimal comma."""
    return values.map(lambda x: x.replace(',', '.') if isinstance(x, str) else x).astype('float')


def _find_round(grab_timestamp, round_data_df):
    """Return (RoundType, RoundNumber) of the first round ending at or after the grab.

    Returns (None, None) when no round end is at or after grab_timestamp.
    Assumes the rows of round_data_df are in chronological order - TODO confirm
    against the recording format.
    """
    for _, round_row in round_data_df.iterrows():
        if grab_timestamp <= round_row['RoundEndTimestamp']:
            return round_row['RoundType'], round_row['RoundNumber']
    return None, None


# One single-row frame per participant; concatenated once after the loop.
participant_frames = []

# Iterate through each folder in the directory (sorted, so the row order of the
# exported CSV is reproducible; os.listdir returns entries in arbitrary order).
for folder_name in sorted(os.listdir(experiment_data_folders_path)):
    folder_path = os.path.join(experiment_data_folders_path, folder_name)

    # Skip if it's not a directory or if it's a hidden/system directory (starts with '.')
    if not os.path.isdir(folder_path) or folder_name.startswith('.'):
        continue

    print('folder name: ' + folder_name)

    interaction_events_df = get_df_from_file(os.path.join(folder_path, 'InteractionEvents.txt'),
                                             ['Timestamp', 'EventType', 'Interactor', 'Interactable'])
    interaction_events_df['Timestamp'] = _decimal_comma_to_float(interaction_events_df['Timestamp'])

    movement_timestamps_df = get_movement_timestamps_df_from_interaction_events_df(interaction_events_df)

    # Strip the line terminator: the value is used to build a file name below.
    dominant_hand = read_first_line_from_file(os.path.join(folder_path, 'DominantHand.txt')).strip()

    # NOTE(review): 'Rotation Y' contains a space unlike its sibling columns;
    # left unchanged in case other cells refer to it by that name.
    dominant_hand_movement_df = get_df_from_file(os.path.join(folder_path, dominant_hand + '.txt'),
                                                 ['Timestamp', 'PositionX', 'PositionY', 'PositionZ',
                                                  'RotationX', 'Rotation Y', 'RotationZ', 'RotationW',
                                                  'GripPressure'])
    dominant_hand_movement_df = dominant_hand_movement_df.apply(_decimal_comma_to_float)

    # Round data is loaded before the per-movement loop because each grab is
    # assigned to a round there.
    round_data_df = get_df_from_file(os.path.join(folder_path, 'RoundData.txt'),
                                     ['RoundEndTimestamp', 'RoundNumber', 'RoundType', 'CorrectItems', 'TotalItems'])
    round_data_df['RoundEndTimestamp'] = _decimal_comma_to_float(round_data_df['RoundEndTimestamp'])

    distances = []
    speeds = []
    grip_pressures = []
    # These two lists were previously never built in this cell, so the cell
    # only ran when they happened to be left over in the kernel.
    round_types = []
    round_numbers = []

    # Iterate through each row in movement_timestamps_df
    for _, row in movement_timestamps_df.iterrows():
        grab_timestamp = row['GrabTimestamp']
        release_timestamp = row['ReleaseTimestamp']
        distance, speed = calculate_distance_and_speed(grab_timestamp, release_timestamp, dominant_hand_movement_df)
        grip_pressure = get_grip_pressure(grab_timestamp, release_timestamp, dominant_hand_movement_df)
        round_type, round_number = _find_round(grab_timestamp, round_data_df)
        distances.append(distance)
        speeds.append(speed)
        grip_pressures.append(grip_pressure)
        round_types.append(round_type)
        round_numbers.append(round_number)

    # Add the results to the DataFrame
    movement_timestamps_df['Distance'] = distances
    movement_timestamps_df['Speed'] = speeds
    movement_timestamps_df['GripPressure'] = grip_pressures
    movement_timestamps_df['RoundType'] = round_types
    movement_timestamps_df['RoundNumber'] = round_numbers

    print(movement_timestamps_df)

    movement_timestamps_low_cl_df = movement_timestamps_df[movement_timestamps_df['RoundType'] == 'LowCognitiveLoad']
    movement_timestamps_high_cl_df = movement_timestamps_df[movement_timestamps_df['RoundType'] == 'HighCognitiveLoad']

    # df containing raw survey answers
    survey_answers_raw_df = read_lines_from_file_into_df(os.path.join(folder_path, 'SurveyAnswers.txt'),
                                                         SURVEY_COLUMN_NAMES)

    # convert all to int except GeneralCheck, Gender, Education and EmploymentStatus
    survey_answers_raw_df = survey_answers_raw_df.astype({name: 'int32' for name in SURVEY_INT_COLUMNS})

    # Sub-scores that are reused for the CSQ-VR total below.
    csq_vr_nausea = survey_answers_raw_df['NauseaNausea'] + survey_answers_raw_df['NauseaDizziness']
    csq_vr_vestibular = survey_answers_raw_df['VestibularDisorientation'] + survey_answers_raw_df['VestibularImbalance']
    csq_vr_oculomotor = survey_answers_raw_df['OculomotorFatigue'] + survey_answers_raw_df['OculomotorDiscomfort']

    # df containing calculated values of constructs (item means / sums)
    survey_answers_df = pd.DataFrame({
        'GeneralCheck': survey_answers_raw_df['GeneralCheck'],
        'NASA-TLX-LowCL': survey_answers_raw_df[[f'LowCL{item}' for item in NASA_TLX_ITEMS]].mean(axis=1),
        'NASA-TLX-HighCL': survey_answers_raw_df[[f'HighCL{item}' for item in NASA_TLX_ITEMS]].mean(axis=1),
        'LeftHandConsciousMovement': survey_answers_raw_df['LeftHandConsciousMovement'],
        'RightHandConsciousMovement': survey_answers_raw_df['RightHandConsciousMovement'],
        'Presence': survey_answers_raw_df[PRESENCE_COLUMNS].mean(axis=1),
        'ExperienceVR': survey_answers_raw_df[EXPERIENCE_VR_COLUMNS].mean(axis=1),
        'CSQ-VR-Nausea': csq_vr_nausea,
        'CSQ-VR-Vestibular': csq_vr_vestibular,
        'CSQ-VR-Oculomotor': csq_vr_oculomotor,
        'CSQ-VR': csq_vr_nausea + csq_vr_vestibular + csq_vr_oculomotor,
        'Gender': survey_answers_raw_df['Gender'],
        'Education': survey_answers_raw_df['Education'],
        'EmployementStatus': survey_answers_raw_df['EmployementStatus'],
        'Age': survey_answers_raw_df['Age'],
    })

    participant_df = pd.DataFrame({'FolderName': folder_name,
                                   'MeanDistDomHandLowCL': movement_timestamps_low_cl_df['Distance'].mean(),
                                   'MeanSpeedDomHandLowCL': movement_timestamps_low_cl_df['Speed'].mean(),
                                   'MeanGripPressureDomHandLowCL': movement_timestamps_low_cl_df['GripPressure'].mean(),
                                   'MeanDistDomHandHighCL': movement_timestamps_high_cl_df['Distance'].mean(),
                                   'MeanSpeedDomHandHighCL': movement_timestamps_high_cl_df['Speed'].mean(),
                                   'MeanGripPressureDomHandHighCL': movement_timestamps_high_cl_df['GripPressure'].mean(),
                                  }, index=[0])

    low_cl_mistakes = get_mistakes_for_rounds(round_data_df, 'LowCognitiveLoad')
    high_cl_mistakes = get_mistakes_for_rounds(round_data_df, 'HighCognitiveLoad')

    # Add the mistakes columns dynamically (one column per round, numbered from 1)
    for round_position, round_mistakes in enumerate(low_cl_mistakes, start=1):
        participant_df[f'MistakesLowCL{round_position}'] = round_mistakes
    for round_position, round_mistakes in enumerate(high_cl_mistakes, start=1):
        participant_df[f'MistakesHighCL{round_position}'] = round_mistakes

    participant_df['MeanMistakesLowCL'] = np.mean(low_cl_mistakes)
    participant_df['MeanMistakesHighCL'] = np.mean(high_cl_mistakes)

    # Both frames have a single row; put movement/mistake columns and survey columns side by side.
    participant_df = pd.concat([participant_df.reset_index(drop=True), survey_answers_df.reset_index(drop=True)], axis=1)

    participant_frames.append(participant_df)

# Concatenate once instead of growing the frame inside the loop; fall back to an
# empty frame when no participant folder was found (pd.concat rejects an empty list).
participants_df = pd.concat(participant_frames, ignore_index=True) if participant_frames else pd.DataFrame()

participants_df.to_csv('Pretest.csv', sep = ';', decimal = ',', index=False)

folder name: 3
    GrabTimestamp  ReleaseTimestamp  Distance     Speed  GripPressure  \
0        42.02405          45.72687  2.879395  0.777622      0.871680   
1        81.00586          85.28149  2.709428  0.633691      0.888474   
2       128.07750         131.40490  2.870104  0.862567      0.861574   
3       134.21800         136.97530  2.098817  0.761186      0.838541   
4       139.56420         142.54320  2.178718  0.731359      0.830077   
5       145.84250         149.04450  2.482629  0.775337      0.854608   
6       152.10860         154.62790  2.366741  0.939444      0.822463   
7       156.38230         158.55390  1.668464  0.768311      0.794344   
8       206.43300         209.46690  2.596169  0.855720      0.850994   
9       249.50840         252.40500  2.405031  0.830294      0.846195   
10      254.71530         257.80620  2.173843  0.703304      0.855473   
11      260.39590         262.69320  1.891099  0.823184      0.809965   
12      264.72570         266.42330 