# Preparation of the dataset exported from LimeSurvey

## Helper functions and imports

In [17]:
# Import necessary libraries
import pandas as pd

# Function to convert A1L-A7H to numbers
# A1L to 1, A1M to 2, A1H to 3, etc.
# The function takes a string as input and returns a number as output


def convert_tlx_likert(x):
    """Convert a TLX answer code ("A1L" ... "A7H") to its position on the scale.

    The code consists of a prefix character, a group digit and a sub-step
    letter ('L', 'M' or 'H'). Every group covers three consecutive scale
    steps, so "A1L" -> 1, "A1M" -> 2, "A1H" -> 3, "A2L" -> 4, ...,
    "A7H" -> 21.

    Null values (as detected by ``pd.isnull``) are returned unchanged.

    Raises:
        ValueError: if the code is too short, its third character is not
            'L', 'M' or 'H', or its second character is not a digit.
    """
    if pd.isnull(x):
        return x

    # Position of the sub-step inside its group of three.
    step_in_group = {'L': 1, 'M': 2, 'H': 3}
    if len(x) < 3 or x[2] not in step_in_group:
        raise ValueError(
            "Invalid input format. The input string should end with 'L', 'M', or 'H'.")

    # Each preceding group contributes three steps; without the factor of
    # three, codes such as "A1M" and "A2L" would map to the same number.
    group = int(x[1])
    return (group - 1) * 3 + step_in_group[x[2]]

# Function to convert A1-A7 to numbers
# A1 to 1, A2 to 2, A3 to 3, etc.
# The function takes a string as input and returns a number as output


def convert_ueq_likert(x):
    """Convert a UEQ answer code such as "A1" ... "A7" to its integer value.

    Null values (as detected by ``pd.isnull``) are returned unchanged;
    otherwise the second character of the code is parsed as an integer.
    """
    if pd.isnull(x):
        return x
    digit = x[1]
    return int(digit)

# Function to convert A1 to yes and A2 to no
# The function takes a string as input and returns a boolean as output


def convert_yes_no(x):
    """Convert a yes/no answer code to a boolean.

    "A1" maps to True and "A2" maps to False; only the second character
    of the code is inspected. Null values (as detected by ``pd.isnull``)
    are returned unchanged.

    Raises:
        ValueError: if the second character is neither '1' nor '2'.
    """
    if pd.isnull(x):
        return x

    answer = x[1]
    if answer == '1':
        return True
    if answer == '2':
        return False
    raise ValueError(
        "Invalid input format. The input string should end with '1' or '2'.")

# Function to convert A1 to male, A2 to female and A3 to diverse


def convert_sex(x):
    """Convert a sex answer code to a readable label.

    "A1" -> 'male', "A2" -> 'female', "A3" -> 'diverse'; only the second
    character of the code is inspected. Null values (as detected by
    ``pd.isnull``) are returned unchanged.

    Raises:
        ValueError: if the second character is not '1', '2' or '3'.
    """
    if pd.isnull(x):
        return x

    labels = {'1': 'male', '2': 'female', '3': 'diverse'}
    label = labels.get(x[1])
    if label is None:
        raise ValueError("Invalid input format.")
    return label

# Function to convert A1 to right, A2 to left, A3 to both and A4 to neither


def convert_dominant_hand(x):
    """Convert a hand-dominance answer code to a readable label.

    "A1" -> 'right', "A2" -> 'left', "A3" -> 'both', "A4" -> 'neither';
    only the second character of the code is inspected. Null values (as
    detected by ``pd.isnull``) are returned unchanged.

    Raises:
        ValueError: if the second character is not '1', '2', '3' or '4'.
    """
    if pd.isnull(x):
        return x

    labels = {'1': 'right', '2': 'left', '3': 'both', '4': 'neither'}
    label = labels.get(x[1])
    if label is None:
        raise ValueError("Invalid input format.")
    return label

In [21]:

# Read data
test_results_df = pd.read_csv('./raw_data/test_results.csv')
survey_results_df = pd.read_csv('./raw_data/survey_results.csv')
calibration_df = pd.read_csv('./raw_data/calibration_results.csv')

# Fix the column typo from the survey export: every column whose name starts
# with 'TXL' is renamed so that it reads 'TLX' instead.
survey_results_df.rename(
    columns={col: col.replace('TXL', 'TLX')
             for col in survey_results_df.columns if col.startswith('TXL')},
    inplace=True)

# Columns copied unchanged from the survey and calibration tables. They do not
# depend on the row being processed, so they are defined once up front; this
# also keeps them available for the final column list when calibration_df is
# empty.
other_cols = ["DS01", "DS02", "DS03", "IDRND",
              "D01", "D02", "D03", "D04", "D05", "EMSEmpfindung1", "EMSEmpfindung2", "EMSEmpfindung3", "EMSEmpfindung4"]
calibration_cols = ["Calibration Arm", "Calibration Hand"]

# Initialize the final data list
final_data = []

# Build one output row per calibration row (subject / condition pair)
for _, calibration_row in calibration_df.iterrows():
    # Get the test result for the corresponding SubjectID and Condition.
    # .iloc[0] takes the first match and raises IndexError if there is none.
    subject_id = calibration_row['Subject ID']
    condition = calibration_row['Group']
    data_row = test_results_df[(test_results_df['SubjectID'] == subject_id)
                               & (test_results_df['Condition'] == condition)].iloc[0]

    # The mapping number selects which UEQ/TLX survey columns belong to this
    # calibration row.
    mapping_number = calibration_row['Mapping']

    # Build the survey column names for this mapping
    ueq_cols = [f"UEQS{mapping_number}[SQ00{i}]" for i in range(1, 9)]
    tlx_cols = [f"TLX{i}{mapping_number}[SQ001]" for i in range(1, 7)]

    # Get the subject's survey row (first match on IDRND)
    pre_results_row = survey_results_df[survey_results_df['IDRND']
                                        == subject_id].iloc[0]

    # Extract the values. Every listed column contributes exactly one value,
    # even a missing one: skipping a value would shorten the row and shift all
    # later values under the wrong column headers.
    ueq_values = [pre_results_row[col] for col in ueq_cols]
    tlx_values = [pre_results_row[col] for col in tlx_cols]
    other_values = [pre_results_row[col] for col in other_cols]
    calibration_values = [calibration_row[col] for col in calibration_cols]

    # Combine into a single row
    final_row = data_row.tolist() + ueq_values + tlx_values + other_values + calibration_values
    final_data.append(final_row)

# Create the final DataFrame; the column order mirrors the order in which
# final_row is assembled above.
final_columns = list(test_results_df.columns) + [f'UEQ Item {i + 1}' for i in range(8)] + [
    f'TLX Item {i + 1}' for i in range(6)] + other_cols + calibration_cols
final_df = pd.DataFrame(final_data, columns=final_columns)

In [11]:
# Mapping from the raw column labels to the names used in the exported CSV
column_names = {
    # Consent, screening and demographics
    "DS01": "DataProtectionAgreement",
    "DS02": "LiabilityWaiverSigned",
    "DS03": "HasPacemaker",
    "IDRND": "ID",
    "D01": "Age",
    "D02": "Gender",
    "D03": "ExperienceWithEMS",
    "D04": "ExperienceWithVR",
    "D05": "HandDominance",
    # UEQ
    "UEQ Item 1": "UEQ_behindernd_unterstützend",
    "UEQ Item 2": "UEQ_kompliziert_einfach",
    "UEQ Item 3": "UEQ_ineffizient_effizient",
    "UEQ Item 4": "UEQ_verwirrend_übersichtlich",
    "UEQ Item 5": "UEQ_langweilig_spannend",
    "UEQ Item 6": "UEQ_uninteressant_interessant",
    "UEQ Item 7": "UEQ_konventionell_originell",
    "UEQ Item 8": "UEQ_herkömmlich_neuartig",
    # TLX
    "TLX Item 1": "TLX_MentalDemand",
    "TLX Item 2": "TLX_PhysicalDemand",
    "TLX Item 3": "TLX_TimeDemand",
    "TLX Item 4": "TLX_Performance",
    "TLX Item 5": "TLX_Effort",
    "TLX Item 6": "TLX_Frustration",
    # EMS sensation items
    "EMSEmpfindung1": "EMS_Empfindung1",
    "EMSEmpfindung2": "EMS_Empfindung2",
    "EMSEmpfindung3": "EMS_Empfindung3",
    "EMSEmpfindung4": "EMS_Empfindung4",

    "interviewtime": "InterviewTime"
}


def _pick_converter(label):
    """Return the conversion function for a column label, or None if the
    column is left as it is. The checks run in a fixed order: UEQ columns
    first, then TLX columns, then the individually named columns."""
    if 'UEQ' in label:
        return convert_ueq_likert
    if 'TLX' in label or 'TXL' in label:
        return convert_tlx_likert
    if label in ['DS01', 'DS02', 'DS03', 'D03', 'D04']:
        return convert_yes_no
    if label == 'D02':
        return convert_sex
    if label == 'D05':
        return convert_dominant_hand
    return None


# Decode the answer codes column by column; this runs before the rename
# below, so the raw column labels are still in place.
for col in final_df.columns:
    converter = _pick_converter(col)
    if converter is not None:
        final_df[col] = final_df[col].apply(converter)


# Switch to the descriptive column names
final_df.rename(columns=column_names, inplace=True)

# Write the prepared dataset without the index column
final_df.to_csv('final_data.csv', index=False)
