In [1]:
"""
Data Format:
Time, Type=E, Gesture, Foot, Direction, Checkpoint
<gesuture>: one of the gesture names in our gesture list.
<foot>: bothfoot, leftfoot, rightfoot
<direction>: RecordingForth,RecordingBack
<checkpoint>: StartPoint, EndPoint, MidPoint1, MidPoint2, or None
Example data:
1675281183795, E, NormalWalking, bothfoot, RecordingForth
1675810328139, E, NormalWalking, leftfoot, RecordingForth, StartPoint


Time, Type=I, TrackerName, pos_x, pos_y, pos_z, ang_x, ang_y, ang_z
<pos_x>,<pos_y>,<pos_z> are tracking positions in Unity global coordinate frame in meter. 
<ang_x>,<ang_y>,<ang_z> are tracking angular rotations in Unity local transform.EulerAngles in degree.

TrackerName:
TrackerH: the raw data of tracker on the helmet
TrackerL: the raw data of tracker on the left foot
TrackerR: the raw data of tracker on the right foot
Head: the adjusted data of tracker on the helmet, regardless of how participants’ wear the helmet 
FootL: the adjusted data of participants’ actual left foot, regardless of participants’ foot size or foot height.
FootR: the adjusted data of participants’ actual right foot, regardless of participants’ foot size or foot height.

Example data:
1675281183796, I, TrackerL, -1.7858, 2.57837, 1.48311, 38.26742, 81.05367, 354.774
1675281183797, I, TrackerR, -0.81606, 1.90364, -5.77158, 277.1994, 292.46671, 245.3033
1675281183797, I, TrackerH, -0.81606, 1.90364, -5.77158, 277.1994, 292.46671, 245.3033
11675281183797, I, FootL, -1.7858, 2.57837, 1.48311, 38.26742, 81.05367, 354.774
1675281183797, I, FootR, -0.81606, 1.90364, -5.77158, 277.1994, 292.46671, 245.3033
1675281183797, I, Head, -0.81606, 1.90364, -5.77158, 277.1994, 292.46671, 245.30331

Data in txt file is separated by comma.
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import seaborn as sns
import math
from os import path

def parse_data(user_id):
    raw_data_dir_path = os.path.join('../StudyRawData/', user_id)
    parsed_data_dir_path = os.path.join('../ParsedLabelledData/', user_id)

    data_file_delimiter = ','

    # open folder and read all files end with .txt
    all_files = os.listdir(raw_data_dir_path)
    all_files = [file for file in all_files if file.endswith('.txt')]
    all_files.sort()
    print('All files: ', all_files)
    all_files_data = []

    for file in all_files:
        data_path = path.join(raw_data_dir_path, file)
        current_gesture, current_foot, current_direction, current_checkpoint = '', '', '', ''

        with open(data_path, 'r') as temp_file:
            lines = temp_file.readlines()
            lines = lines[1:]

            for line in lines:
                # line to list, delete the '\n' at the end of the line
                list_line = line[:-1].split(data_file_delimiter)
                list_line = [item.strip() for item in list_line]

                # make all the list items with thses attributes ['Time', 'Type', 'Gesture', 'Foot', 'Direction', 'Checkpoint', 'TrackerName', 'pos_x', 'pos_y', 'pos_z', 'ang_x', 'ang_y', 'ang_z']
                if list_line[1] == 'E':
                    # if it is a gesture event, then has ['Time', 'Type', 'Gesture', 'Foot', 'Direction', ('Checkpoint')], add 'None' to the rest of the list
                    list_line = list_line[:len(list_line)] + [''] * (
                        13 - len(list_line))
                    current_gesture, current_foot, current_direction, current_checkpoint = list_line[
                        2], list_line[3], list_line[4], list_line[5]
                elif list_line[1] == 'I':
                    # if it is a tracker data, then has ['Time', 'Type', 'TrackerName', 'pos_x', 'pos_y', 'pos_z', 'ang_x', 'ang_y', 'ang_z']
                    # 'pos_x', 'pos_y', 'pos_z', 'ang_x', 'ang_y', 'ang_z' are float
                    list_line[3:] = [float(item) for item in list_line[3:]]
                    list_line = list_line[:2] + [
                        current_gesture, current_foot, current_direction,
                        current_checkpoint
                    ] + list_line[2:]
                else:
                    print('Error: Type is not E or I')

                all_files_data.append(list_line)

    # sort all_files_data by time
    all_files_data.sort(key=lambda x: int(x[0]))
    print('Total number of lines in all files: ', len(all_files_data))

    last_dir = "RecordingBack"
    for i in range(len(all_files_data)):
        # "Type" column to G
        if all_files_data[i][1] == "E" and all_files_data[i][5] == "":
            if all_files_data[i][4] == last_dir:
                all_files_data[i][1] = "G"
        last_dir = all_files_data[i][4]

    # create a dataframe with all columns and rows with maximum length of all_files_data
    all_columns = [
        'Time', 'Type', 'Gesture', 'Foot', 'Direction', 'Checkpoint',
        'TrackerName', 'pos_x', 'pos_y', 'pos_z', 'ang_x', 'ang_y', 'ang_z'
    ]
    all_data = pd.DataFrame(all_files_data,
                            columns=all_columns,
                            index=range(len(all_files_data)))


    if path.exists(parsed_data_dir_path) == False:
        os.mkdir(parsed_data_dir_path)
    all_data.to_csv(path.join(parsed_data_dir_path, 'all_data.csv'), index=False)
    

for i in range(11, 27):
    user_id = 'User' + str(i)
    parse_data(user_id)

FileNotFoundError: [Errno 2] No such file or directory: './StudyRawData/User11'

In [2]:
all_data = pd.DataFrame()
for i in range(1, 27):
    user_id = 'User' + str(i) if i > 9 else 'User0' + str(i)
    parsed_data_dir_path = os.path.join('../ParsedLabelledData/', user_id)
    data = pd.read_csv(path.join(parsed_data_dir_path, 'all_data.csv'))
    data['UserID'] = user_id
    all_data = pd.concat([all_data, data], ignore_index=True)
    

all_data.to_csv('../ParsedLabelledData/all_data.csv', index=False)

  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  data = pd.read_csv(path.join(parsed_data_dir_path, 'all_data.csv'))
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data = all_data.append(data, ignore_index=True)
  all_data =

In [4]:
all_data = pd.read_csv('../ParsedLabelledData/all_data.csv')
all_data.sample(10)

  all_data = pd.read_csv('../ParsedLabelledData/all_data.csv')


Unnamed: 0,Time,Type,Gesture,Foot,Direction,Checkpoint,TrackerName,pos_x,pos_y,pos_z,ang_x,ang_y,ang_z,UserID
13790855,1677518660935,I,MidairRotationOutwards,leftfoot,RecordingBack,MiddlePoint2,Head,-0.03517,0.04215,0.42197,2.9715,178.74316,359.81567,User24
13521296,1677445130071,I,BigStep,rightfoot,RecordingBack,,Head,0.13012,0.04424,-1.0537,9.54329,192.30225,1.23194,User23
2652814,1676584111662,I,BigStep,leftfoot,RecordingBack,StartPoint,TrackerL,0.04292,0.14558,-4.571,311.95224,154.19771,10.48526,User05
15053736,1675969568578,I,Delay,rightfoot,RecordingBack,MiddlePoint1,TrackerL,0.14448,0.19213,-1.82279,311.34601,160.76807,7.738703,User26
1959150,1676565118327,I,ToeTapInFront,rightfoot,RecordingBack,StartPoint,TrackerH,0.08627,1.85114,-4.39816,276.00269,1.52232,3.33812,User04
10866202,1677085537043,I,ToeTapInFront,rightfoot,RecordingBack,EndPoint,FootL,0.00428,0.04576,1.60279,359.37943,174.94762,355.7678,User19
12911782,1677257976449,I,ToeTapInFront,rightfoot,RecordingBack,StartPoint,Head,-0.03116,-0.12419,-4.1316,358.17191,177.30261,359.36517,User22
2634483,1676583936766,I,KickForward,leftfoot,RecordingBack,StartPoint,TrackerH,-0.10804,1.97751,-4.44201,287.28885,273.1134,86.86815,User05
3964101,1676661268646,I,SmallStep,rightfoot,RecordingBack,,TrackerH,0.05452,1.84519,-0.79335,285.62796,326.6922,25.83615,User07
5664385,1676676468396,I,TapWithHeel,leftfoot,RecordingForth,,FootR,0.09239,0.21354,3.28332,42.7158,343.18219,346.9937,User10


In [5]:
all_data['Gesture'].unique()

array(['NormalWalking', 'SmallStep', 'TapWithFootRotatedInwards',
       'KickForward', 'TapInFrontOfTheOtherFoot', 'Delay', 'TapInward',
       'TapWithFootRotatedOutwards', 'ToeTapInFront', 'DraggingInFront',
       'BendingBehind', 'TapOuward', 'MidairRotationOutwards',
       'KickOutward', 'KickInward', 'Rush', 'TapWithHeel', 'LiftInFront',
       'ToeTapBehind', 'MidairRotationInwards', 'DraggingBehind', 'Click',
       'BigStep'], dtype=object)

In [7]:
all_data.describe()

Unnamed: 0,Time,pos_x,pos_y,pos_z,ang_x,ang_y,ang_z
count,15140170.0,15123080.0,15123080.0,15123080.0,15123080.0,15123080.0,15123080.0
mean,1676829000000.0,-0.0170044,0.4281035,-1.014777,219.3233,179.8014,169.2569
std,357709200.0,0.2131746,0.6703163,3.114807,140.7954,115.1867,158.9374
min,1675967000000.0,-5.59274,-6.78683,-8.76376,-0.00573,-0.00573,-0.00573
25%,1676591000000.0,-0.10481,0.04276,-3.71962,27.6037,75.49255,11.31545
50%,1676763000000.0,-0.0107,0.16739,-0.9693,294.7254,180.9082,100.9855
75%,1677086000000.0,0.07995,0.25745,1.72985,319.8613,285.3441,350.8404
max,1677612000000.0,4.2289,6.12788,8.72782,359.9943,359.9943,359.9943


In [9]:
all_data.shape

(15140167, 14)