In [1]:
import glob
import numpy as np
import pandas as pd
import os
import torch

In [None]:
# Extract the dataset archive with the shell `unzip` tool (IPython `!` escape).
# NOTE(review): the archive is unpacked relative to the current working
# directory -- confirm the extracted folder is the one the `path` variable
# defined further down points to.
!unzip Dataset/UserMovement-20231122T141046Z-001.zip

In [2]:
# Directory holding the per-recording *.txt files.
# The original location stays the default; set the FOV_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
path = os.environ.get("FOV_DATA_DIR", "/scratch/aa10350/FoV/User Movement")

In [3]:
# Split the recordings by file-name prefix (apparently the participant name):
# one prefix is held out for testing, two for validation, and everything else
# is used for training.
# glob returns matches in arbitrary, filesystem-dependent order, so every list
# is sorted to make the splits (and the order of the saved arrays) reproducible.
test_files = sorted(glob.glob(os.path.join(path, 'ChenYongting*.txt')))
val_files = sorted(
    glob.glob(os.path.join(path, 'fupingyu*.txt'))
    + glob.glob(os.path.join(path, 'GuoYushan*.txt'))
)
# Files matching these prefixes are excluded from every split.
other_files = sorted(
    glob.glob(os.path.join(path, 'intersection*.txt'))
    + glob.glob(os.path.join(path, 'original*.txt'))
    + glob.glob(os.path.join(path, 'output*.txt'))
)
# A set gives constant-time membership checks while filtering.
excluded_files = set(test_files) | set(val_files) | set(other_files)
train_files = sorted(
    f for f in glob.glob(os.path.join(path, '*.txt')) if f not in excluded_files
)

In [4]:
def get_sine_cosine(roll, pitch, yaw):
    """Return the sine and cosine of three angles given in degrees.

    Parameters
    ----------
    roll, pitch, yaw : angles in degrees.

    Returns
    -------
    pd.Series with the entries 'r_sine', 'r_cosine', 'p_sine', 'p_cosine',
    'y_sine', 'y_cosine' (in that order).
    """
    features = {}
    for prefix, angle_deg in (('r', roll), ('p', pitch), ('y', yaw)):
        # Degrees -> radians, written exactly as value * pi / 180.
        angle_rad = angle_deg*np.pi/180
        features[prefix + '_sine'] = np.sin(angle_rad)
        features[prefix + '_cosine'] = np.cos(angle_rad)
    return pd.Series(features)

In [5]:
# Creates dataframe for the file and resample
def createDataframe(f):
    """Load one recording and return its resampled feature table.

    The file is read as space-separated text whose first line holds the column
    names (each possibly followed by a comma). Only the first eight columns are
    used: the first five are kept as they are, and the 'HeadRX', 'HeadRY' and
    'HeadRZ' columns (angles in degrees) are replaced by their sine and cosine.

    Parameters
    ----------
    f : path (or file-like object) accepted by ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame with a fresh 0..n-1 index, containing the remaining columns
    of the first five (every column whose name matches '.*Timer|.*Frame' is
    dropped) followed by 'r_sine', 'r_cosine', 'p_sine', 'p_cosine', 'y_sine',
    'y_cosine'.
    """
    raw = pd.read_csv(f, sep=' ', header=None)
    # The header line is read as data row 0; strip the commas from its cells
    # and use them as column names (a list, not a lazy `map` object).
    raw.columns = [str(name).replace(',', '') for name in raw.iloc[0]]
    # Cast only the columns that are actually used.
    df = raw.iloc[1:, 0:8].astype(float)

    # Vectorised sine/cosine of the three rotation columns. This replaces a
    # row-by-row `apply` that built one pd.Series per row.
    trig = {}
    for prefix, column in (('r', 'HeadRX'), ('p', 'HeadRY'), ('y', 'HeadRZ')):
        radians = df[column] * np.pi / 180
        trig[prefix + '_sine'] = np.sin(radians)
        trig[prefix + '_cosine'] = np.cos(radians)
    sine_cosine_df = pd.DataFrame(trig, index=df.index)

    df = pd.concat([df.iloc[:, 0:5], sine_cosine_df], axis=1)
    # Treat the row number as a timestamp in seconds so that the time-based
    # resampling below can be used to change the sampling rate by 5/12.
    df.index = pd.to_timedelta(df.index, unit='s')
    df = df.resample('200ms').interpolate('akima') # upsample by 5 --> 5 * 144 Hz
    df = df.resample('2400ms').first() # downsample by 12 --> (5 * 144) / 12 = 60 Hz
    df = df.reset_index(drop=True) # drop the timestamp index added
    bookkeeping_columns = df.filter(regex='.*Timer|.*Frame', axis=1).columns
    return df.drop(columns=bookkeeping_columns)

In [6]:
# Creates input and output numpy arrays of sliding windows for a given dataframe.
# history_size and target_size are expressed in units of window_size rows;
# step is the number of rows between two consecutive windows.
def multivariate_data(df, history_size = 10, target_size = 10, step = 15, window_size=60):
    """Cut `df` into (history, target) window pairs.

    Parameters
    ----------
    df : DataFrame (or array-like) with one row per sample.
    history_size : length of each input window, in multiples of `window_size` rows.
    target_size : length of each target window, in multiples of `window_size` rows.
    step : number of rows between the starts of consecutive windows.
    window_size : number of rows per unit of `history_size` / `target_size`.

    Returns
    -------
    (data, labels) : two arrays of shape
        (n_windows, history_size * window_size, n_columns) and
        (n_windows, target_size * window_size, n_columns).
        When `df` is too short for a single pair, both arrays have
        n_windows == 0 but keep the remaining dimensions, so they can still be
        concatenated with the output of other recordings.
    """
    values = np.asarray(df)
    history_len = history_size * window_size
    target_len = target_size * window_size

    # `i` is the first row of the target window. The last valid `i` is
    # len(values) - target_len (the target then ends on the final row), hence
    # the +1 on the exclusive upper bound.
    anchors = range(history_len, len(values) - target_len + 1, step)

    if len(anchors) == 0:
        trailing_shape = values.shape[1:]
        data = np.empty((0, history_len) + trailing_shape, dtype=values.dtype)
        labels = np.empty((0, target_len) + trailing_shape, dtype=values.dtype)
        return data, labels

    data = np.stack([values[i - history_len:i] for i in anchors])
    labels = np.stack([values[i:i + target_len] for i in anchors])
    return data, labels

In [7]:
def normalizeData(files, history_size = 10, target_size = 10, step = 15, window_size=60):
    """Compute the mean and standard deviation of the head position columns.

    Every file is loaded with `createDataframe`; recordings with fewer than
    (history_size + target_size) * window_size rows are ignored. The statistics
    are computed over the rows of all remaining recordings together.

    Parameters
    ----------
    files : iterable of file paths.
    history_size, target_size, window_size : used only for the minimum-length
        check described above.
    step : unused; kept so the signature stays compatible with existing calls.

    Returns
    -------
    (HeadX_mean, HeadY_mean, HeadZ_mean, HeadX_std, HeadY_std, HeadZ_std)

    Raises
    ------
    ValueError : if no file is long enough to contribute.
    """
    min_rows = (history_size + target_size) * window_size
    position_columns = ['HeadX', 'HeadY', 'HeadZ']

    # Collect the frames and concatenate once: growing a DataFrame with
    # pd.concat inside the loop re-copies all previous rows on every iteration.
    usable_frames = []
    for f in files:
        print(f)
        df = createDataframe(f)
        if len(df) < min_rows:
            continue
        usable_frames.append(df[position_columns])

    if not usable_frames:
        raise ValueError(
            "normalizeData: no file has at least %d rows, cannot compute statistics" % min_rows
        )

    positions = pd.concat(usable_frames, axis=0)
    HeadX_mean = positions['HeadX'].mean()
    HeadY_mean = positions['HeadY'].mean()
    HeadZ_mean = positions['HeadZ'].mean()
    HeadX_std = positions['HeadX'].std()
    HeadY_std = positions['HeadY'].std()
    HeadZ_std = positions['HeadZ'].std()
    return HeadX_mean, HeadY_mean, HeadZ_mean, HeadX_std, HeadY_std, HeadZ_std

In [8]:
# Position statistics are computed from the training files only.
train_position_stats = normalizeData(
    train_files,
    history_size=10,
    target_size=10,
    step=15,
    window_size=60,
)
(HeadX_mean, HeadY_mean, HeadZ_mean,
 HeadX_std, HeadY_std, HeadZ_std) = train_position_stats

/scratch/aa10350/FoV/User Movement/sulehan_sweep.txt
/scratch/aa10350/FoV/User Movement/TuYuzhao_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/Guozhaonian_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/yuchen_chatting.txt
/scratch/aa10350/FoV/User Movement/RenHongyu_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/LHJ_sweep.txt
/scratch/aa10350/FoV/User Movement/HKY_news_interviewing.txt
/scratch/aa10350/FoV/User Movement/FengXuanqi_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/Guozhaonian_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/liuxuya_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/liuxuya_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/huangrenyi_chatting.txt
/scratch/aa10350/FoV/User Movement/WangYan_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/TuYuzhao_sweep.txt
/scratch/aa10350/FoV/User Movement/liuxuya_chatting.txt
/scratch/aa10350/FoV/User Movement/Sunqiran_Pulling_trolley.txt
/scratch/aa10350/Fo

In [9]:
# Creates sequences for all the files
def createSequence(files, history_size = 10, target_size = 10, step = 15, window_size=60):
    """Build the stacked (history, target) windows for a list of recordings.

    Each file is loaded with `createDataframe`, its 'HeadX', 'HeadY' and
    'HeadZ' columns are standardised with the module-level statistics
    (HeadX_mean, HeadX_std, ...), and the result is windowed with
    `multivariate_data`. Recordings with fewer than
    (history_size + target_size) * window_size rows are skipped.

    Parameters
    ----------
    files : iterable of file paths.
    history_size, target_size, step, window_size : forwarded to
        `multivariate_data`.

    Returns
    -------
    (x, y) : the windows of all recordings concatenated along axis 0.

    Raises
    ------
    ValueError : if no recording produced any window.
    """
    min_rows = (history_size + target_size) * window_size
    x_list = []
    y_list = []

    for f in files:
        print(f)
        df = createDataframe(f)
        if len(df) < min_rows:
            continue
        df['HeadX'] = (df['HeadX'] - HeadX_mean) / HeadX_std
        df['HeadY'] = (df['HeadY'] - HeadY_mean) / HeadY_std
        df['HeadZ'] = (df['HeadZ'] - HeadZ_mean) / HeadZ_std

        # Forward the windowing parameters: the length check above and the
        # windowing itself must agree on the same sizes.
        x_data, y_data = multivariate_data(
            df,
            history_size=history_size,
            target_size=target_size,
            step=step,
            window_size=window_size,
        )
        if len(x_data) == 0:
            # Nothing to add; also keeps a window-less result from breaking
            # the concatenation below.
            continue
        x_list.append(x_data)
        y_list.append(y_data)

    if not x_list:
        raise ValueError("createSequence: none of the given files produced a sequence")

    x = np.concatenate(x_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    return x, y

In [10]:
# Build the input/target windows for the training files (default window settings).
x_train,y_train = createSequence(train_files)

/scratch/aa10350/FoV/User Movement/sulehan_sweep.txt
/scratch/aa10350/FoV/User Movement/TuYuzhao_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/Guozhaonian_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/yuchen_chatting.txt
/scratch/aa10350/FoV/User Movement/RenHongyu_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/LHJ_sweep.txt
/scratch/aa10350/FoV/User Movement/HKY_news_interviewing.txt
/scratch/aa10350/FoV/User Movement/FengXuanqi_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/Guozhaonian_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/liuxuya_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/liuxuya_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/huangrenyi_chatting.txt
/scratch/aa10350/FoV/User Movement/WangYan_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/TuYuzhao_sweep.txt
/scratch/aa10350/FoV/User Movement/liuxuya_chatting.txt
/scratch/aa10350/FoV/User Movement/Sunqiran_Pulling_trolley.txt
/scratch/aa10350/Fo

In [11]:
# Build the input/target windows for the validation files (default window settings).
x_val,y_val = createSequence(val_files)

/scratch/aa10350/FoV/User Movement/fupingyu_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/fupingyu_presenting.txt
/scratch/aa10350/FoV/User Movement/fupingyu_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/fupingyu_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/fupingyu_chatting.txt
/scratch/aa10350/FoV/User Movement/fupingyu_sweep.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_cleaning_whiteboard.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_chatting.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_presenting.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/GuoYushan_sweep.txt


In [12]:
# Build the input/target windows for the test files (default window settings).
x_test,y_test = createSequence(test_files)

/scratch/aa10350/FoV/User Movement/ChenYongting_chatting.txt
/scratch/aa10350/FoV/User Movement/ChenYongting_Pulling_trolley.txt
/scratch/aa10350/FoV/User Movement/ChenYongting_sweep.txt
/scratch/aa10350/FoV/User Movement/ChenYongting_presenting.txt
/scratch/aa10350/FoV/User Movement/ChenYongting_News_interviewing.txt
/scratch/aa10350/FoV/User Movement/ChenYongting_cleaning_whiteboard.txt


In [13]:
# Write every split to a .npy file in the current working directory.
arrays_to_save = (
    ('x_train.npy', x_train),
    ('y_train.npy', y_train),
    ('x_val.npy', x_val),
    ('y_val.npy', y_val),
    ('x_test.npy', x_test),
    ('y_test.npy', y_test),
)
for npy_name, split_array in arrays_to_save:
    np.save(npy_name, split_array)