In [None]:
# SG12 - FBN
# Topic: Motion-Hand data preprocessing & abstraction
# Done:  (1) Motion-Hand data preprocessing & abstraction
#        (2) Fuse head & hand motion data
#        (3) Clip invalid rows (when there is no hand motion)

In [None]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import itertools 

In [None]:
# Path
# NOTE: os.walk() returns a one-shot generator. Once a loop has consumed
# source_file / clean_file, looping over the same variable again yields no
# files; re-run this cell (or call os.walk(...) again) before re-running a
# cell that iterates them.
source_file = os.walk("D:/vr-project/.../data/raw/SG12") # raw recordings (read by the cleaning cell)
destination_folder = "D:/vr-project/.../data/result" # result CSVs are written here

path_clean = "D:/vr-project/.../data/clean/SG12" # cleaned copies of the raw files are written here
clean_file = os.walk(path_clean)  # lazy walker over the cleaned files; nothing is listed until it is iterated

In [None]:
#Preprocess Timestamp
def process_ts(value):
    """Normalise one raw timestamp cell.

    Strings are treated as colon-separated and the text after the last
    colon is returned as an int (``int()`` raises ValueError if that part
    is not an integer literal). Any non-string value is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    # rsplit with maxsplit=1 isolates everything after the final ':'
    last_field = value.rsplit(':', 1)[-1]
    return int(last_field)

In [None]:
# Check header & Errors
# For every raw recording: keep the Timestamp..AngularVelocity.z columns,
# normalise the timestamps, drop duplicated timestamps, overwrite cells hit by
# an error marker with 0, and save the result under the same file name in
# path_clean.
error_str = ["error", "Error", "alxr", "ALXR"]  # substrings that mark a corrupted cell
os.makedirs(path_clean, exist_ok=True)  # to_csv() does not create missing folders

i = 0  # number of files cleaned so far
for path, dir_list, file_list in source_file:
    for file_name in file_list:

        print(file_name)
        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)

        # .copy(): the slice must own its data, otherwise the column assignment
        # below acts on a possible view of `raw` (SettingWithCopyWarning).
        raw_data = raw.loc[:, "Timestamp":"AngularVelocity.z"].copy()  # 1 timestamp + 13 sensor columns
        raw_data["Timestamp"] = raw_data["Timestamp"].apply(process_ts)

        # clean data: one row per timestamp, the last duplicate wins
        raw_data = (
            raw_data
            .drop_duplicates(subset="Timestamp", keep='last')
            .reset_index(drop=True)
        )

        # check error
        # Vectorised form of "scan every cell for an error marker": compare the
        # string form of each cell against every marker at once.
        cell_text = raw_data.astype(str).to_numpy(dtype=str)
        has_error = np.zeros(cell_text.shape, dtype=bool)
        for element in error_str:
            has_error |= np.char.find(cell_text, element) >= 0
        # A marker invalidates its own cell and every column to its right in the
        # same row, so propagate True left-to-right along each row.
        zero_mask = np.maximum.accumulate(has_error, axis=1)
        raw_data_copy = raw_data.mask(zero_mask, 0)

        # save in new path
        save_clean_path = os.path.join(path_clean, file_name)
        raw_data_copy.to_csv(save_clean_path, mode='w', index=False, header=True)

        # Counter
        i += 1
        print(i)

print("SUCCESS Import Files:" + str(i))

In [None]:
# Step 1: Build FBN Vector: FBN_list

#df_FBN: store the detailed block division info for each time-series
# One row per cleaned file: the integer ids parsed from the file name
# ("<id>_<id>_..." before the first '.'), followed by the sample count, the
# recording duration in seconds and the floored sampling frequency.
i = 0  # number of files summarised
fbn_rows = []

# Walk the folder afresh instead of consuming a shared os.walk() generator, so
# this cell can be re-run and does not starve any later cell that walks the
# same folder.
for path, dir_list, file_list in os.walk(path_clean):
    for file_name in file_list:

        print(file_name)
        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)

        # create info dataframe
        str2 = file_name.split(".")
        str3 = str2[0].split("_")
        id_list = [int(element) for element in str3]

        LEN = raw.shape[0]
        if LEN < 2:
            # No first/last timestamp pair to measure a duration from.
            print("SKIP " + file_name + ": fewer than 2 samples")
            continue
        TIME = (int(raw.at[LEN-1, "Timestamp"]) - int(raw.at[0, "Timestamp"])) / pow(10, 9)  # Unit: second
        if TIME <= 0:
            # Would divide by zero (or give a negative frequency) below.
            print("SKIP " + file_name + ": non-positive duration")
            continue
        FREQ = int(LEN / TIME)  # floor

        fbn_rows.append(id_list + [LEN, TIME, FREQ])
        i = i + 1

# Build the frame once: DataFrame.append was removed in pandas 2.0 and growing
# a frame row by row is quadratic anyway.
df_FBN = pd.DataFrame(fbn_rows, columns=range(7))

In [None]:
# Count FBN_list based on info dataframe
# For each game id 1..20, store the mean / max / min recording duration
# (truncated to whole seconds) over all files of that game. The lists are
# positional: entry [game_id - 1] belongs to game_id.
df_FBN.columns = ["user_id", "game_id", "round_id", "device_id", "len", "time_slot", "frequency"]
FBN_list = []
FBN_list_max = []
FBN_list_min = []

for i in range(1, 21, 1): #step-size
    time_slots_gi = df_FBN.loc[df_FBN["game_id"] == i, "time_slot"]
    if time_slots_gi.empty:
        # No recording for this game: the mean of an empty selection is NaN and
        # int(NaN) raises ValueError. Store 0 so the lists stay aligned with
        # the game ids.
        time_gi = time_gi_max = time_gi_min = 0
    else:
        time_gi = int(time_slots_gi.mean())
        time_gi_max = int(time_slots_gi.max())
        time_gi_min = int(time_slots_gi.min())
    FBN_list.append(time_gi)
    FBN_list_max.append(time_gi_max)
    FBN_list_min.append(time_gi_min)

In [None]:
# Create Blank csv
# Both result files are written here with a header row only.

# Per-file info table
save_info_path = destination_folder + '/SG12_info_FBN_1s.csv'
df1 = pd.DataFrame(
    columns=["user_id", "game_id", "round_id", "device_id", "time_slot", "sample_size", "block_number"]
)
df1.to_csv(save_info_path, index=False, header=True)

# Per-block feature table: 5 id columns + 5 statistics * 13 signals = 70 columns
df2_id_list = ["user_id", "game_id", "round_id", "device_id", "block_id"]
df2_str_list = df2_id_list + [
    k + "_" + j
    for j in ["max", "min", "mean", "std", "median"]  # statistic varies slowest
    for k in ["Quatx", "Quaty", "Quatz", "Quatw", "Pos0", "Pos1", "Pos2",
              "Lin0", "Lin1", "Lin2", "Ang0", "Ang1", "Ang2"]
]
df2 = pd.DataFrame(columns=df2_str_list)
save_feat_path = destination_folder + '/SG12_ts_feature_FBN_1s.csv'
df2.to_csv(save_feat_path, index=False, header=True)

In [None]:
# Step 2: Divide block and abstract the information
# Each cleaned file is cut into int(FBN_g * ratio) consecutive blocks of UNIT
# rows. For every block the max / min / mean / std / median of the 13 signal
# columns are computed and appended to the feature CSV; one summary row per
# file is appended to the info CSV. Rows left over after the last full block
# are not used.

ratio = 1 #ratio: larger ratio (larger block amount), smaller unit

# os.walk() returns a one-shot generator, so a walker object that an earlier
# cell already looped over would yield no files here; walk the folder afresh.
for path, dir_list, file_list in os.walk(path_clean):
    for file_name in file_list:

        print(file_name)
        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)
        raw_data = raw.loc[:, "Orientation.x":"AngularVelocity.z"] # 13 features

        # Get ts info: integer ids encoded in the file name; the second id is the game id
        id_list = [int(element) for element in file_name.split(".")[0].split("_")]
        g_id = int(id_list[1])
        # FBN_g: get basic block amount for game
        FBN_g = FBN_list[g_id - 1]

        # Build ts blocks
        LEN = raw.shape[0]
        n_blocks = int(FBN_g * ratio)
        if LEN == 0 or n_blocks < 1:
            # Nothing to cut: UNIT below would divide by zero (no blocks) or the
            # first/last timestamp lookup would fail (no rows).
            print("SKIP " + file_name + ": no samples or no blocks")
            continue
        UNIT = int(LEN / (FBN_g * ratio)) # Unit: How many records in a block
        # Timestamps are divided by 1e9 (i.e. treated as nanoseconds) to get seconds
        ts_delta = (int(raw.at[LEN-1, "Timestamp"]) - int(raw.at[0, "Timestamp"])) / pow(10, 9)

        # Traverse Blocks in a ts
        feature_rows = []
        for i in range(n_blocks):
            #set blocks
            # Slice ends are exclusive: the previous `UNIT * (i + 1) - 1` end
            # silently dropped the last sample of every block.
            start = UNIT * i
            raw_block = raw_data.iloc[start : start + UNIT]

            #compute statistics
            fea_block = [] # 5 statistics, 13 values each, statistic-major order
            fea_block += list(raw_block.max())
            fea_block += list(raw_block.min())
            fea_block += list(raw_block.mean())
            fea_block += list(raw_block.std())
            fea_block += list(raw_block.median())

            #combine blocks feature: file ids + block_id + statistics
            feature_rows.append(id_list + [i] + fea_block)

        # Build the frame once (DataFrame.append was removed in pandas 2.0)
        df_wr = pd.DataFrame(feature_rows)

        # Save One file info
        #time_slot: how long for user playing the game: Unit-second
        #sample_size:how many ts data
        #block_number:timeseries is divided into how many blocks
        id_df = pd.DataFrame(
            [id_list + [ts_delta, LEN, n_blocks]],
            columns=["user_id", "game_id", "round_id", "device_id", "time_slot", "sample_size", "block_number"],
        )
        id_df.to_csv(save_info_path, mode='a', index=False, header=False)

        # Save file features
        df_wr.to_csv(save_feat_path, mode='a', index=False, header=False)

In [None]:
# Check result file size
# Read the hand-feature CSV back and report its (rows, columns) shape.
feature_path = destination_folder + '/SG12_ts_feature_FBN_1s.csv'
raw = pd.read_csv(feature_path)
n_rows, n_cols = raw.shape
print((n_rows, n_cols))
print("READ SUCCESS")

In [None]:
#(2) Fuse Motion Data
# Load the two per-block feature tables that the merge cell combines.

feature_path_head = destination_folder + "/SG11_ts_feature_FBN_1s.csv"
feature_path_hand = destination_folder + "/SG12_ts_feature_FBN_1s.csv"
raw_h1, raw_h2 = (
    pd.read_csv(csv_path) for csv_path in (feature_path_head, feature_path_hand)
)
for loaded in (raw_h1, raw_h2):
    print(loaded.shape)
print("READ SUCCESS")

In [5]:
#Merge
# Outer-join the per-block features of device 1 (from raw_h1) and devices 2 and
# 3 (from raw_h2) on the block key, fill blocks missing for a device with 0,
# and rename the feature columns to "<signal>_<statistic>_<device>".
key_cols = ["user_id", "game_id", "round_id", "block_id"]

df_0 = raw_h1[(raw_h1.device_id == 1)].drop(columns=["device_id"])
df_1 = raw_h2[(raw_h2.device_id == 2)].drop(columns=["device_id"])
df_2 = raw_h2[(raw_h2.device_id == 3)].drop(columns=["device_id"])

df_merge = pd.merge(df_0, df_1, how="outer", on=key_cols)
df_merge = pd.merge(df_merge, df_2, how="outer", on=key_cols)
df_merge = df_merge.fillna(0)
# Previously a bare expression whose result was discarded; make it a real check.
assert df_merge.isna().sum().sum() == 0  # Expectation:0

# Column names are assigned by position, so they are generated in exactly the
# order the merge produces: keys, device 1 block, device 2 block, device 3
# block; inside a block the statistic varies slowest.
stat_names = ["max", "min", "mean", "std", "median"]
pose_signals = ["Quatx", "Quaty", "Quatz", "Quatw", "Pos0", "Pos1", "Pos2"]
motion_signals = pose_signals + ["Lin0", "Lin1", "Lin2", "Ang0", "Ang1", "Ang2"]

fused_cols = list(key_cols)
# Device 1 carries only the 7 pose signals; devices 2 and 3 carry all 13.
for device, signals in [(1, pose_signals), (2, motion_signals), (3, motion_signals)]:
    fused_cols += [
        signal + "_" + stat + "_" + str(device)
        for stat in stat_names
        for signal in signals
    ]

if len(fused_cols) != df_merge.shape[1]:
    # Positional renaming is only valid when the inputs have the expected layout.
    raise ValueError(
        "Merged table has " + str(df_merge.shape[1]) + " columns, expected "
        + str(len(fused_cols)) + "; check the feature columns of the input files"
    )
df_merge.columns = fused_cols

print(df_merge.shape)
save_path = destination_folder + '/SG1_fuse_ts_feature_FBN_1s.csv'
df_merge.to_csv(save_path, index=False, header=True)
print("SUCCESS SAVED File!")

In [None]:
# Check result file size
# Read the fused feature CSV back and report its (rows, columns) shape.
feature_path = destination_folder + '/SG1_fuse_ts_feature_FBN_1s.csv'
raw = pd.read_csv(feature_path)
n_rows, n_cols = raw.shape
print((n_rows, n_cols))
print("READ SUCCESS")