In [None]:
# SG2 - FBN
# Topic: Eye-tracking data preprocessing & abstraction
# Done:  (1) Eye-tracking data preprocessing & abstraction
#        (2) Fuse left & right eye

In [None]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import itertools 

In [None]:
# Path
# Folder locations used by the whole notebook.
destination_folder = "D:/vr-project/.../data/result" #result
path_clean = "D:/vr-project/.../data/clean/SG2" #clean

# Directory walkers over the raw and the cleaned recordings.
# NOTE: os.walk() returns a one-shot generator, so each of these handles can
# be iterated only once per kernel session; re-create it before a second pass.
source_file = os.walk("D:/vr-project/.../data/raw/SG2") #raw
clean_file = os.walk(path_clean)

In [None]:
#Preprocess Timestamp
def process_ts(value):
    """Normalise one raw ``Timestamp`` cell.

    A string is cut at its last ``':'`` and the trailing field is converted
    to ``int``; any non-string value is handed back untouched.

    Raises:
        ValueError: if the trailing field of a string is not an integer.
    """
    if not isinstance(value, str):
        return value
    trailing_field = value.rsplit(':', 1)[-1]
    return int(trailing_field)

In [None]:
# Check Header & Error
# For every raw recording: keep the columns of interest, normalise the
# timestamp, repair cells that contain an error marker, drop duplicated
# timestamps and write the cleaned table to `path_clean`.

# 1 Timestamp + 7 sensor columns (orientation quaternion + position vector)
keep_columns = ["Timestamp",
                "Orientation.x", "Orientation.y", "Orientation.z", "Orientation.w",
                "Position.x", "Position.y", "Position.z"]
# A cell is treated as corrupted when its text contains any of these markers.
error_str = ["error", "Error", "alxr", "ALXR"]

# Make sure the output folder exists so the first run on a fresh machine works.
os.makedirs(path_clean, exist_ok=True)

i = 0
for path, dir_list, file_list in source_file:
    for file_name in file_list:

        print(file_name)
        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)
        # .copy() so the Timestamp assignment below works on an independent frame.
        raw_data = raw.loc[:, keep_columns].copy()
        #Timestamp format change
        raw_data["Timestamp"] = raw_data["Timestamp"].apply(process_ts)

        # check error
        raw_data_copy = raw_data.copy()
        unrepairable = set()  # rows that have no clean predecessor to copy from
        for index in raw_data.index:
            for name in raw_data.columns:
                value = str(raw_data.at[index, name])
                if not any(element in value for element in error_str):
                    continue
                if index == 0 or (index - 1) in unrepairable:
                    # No previous row (or the previous row is itself broken):
                    # nothing valid to copy, so the row is dropped below.
                    unrepairable.add(index)
                else:
                    # Copy from the already-repaired frame so that runs of
                    # consecutive corrupted rows are forward-filled instead of
                    # feeding error text to float().
                    raw_data_copy.loc[index, name:] = (
                        raw_data_copy.loc[index - 1, name:].apply(float)
                    )
                # Everything from `name` rightwards has been handled.
                break
        raw_data_copy = raw_data_copy.drop(index=sorted(unrepairable))

        # clean data: keep the first row per timestamp and renumber the rows
        raw_data_copy = (
            raw_data_copy
            .drop_duplicates(subset="Timestamp", keep="first")
            .reset_index(drop=True)
        )

        # save in new path
        save_clean_path = os.path.join(path_clean, file_name)
        raw_data_copy.to_csv(save_clean_path, mode='w', index=False, header=True)

        #Counter
        i += 1
        print(i)

print("SUCCESS Import Files:" + str(i))

In [None]:
# Step 1: Build FBN Vector: FBN_list
# Collect one row per cleaned file:
#   [ids parsed from the file name..., sample count, duration, sampling frequency]
# The 7 positional columns are given their names in the next cell.

fbn_rows = []

# Walk the clean folder with a fresh os.walk(): the generator is one-shot, so
# re-using a shared handle would leave later cells with nothing to iterate.
for path, dir_list, file_list in os.walk(path_clean):
    for file_name in file_list:

        print(file_name)
        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)
        # File names are underscore-separated integers before the extension.
        id_list = [int(element) for element in file_name.split(".")[0].split("_")]
        #
        LEN = raw.shape[0]
        # Timestamp span divided by 1e9 (presumably nanoseconds -> seconds; confirm).
        TIME = (int(raw.at[LEN-1,"Timestamp"]) - int(raw.at[0,"Timestamp"]))/pow(10,9)
        # Guard against a zero-length span (e.g. a single-row file).
        FREQ = int(LEN/TIME) if TIME > 0 else 0 #floor

        fbn_rows.append(id_list + [LEN, TIME, FREQ])

# Build the frame once instead of appending row by row (DataFrame.append was
# removed in pandas 2.0 and is quadratic in the number of files).
df_FBN = pd.DataFrame(fbn_rows, columns = range(7))

In [None]:
# Count FBN_list based on info dataframe
# For each game id 1..20, store the truncated mean / max / min of `time_slot`
# over all files of that game. List position k holds game id k + 1.
df_FBN.columns = ["user_id", "game_id", "round_id", "device_id", "len", "time_slot", "frequency"]
FBN_list, FBN_list_max, FBN_list_min = [], [], []

for game_id in range(1, 21): #Game_id
    game_slots = df_FBN.loc[df_FBN["game_id"] == game_id, "time_slot"]
    slot_mean, slot_max, slot_min = (
        int(np.mean(game_slots)),
        int(np.max(game_slots)),
        int(np.min(game_slots)),
    )
    FBN_list.append(slot_mean)
    FBN_list_max.append(slot_max)
    FBN_list_min.append(slot_min)

In [None]:
# Create blank csv
# Two header-only CSV files are written here; later cells append rows to them.

# Per-file summary table.
info_columns = ["user_id", "game_id", "round_id", "device_id",
                "time_slot", "sample_size", "block_number"]
df1 = pd.DataFrame(columns = info_columns)
save_info_path = destination_folder + '/SG2_info_FBN_1s.csv'
df1.to_csv(save_info_path, index=False, header=True) #default: header=False

# Per-block feature table: 5 id columns + 5 statistics * 3 components + IPD = 21
id_columns = ["user_id", "game_id", "round_id", "device_id", "block_id"]
components = ["Quatx", "Quaty", "Quatw"]
statistics = ["max", "min", "mean", "std", "median"]
# Statistic-major order: Quatx_max, Quaty_max, Quatw_max, Quatx_min, ...
stat_columns = [component + "_" + statistic
                for statistic in statistics
                for component in components]
df2 = pd.DataFrame(columns = id_columns + stat_columns + ["IPD"])
save_feat_path = destination_folder + '/SG2_ts_feature_FBN_1s.csv'
df2.to_csv(save_feat_path, index=False, header=True)

In [None]:
# Step 2: Divide block and abstract the information
# Every cleaned recording is cut into FBN_g * ratio equally sized blocks; for
# each block five statistics of the three orientation components are computed
# and appended to the feature CSV. One summary row per file goes to the info CSV.

orientation_columns = ["Orientation.x", "Orientation.y", "Orientation.w"] # 3 features
ratio = 1 #ratio: larger ratio (larger block amount), smaller UNIT;

# Use a fresh os.walk(): the generator is one-shot, so a handle that an earlier
# cell already iterated would yield nothing and this loop would silently skip
# every file.
for path, dir_list, file_list in os.walk(path_clean):
    for file_name in file_list:

        filepath = os.path.join(path, file_name)
        raw = pd.read_csv(filepath)
        raw_data = raw.loc[:, orientation_columns]
        # IPD is taken as twice the absolute Position.x of the first sample.
        IPD = float(abs(raw.iloc[0]["Position.x"] * 2))

        # Ids are the underscore-separated integers of the file name; the code
        # reads the second one as the game id.
        file_ids = [int(element) for element in file_name.split(".")[0].split("_")]
        g_id = int(file_ids[1])
        # FBN_g : how many blocks this ts need to be divided into
        FBN_g = FBN_list[g_id-1]

        # Build ts blocks
        LEN = raw.shape[0]
        n_blocks = int(FBN_g * ratio)
        if n_blocks <= 0:
            # Zero blocks would divide by zero below and leave nothing to write.
            print("SKIP (no blocks): " + file_name)
            continue
        UNIT = int(LEN/(FBN_g * ratio)) #unit: How many records in a block
        ts_delta = (int(raw.at[LEN-1, "Timestamp"]) - int(raw.at[0, "Timestamp"])) / pow(10,9)

        # Traverse Blocks in a ts
        block_rows = [] #store all the block info
        for block_id in range(n_blocks):

            # Slices are end-exclusive, so this yields exactly UNIT rows
            # (a "- 1" on the stop would drop the last sample of every block).
            raw_block = raw_data.iloc[UNIT * block_id : UNIT * (block_id + 1)]

            # ids + block id, then 5 statistics x 3 components, then IPD
            fea_block = file_ids + [block_id]
            fea_block += list(raw_block.max())
            fea_block += list(raw_block.min())
            fea_block += list(raw_block.mean())
            fea_block += list(raw_block.std())
            fea_block += list(raw_block.median())
            fea_block += [IPD]
            block_rows.append(fea_block)

        # Save One file info
        #time_slot: how long for user playing the game: Unit-second
        #sample_size:how many ts data
        #block_number:timeseries is divided into how many blocks
        info_row = file_ids + [ts_delta, LEN, n_blocks]
        # dtype=float keeps the written number format identical to earlier runs.
        id_df = pd.DataFrame(
            [info_row],
            columns = ["user_id", "game_id", "round_id", "device_id",
                       "time_slot", "sample_size", "block_number"],
            dtype = float,
        )
        id_df.to_csv(save_info_path, mode='a', index=False, header=False)

        # Save file features: build the frame once (DataFrame.append was removed
        # in pandas 2.0) and append it to the feature CSV.
        df_wr = pd.DataFrame(block_rows)
        df_wr.to_csv(save_feat_path, mode='a', index=False, header=False)
    

In [None]:
# Step 3: Fuse to CSV
# Split the feature table by device and join the two halves side by side.
# Overlapping feature columns receive pandas' default _x / _y suffixes.
feature_path =  destination_folder + '/SG2_ts_feature_FBN_1s.csv'
raw = pd.read_csv(feature_path)

is_left = raw["device_id"] == 4   #left eye
is_right = raw["device_id"] == 6  #right-eye
df_4 = raw.loc[is_left].drop(columns = ["device_id"])
df_6 = raw.loc[is_right].drop(columns = ["device_id"])

join_keys = ["user_id", "game_id", "round_id", "block_id","IPD"]
df_merge = df_4.merge(df_6, how = "outer", on = join_keys)
pd.set_option('display.max_columns', None)

In [None]:
# Write the fused left/right feature table (with header, without the index).
save_path = destination_folder + '/SG2_fuse_ts_feature_FBN_1s.csv'
df_merge.to_csv(path_or_buf=save_path, header=True, index=False)
print("SUCCESS SAVED File!")

In [None]:
# Check result file size
# Read the fused CSV back from disk and report its (rows, columns) shape.
feature_path = destination_folder + '/SG2_fuse_ts_feature_FBN_1s.csv'
raw = pd.read_csv(filepath_or_buffer=feature_path)
result_shape = raw.shape
print(result_shape)
print("READ SUCCESS")