# Prepare Data From CSV Recording

## Load File

Data is loaded from a CSV recording file, accepted through an input prompt. This includes all positional data related to the 6 trackers (HMD, Left Controller, Right Controller, Waist, Left Foot, Right Foot).

Data is loaded into a Pandas dataframe. The primary tracking data is then extracted, leaving out extraneous data such as booleans for button presses.

The extracted columns are then concatenated into a new dataframe, and the columns are renamed for ease of reading.

The columns are reordered in the order of head/r_controller/l_controller/waist/r_foot/l_foot.

The new trimmed file is written to a directory (/test_data or /train_data), for further manipulation and loading into the model.


In [None]:
#import all modules needed in the notebook
from datetime import datetime
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
#import seaborn as sb
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, BatchNormalization
import tensorflow as tf
from keras.layers import LSTM, GRU
from tensorflow.keras.optimizers import SGD
from keras.layers.core import Dense, Activation, Dropout
from keras.losses import SparseCategoricalCrossentropy

# Make float64 the default float type used by the Keras backend
tf.keras.backend.set_floatx('float64')


In [None]:

#Read in CSV
def GetRecording(path):
    """Prompt for a recording name and load the matching CSV into a dataframe.

    Args:
        path: Directory that holds the raw recordings. Surrounding whitespace
            is ignored and a trailing separator is optional. An empty value
            falls back to "../recordings/".

    Returns:
        A ``(dataframe, file_name)`` tuple, where ``file_name`` is the name
        typed at the prompt (without the ".csv" extension).

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        ValueError: If pandas cannot parse or decode the file.
    """
    recording_dir = (path or "").strip() or "../recordings/"
    file_name = input("Input Recording File Name")
    csv_path = os.path.join(recording_dir, file_name + ".csv")
    try:
        dataframe = pd.read_csv(csv_path)
    except (OSError, ValueError):
        # Re-raise so the failure surfaces here; callers unpack a
        # (dataframe, file_name) tuple and cannot use a sentinel value.
        print("Error Reading File: Check Spelling and Try Again")
        raise
    return dataframe, file_name
    
    
#Extract each tracker's position columns and join them into one dataframe
def GetColByName(dataframe):
    """Return only the tx/ty/tz columns of the six tracked devices.

    The devices are taken in the fixed order HMD0, controller3, controller6,
    generic7, generic8, generic9; every other column of ``dataframe`` is left
    out of the result.

    Args:
        dataframe: Dataframe containing ``<device>_tx``, ``<device>_ty`` and
            ``<device>_tz`` columns for each of the six devices.

    Returns:
        A new dataframe with the 18 selected columns, grouped per device.
    """
    device_prefixes = (
        "HMD0",
        "controller3",
        "controller6",
        "generic7",
        "generic8",
        "generic9",
    )
    per_device = [
        dataframe.loc[:, [prefix + "_t" + axis for axis in "xyz"]]
        for prefix in device_prefixes
    ]
    return pd.concat(per_device, axis=1)

#rename the now isolated trackers to a conventional naming scheme
def AssignTracker(dataframe):
    """Interactively rename the device columns to body-part names.

    Shows the first row of ``dataframe``, asks for the number of the first
    generic tracker, then prompts for a name for that tracker and the two
    that follow it, and for controller3 and controller6. Each device's
    ``_tx/_ty/_tz`` columns become ``<name>_x/_y/_z``. The HMD0 columns are
    always renamed to ``head_x/_y/_z``.

    Args:
        dataframe: Dataframe whose columns are renamed in place.

    Returns:
        The same dataframe object, with renamed columns.
    """
    display(dataframe.iloc[0:1,:])

    first_tracker = int(input('Input Tracker Number'))
    # Three generic trackers with consecutive numbers, starting at the one entered
    for tracker_id in range(first_tracker, first_tracker + 3):
        source = 'generic' + str(tracker_id)
        label = input('assign ' + source)
        dataframe.rename(
            columns={source + '_t' + axis: label + '_' + axis for axis in 'xyz'},
            inplace=True,
        )

    # The two controllers are recorded as controller3 and controller6
    for controller_id in (3, 6):
        source = 'controller' + str(controller_id)
        label = input('assign ' + source)
        dataframe.rename(
            columns={source + '_t' + axis: label + '_' + axis for axis in 'xyz'},
            inplace=True,
        )

    dataframe.rename(
        columns={'HMD0_t' + axis: 'head_' + axis for axis in 'xyz'},
        inplace=True,
    )
    return dataframe

#assign newly formatted data to test or train directory
def GetDirectory():
    """Ask whether the data is train or test data and return the output directory.

    Returns:
        "../test_data/" when the answer is exactly "test"; "../train_data/"
        for any other answer.
    """
    choice = input("train or test data:")
    return "../test_data/" if choice == "test" else "../train_data/"

#re-order features to head / r_controller / l_controller / waist / r_foot / l_foot
def OrderFeatures(dataframe):
    """Return the x/y/z columns arranged in the canonical device order.

    The order is head, r_controller, l_controller, waist, r_foot, l_foot;
    columns outside these 18 are not carried over.

    Args:
        dataframe: Dataframe containing ``<device>_x``, ``<device>_y`` and
            ``<device>_z`` columns for each of the six devices.

    Returns:
        A new dataframe holding the 18 columns in the order above.
    """
    ordered_devices = (
        'head',
        'r_controller',
        'l_controller',
        'waist',
        'r_foot',
        'l_foot',
    )
    groups = [
        dataframe.loc[:, [device + '_' + axis for axis in 'xyz']]
        for device in ordered_devices
    ]
    return pd.concat(groups, axis=1)


# write dataframe to csv
def WriteOutput(path, dataframe, filename):
    """Write ``dataframe`` to ``<path>/<filename>_trimmed.csv`` without the index.

    Args:
        path: Output directory, with or without a trailing separator.
        dataframe: Dataframe to save.
        filename: Base name of the recording (no extension).
    """
    output_file = os.path.join(path, filename + "_trimmed.csv")
    dataframe.to_csv(output_file, index=False)
    # Report the name that was passed in, not a notebook-level global
    print(filename + " output to " + path)
    
    

# Run data trimming functions

In [None]:
# Directory holding the raw recordings (no stray whitespace, trailing separator included)
recording_path = "../recordings/"
# Load the recording chosen at the prompt
dataframe, file_name = GetRecording(recording_path)
# Keep only the positional columns of the six devices
joined = GetColByName(dataframe)
# Interactively give the trackers and controllers body-part names
renamed = AssignTracker(joined)
# Choose ../test_data/ or ../train_data/ as the destination
path = GetDirectory()
# Arrange the columns as head / r_controller / l_controller / waist / r_foot / l_foot
reordered = OrderFeatures(renamed)
WriteOutput(path, reordered, file_name)


# Data Normalization

## Data Scaling

The new CSV is loaded into memory, chosen through an input prompt.
The data is then split between the features (the HMD and controller tracking data) and the labels (the waist and foot trackers).
These are loaded into NumPy arrays to perform normalization. The output from OpenVR Recorder is upscaled by 100; to correct this, the arrays are divided by 100.

In [None]:
# NOTE(review): not referenced by any other cell visible in this notebook — confirm whether it is still needed
output_path = "../trim_output/"

#read in formatted CSV
def ReadCSV(path):
    """Prompt for a file name and load ``<path>/<name>.csv`` into a dataframe.

    Args:
        path: Directory containing the CSV, with or without a trailing
            separator.

    Returns:
        A ``(dataframe, file_list)`` tuple; ``file_list`` is a one-element
        list holding the name typed at the prompt.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        ValueError: If pandas cannot parse or decode the file.
    """
    file_name = input("Input File Name")
    file_list = [file_name]
    try:
        dataframe = pd.read_csv(os.path.join(path, file_name + ".csv"))
    except (OSError, ValueError):
        # Propagate the real cause: there is no dataframe to return after a failed read
        print("Error Reading File")
        raise
    print("Dataframe created")
    return dataframe, file_list

def SplitFeaturesLabels(dataframe):
    """Split a dataframe by column position into features and labels.

    Args:
        dataframe: Dataframe whose first 9 columns are the features and
            whose next 9 columns are the labels.

    Returns:
        ``(features, labels)``: columns 0-8 and columns 9-17 respectively.
    """
    feature_columns = slice(0, 9)
    label_columns = slice(9, 18)
    return dataframe.iloc[:, feature_columns], dataframe.iloc[:, label_columns]

#Load data into Numpy array
def LoadArray(x, y):
    """Convert the features ``x`` and labels ``y`` to NumPy arrays.

    Returns:
        ``(x_array, y_array)``, each built with ``np.array``.
    """
    return np.array(x), np.array(y)


def NormalizeValues(x, y, scale=100):
    """Divide the features and labels element-wise by ``scale``.

    Args:
        x: Feature values.
        y: Label values.
        scale: Divisor applied to both inputs. Defaults to 100.

    Returns:
        ``(x / scale, y / scale)`` as computed by ``np.divide``.

    Raises:
        ValueError: If ``scale`` is zero.
    """
    if scale == 0:
        raise ValueError("scale must be non-zero")
    return np.divide(x, scale), np.divide(y, scale)

def SampleSize(x, y, n_samples=600):
    """Keep only the first ``n_samples`` rows of two 2-D arrays.

    Args:
        x: Feature array, indexed as ``[row, column]``.
        y: Label array, indexed as ``[row, column]``.
        n_samples: Maximum number of leading rows to keep. Defaults to 600.
            Arrays with fewer rows are returned in full.

    Returns:
        ``(x_samples, y_samples)``, the leading rows of each input.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")
    return x[0:n_samples, :], y[0:n_samples, :]

def RoundValues(x, y):
    """Round the features and labels to 3 decimal places.

    Returns:
        ``(x_rounded, y_rounded)`` as computed by ``np.around``.
    """
    x_rounded, y_rounded = (np.around(values, 3) for values in (x, y))
    return x_rounded, y_rounded


# Create Training Data

In [None]:
train_path = "../train_data/"
# Load one trimmed training recording, chosen at the input prompt
train_dataframe, files = ReadCSV(train_path)

print(files)
# Split features and labels into separate dataframes
x_train_df, y_train_df = SplitFeaturesLabels(train_dataframe)

# Convert features and labels to numpy arrays
x_train, y_train = LoadArray(x_train_df, y_train_df)

# Divide the values in both arrays by 100
x_samples, y_samples = NormalizeValues(x_train, y_train)

print(x_samples.shape, x_samples)
print(y_samples.shape, y_samples)

# Disabled alternative: random train/test split of the normalized arrays
#x_train, x_test, y_train, y_test = train_test_split(x_train_normalized, y_train_normalized)





# Scale Values

In [None]:
# Report the overall value range of the features and labels
print(x_samples.max())
print(x_samples.min())

print(y_samples.max())
print(y_samples.min())

scaler = MinMaxScaler()
print(x_samples[0:1])
# fit() returns the scaler itself, so `scaled` refers to the fitted scaler
scaled = scaler.fit(x_samples)
first_row_scaled = scaler.transform(x_samples[0:1])
print(first_row_scaled)
# Round-trip check: inverting the scaled row should reproduce the raw row printed above
print(scaler.inverse_transform(first_row_scaled))


# Reshape Training Data

In [None]:

def ReshapeData(x, y):
    """Insert a length-1 axis at position 1 of both inputs.

    For example, an array of shape (n, 9) becomes (n, 1, 9).

    Returns:
        ``(x_reshaped, y_reshaped)`` as produced by ``np.expand_dims``.
    """
    x_reshaped, y_reshaped = (np.expand_dims(values, axis=1) for values in (x, y))
    return x_reshaped, y_reshaped


In [None]:
# Add a length-1 axis at position 1 to the normalized training arrays
x, y = ReshapeData(x_samples, y_samples)

print(x.shape, y.shape)

# Size of the axis inserted by ReshapeData
print(x.shape[1])

# Size of the axis that follows the inserted one
print(x.shape[2])


# Create Test / Validation Data

In [None]:
# Build evaluation arrays from a single trimmed recording chosen at the prompt

test_path = "../test_data/"


# ReadCSV returns (dataframe, file_list): unpack both so the dataframe itself is split below.
# The file list gets its own name so the training `files` list is left untouched.
test_dataframe, test_files = ReadCSV(test_path)

# Split features and labels into separate dataframes
x_test_df, y_test_df = SplitFeaturesLabels(test_dataframe)

# Convert features and labels to numpy arrays
x_test, y_test = LoadArray(x_test_df, y_test_df)

# Divide the values in both arrays by 100
x_test_normalized, y_test_normalized = NormalizeValues(x_test, y_test)

# Keep only the leading rows of the recording
x_test_samples, y_test_samples = SampleSize(x_test_normalized, y_test_normalized)

x, y = ReshapeData(x_test_samples, y_test_samples)

# Report the shapes of the two reshaped arrays together
print(x.shape, y.shape)

print(x_test.shape[1])

# Combine Test and Train datasets

Combine all data sets in /train_data and /test_data into one, for more samples when training 

In [None]:

#Combine all datasets in a directory into one dataframe
def CombineDatasets(path):
    """Load every ".csv" file in ``path`` and stack them into one dataframe.

    Every directory entry is printed as it is visited, and the combined
    dataframe is displayed before it is returned.

    Args:
        path: Directory to scan, with or without a trailing separator.

    Returns:
        All CSV rows concatenated top to bottom with a fresh integer index.

    Raises:
        ValueError: If the directory contains no ".csv" files.
    """
    data_list = []
    # os.listdir gives no ordering guarantee; sort so rows are combined reproducibly
    for entry in sorted(os.listdir(path)):
        filename = os.fsdecode(entry)
        print(filename)
        if filename.endswith(".csv"):
            data_list.append(pd.read_csv(os.path.join(path, filename)))

    if not data_list:
        raise ValueError("No .csv files found in " + str(path))

    data_df = pd.concat(data_list, axis=0, ignore_index=True)
    display(data_df)

    return data_df



In [None]:

#combine data in a directory into two lists of x and y features
def DatasetsLists(path):
    """Load every ".csv" file in ``path`` into per-file feature and label arrays.

    Each file is split with SplitFeaturesLabels, converted with LoadArray,
    scaled with NormalizeValues and reshaped with ReshapeData. Every
    directory entry is printed as it is visited.

    Args:
        path: Directory to scan, with or without a trailing separator.

    Returns:
        ``(x_list, y_list, file_list)``: three parallel lists holding, per CSV
        file, the reshaped feature array, the reshaped label array and the
        file name. All three are empty when no ".csv" file is found.
    """
    x_list = []
    y_list = []
    file_list = []
    # os.listdir gives no ordering guarantee; sort so the lists are reproducible
    for entry in sorted(os.listdir(path)):
        filename = os.fsdecode(entry)
        print(filename)
        if not filename.endswith(".csv"):
            continue
        file_list.append(filename)
        df = pd.read_csv(os.path.join(path, filename))
        x_features, y_features = SplitFeaturesLabels(df)
        x_array, y_array = LoadArray(x_features, y_features)
        x_normalized, y_normalized = NormalizeValues(x_array, y_array)
        x_reshape, y_reshape = ReshapeData(x_normalized, y_normalized)
        x_list.append(x_reshape)
        y_list.append(y_reshape)
    return x_list, y_list, file_list



In [None]:
train_path = "../train_data/"
# Per-file feature arrays, label arrays and file names for everything in train_path.
# This rebinds x, y and files, replacing values assigned by earlier cells.
x, y, files = DatasetsLists(train_path)


Combine all data in the training data directory 

In [None]:
train_path = "../train_data/"

# Stack every CSV in the training directory into one dataframe
combined_train_dataframe = CombineDatasets(train_path)

# Split into feature and label dataframes
x_train, y_train = SplitFeaturesLabels(combined_train_dataframe)
        
# Convert both to numpy arrays
x_train_arr, y_train_arr = LoadArray(x_train, y_train)

# Divide the values in both arrays by 100
x_train_normalized, y_train_normalized = NormalizeValues(x_train_arr, y_train_arr)




# Add the length-1 axis; x_train / y_train now refer to the reshaped arrays
x_train, y_train = ReshapeData(x_train_normalized, y_train_normalized)

print(x_train.shape, y_train.shape)


Combine all data in the test data directory

In [None]:
test_path = "../test_data/"

# Stack every CSV in the test directory into one dataframe
combined_test_dataframe = CombineDatasets(test_path)

# Split into feature and label dataframes
x_test, y_test = SplitFeaturesLabels(combined_test_dataframe)
        
# Convert both to numpy arrays
x_test_arr, y_test_arr = LoadArray(x_test, y_test)

# Divide the values in both arrays by 100
x_test_normalized, y_test_normalized = NormalizeValues(x_test_arr, y_test_arr)


# Add the length-1 axis; x_test / y_test now refer to the reshaped arrays
x_test, y_test = ReshapeData(x_test_normalized, y_test_normalized)

# Model Creation and Training

# Model Functions

In [None]:
#Fit model to two equally sized lists of x features and y labels 

def FitToList(x, y, model, epoch, b_size, verbose):
    """Fit ``model`` to each dataset in turn.

    For every position in ``x`` the index and the shapes of the feature and
    label arrays are printed, then ``model.fit`` is called on that pair.

    Args:
        x: List of feature arrays.
        y: List of label arrays, indexed in step with ``x``.
        model: Object exposing a Keras-style ``fit`` method.
        epoch: Number of epochs for each ``fit`` call.
        b_size: Batch size for each ``fit`` call.
        verbose: Verbosity flag forwarded to ``fit``.

    Returns:
        ``(epoch, b_size)`` exactly as passed in.
    """
    for index, features in enumerate(x):
        labels = y[index]
        print(index)
        print(features.shape)
        print(labels.shape)
        model.fit(features, labels, epochs=epoch, batch_size=b_size, verbose=verbose)
    return epoch, b_size
        
 # fit the model to a given set of features (x) and labels (y)    
def FitModel(x, y, model, epoch, b_size):
    """Fit ``model`` once on ``x`` / ``y`` with a validation split of 0.33.

    Args:
        x: Features passed to ``model.fit``.
        y: Labels passed to ``model.fit``.
        model: Object exposing a Keras-style ``fit`` method.
        epoch: Number of epochs.
        b_size: Batch size.

    Returns:
        ``(epoch, b_size)`` exactly as passed in.
    """
    fit_options = {
        "validation_split": 0.33,
        "epochs": epoch,
        "batch_size": b_size,
    }
    model.fit(x, y, **fit_options)
    return epoch, b_size

    
def EvaluateModel(x, y, model):
    """Evaluate ``model`` on ``x`` / ``y`` with a batch size of 256.

    The value returned by ``model.evaluate`` is printed and then returned
    unchanged.
    """
    evaluation = model.evaluate(x, y, batch_size=256)
    print("test loss, test acc:", evaluation)
    return evaluation
    
    
def PredictModel(x, y, model):
    """Predict on ``x`` and flatten predictions, labels and features to 9 columns.

    Args:
        x: Feature array given to ``model.predict``.
        y: Label array matching ``x``.
        model: Object exposing a Keras-style ``predict`` method.

    Returns:
        ``(predictions, y_reshaped, x_reshaped)``, each reshaped to
        ``(-1, 9)``. The three shapes are printed in the order labels,
        features, predictions.
    """
    raw_predictions = model.predict(x)
    flat_labels = y.reshape(-1, 9)
    flat_features = x.reshape(-1, 9)
    flat_predictions = raw_predictions.reshape(-1, 9)
    for flattened in (flat_labels, flat_features, flat_predictions):
        print(flattened.shape)
    return flat_predictions, flat_labels, flat_features


def DisplayPredictions(prediction, actual, predicted_on, range_1, range_2):
    """Wrap the result arrays in labelled dataframes and display a row range.

    Args:
        prediction: Predicted label values, 9 columns.
        actual: True label values, 9 columns.
        predicted_on: Feature values the predictions were made from, 9 columns.
        range_1: First row to display.
        range_2: Row to stop at (exclusive). When this is 0 or negative,
            ``range_1`` is ignored and every row is displayed.

    Returns:
        ``(actual_df, prediction_df, predicted_on_df)``.
    """
    start, stop = (0, len(prediction)) if range_2 <= 0 else (range_1, range_2)
    print("predictions shape:", prediction.shape)

    feature_names = [
        device + "_" + axis
        for device in ("Head", "R_Controller", "L_Controller")
        for axis in "XYZ"
    ]
    label_names = [
        device + "_" + axis
        for device in ("Waist", "R_Foot", "L_Foot")
        for axis in "XYZ"
    ]

    predicted_on_df = pd.DataFrame(predicted_on, columns=feature_names)
    prediction_df = pd.DataFrame(prediction, columns=label_names)
    actual_df = pd.DataFrame(actual, columns=label_names)

    print("Actual Values")
    display(actual_df[start:stop])
    print("Predicited Values")
    display(prediction_df[start:stop])
    return actual_df, prediction_df, predicted_on_df
    

        

# Model 1

Initial test model

In [None]:
# NOTE(review): `sgd` is built here, but compile() below is given optimizer="adam",
# so this optimizer is not used anywhere in this cell.
sgd = SGD(learning_rate=0.001, momentum=0.8, decay=0.999, nesterov=False)

# Two stacked 16-unit GRU layers, each followed by 20% dropout, then a 9-unit linear output
model = Sequential()
# NOTE(review): input_shape is (5, 9), while ReshapeData in this notebook inserts a
# length-1 axis — confirm which data this model is meant to be fed.
model.add(GRU(16, return_sequences=True, input_shape=(5, 9)))
model.add(Dropout(0.2))
model.add(GRU(16, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(9, activation = "linear"))

model.compile(loss='mse', optimizer="adam")

print ('model compiled')

print (model.summary())

# Model 2 

Primary model

In [None]:
model_2 = Sequential()

opt = SGD(learning_rate = 0.01)

# Loss, optimizer name and metric. Entries 0 and 2 are passed to compile() below;
# the optimizer actually used is the `opt` object, entry 1 is only a label.
# WriteResultToFile reads this list as a global when writing its summary.
compile_params = ["mse", "sgd", "accuracy"]
# Add one GRU layer of 28 units with input shape (1, 9)
model_2.add(GRU(28,return_sequences=True, input_shape=(1, 9)))

# Add dropout layer
model_2.add(Dropout(0.2))

# Optional second GRU layer (disabled)
#model_2.add(GRU(units=28))


# Add Dense layer with 9 outputs
model_2.add(Dense(9))

print(model_2.summary())


# Compile model
model_2.compile(
    loss = compile_params[0],
    optimizer = opt,
    metrics = [compile_params[2]],
)


# NOTE(review): `compiled_loss._losses` is an underscore-prefixed attribute — confirm it exists in the installed Keras version
print(model_2.compiled_loss._losses)

In [None]:
# Train model_2 for 1 epoch with batch size 256; FitModel holds out 33% of the samples for validation.
# NOTE(review): x and y are whatever the most recently run cell assigned. DatasetsLists leaves
# them as lists of arrays, which FitModel passes to model.fit unchanged — confirm the intended inputs.
epochs, batch_size = FitModel(x, y, model_2, 1, 256)

In [None]:
# Train model_2 on each dataset in the x / y lists in turn

# Arguments: feature list, label list, model to train, epochs per dataset, batch size, verbose flag
epochs, batch_size = FitToList(x, y, model_2, 20, 1, 1)

In [None]:
# Evaluate model_2 on the test arrays; `results` holds whatever model.evaluate returned
results = EvaluateModel(x_test, y_test, model_2)

In [None]:
# Predict on the test features; all three results are flattened to 9 columns
predicted_values, actual_values, predicted_on_values = PredictModel(x_test, y_test, model_2)

# Passing 0 as the last argument makes DisplayPredictions show every row
toWriteActual, toWritePred, toWritePredictedOn = DisplayPredictions(predicted_values, actual_values, predicted_on_values, 0, 0)

display(toWritePredictedOn)


# Write Results to File 

In [None]:

def WriteResultToFile(path, pred, actual, model, metrics, file_list, epochs, b_size, predicted_on):
    """Save predictions, test inputs and a run summary to a new timestamped folder.

    Creates ``<path>/run_<dd_mm_YYYY_HH_MM>/`` and writes three files there:

    * ``test_values.csv`` - the ``predicted_on`` dataframe.
    * ``prediction.csv`` - ``actual`` and ``pred`` side by side, with column
      names prefixed "A_" and "P_" respectively.
    * ``summary.txt`` - the model summary, epochs, batch size, metrics,
      compile parameters and the list of training files.

    The compile parameters are read from the notebook-level ``compile_params``
    list. Any error is printed rather than raised, so a failed write does not
    interrupt the notebook.

    Args:
        path: Directory in which the run folder is created, with or without
            a trailing separator.
        pred: Dataframe of predicted values.
        actual: Dataframe of true values.
        model: Model exposing a Keras-style ``summary(print_fn=...)`` method.
        metrics: Evaluation result: a list/tuple of values, or a single value.
        file_list: Names of the files the model was trained on.
        epochs: Number of training epochs, recorded in the summary.
        b_size: Training batch size, recorded in the summary.
        predicted_on: Dataframe of the feature values used for prediction.
    """
    try:
        # Timestamp, to the minute, used to name the run folder
        dt_string = datetime.now().strftime("%d_%m_%Y_%H_%M")

        # Create the folder under the directory that was passed in, and write
        # every file into that same folder
        new_folder = "run_" + dt_string
        output_folder = os.path.join(path, new_folder)
        os.makedirs(output_folder)

        # Prefix the columns so actual and predicted values stay distinguishable
        results = pd.concat([actual.add_prefix("A_"), pred.add_prefix("P_")], axis=1)

        predicted_on.to_csv(os.path.join(output_folder, "test_values.csv"), index=False, float_format='%.6f')
        results.to_csv(os.path.join(output_folder, "prediction.csv"), index=False, float_format='%.6f')

        metric_labels = ["Loss: ", "Accuracy: "]
        compile_params_list = ["Loss Function: ", "Optimizer: ", "Metrics: "]
        # A single evaluation value is treated as a one-element list
        metric_values = metrics if isinstance(metrics, (list, tuple)) else [metrics]

        # utf-8 so a model summary containing non-ASCII characters can always be written
        with open(os.path.join(output_folder, 'summary.txt'), 'w', encoding='utf-8') as fh:
            model.summary(print_fn=lambda line: fh.write(line + '\n'))
            fh.write("Epochs: " + str(epochs) + "\n")
            fh.write("Batch Size: " + str(b_size) + "\n")
            fh.write("Metrics:\n")
            for index, value in enumerate(metric_values):
                # Fall back to a generic label when there are more values than known labels
                if index < len(metric_labels):
                    label = metric_labels[index]
                else:
                    label = "Metric " + str(index) + ": "
                fh.write(label + str(value) + "\n")
            for label, value in zip(compile_params_list, compile_params):
                fh.write(label + str(value) + "\n")
            fh.write("Trained On:\n")
            for file in file_list:
                fh.write(file + "\n")

        print("File Output to " + new_folder)
    except Exception as e:
        print(e)
        print("Error Writing Results To File!")
        
        
        
    
    


In [None]:
results_path = "../results/"

# Save predictions, test inputs and a text summary of this run under results_path
WriteResultToFile(results_path, toWritePred, toWriteActual, model_2, results, files, epochs, batch_size, toWritePredictedOn) 

