The idea here is that more training examples can be created from our measured dataset. By joining the second half of one example to the first half of the next (provided the two examples are consecutive and share the same label), a new 60-second example can be created and used for training.

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Sensor channel names: accelerometer x/y/z followed by gyroscope x/y/z.
# Each name is also the prefix of that channel's CSV file.
sensor_names = [f'{sensor}_{axis}' for sensor in ('Acc', 'Gyr') for axis in 'xyz']

# Human-readable activity names.
# presumably indexed by the integer class label - TODO confirm
label_names = [
    "DNE",
    "Resting",
    "Walking",
    "Running",
    "Driving",
]

# File-name suffixes of the two recorded data splits.
# NOTE(review): test_suffix points at a second "_train_" file - confirm intended.
train_suffix = '_train_1.csv'
test_suffix = '_train_2.csv'

In [3]:
# Load the integer class labels. ndmin=1 keeps `labels` one-dimensional even
# when the file holds a single value, so len()/indexing below keep working.
labels = np.loadtxt('labels' + train_suffix, dtype='int', ndmin=1)

# Load one CSV per sensor channel into a single array whose last axis indexes
# the sensor (same order as sensor_names). The first file is read on its own
# to discover the per-channel shape; ndmin=2 keeps a one-row file 2-D.
first_channel = np.loadtxt(sensor_names[0] + train_suffix,
                           delimiter=',', ndmin=2)
data = np.empty((first_channel.shape[0], first_channel.shape[1],
                 len(sensor_names)))
data[:, :, 0] = first_channel
# Drop the temporary copy before loading the remaining channels.
del first_channel
for sensor_index, sensor_name in enumerate(sensor_names[1:], start=1):
    data[:, :, sensor_index] = np.loadtxt(
        sensor_name + train_suffix, delimiter=',', ndmin=2)

In [4]:
# Create groups of row indices that are sequential and share the same label.
# Each entry of index_groups is a list of adjacent row indices; a new group
# starts wherever a row's label differs from the previous row's label.

index_groups = []
# Seed the first group with row 0 only if that row exists; with no labels
# there is nothing to group and index_groups stays empty.
temp_group = [0] if len(labels) > 0 else []

for i in range(1, len(labels)):
    if labels[i] == labels[i - 1]:
        temp_group.append(i)
    else:
        index_groups.append(temp_group)
        temp_group = [i]

# Flush the final group (skipped when there were no labels at all).
if temp_group:
    index_groups.append(temp_group)

In [5]:
# Create the new rows: within each group of consecutive same-label rows,
# splice the tail of every row onto the head of the row that follows it.
num_features = data.shape[1]

new_training_examples = []
new_labels = []

# Where to join the current and next rows (index along the per-example axis).
split_point = num_features // 2

for group in index_groups:
    group_label = labels[group[0]]

    # Pair each row with its successor in the group; the final row has no
    # successor, so a group of n rows yields n - 1 new examples.
    for idx, next_idx in zip(group, group[1:]):
        curr_row = data[idx]
        next_row = data[next_idx]

        # Take everything from split_point onward in the current row and
        # everything before split_point in the next row. Slicing with
        # [split_point:] rather than [-split_point:] keeps the new example
        # exactly num_features long when num_features is odd, and avoids
        # [-0:] selecting the whole row when split_point is 0.
        new_row = np.concatenate((curr_row[split_point:], next_row[:split_point]))

        new_training_examples.append(new_row)
        new_labels.append(group_label)

if new_training_examples:
    new_training_examples = np.array(new_training_examples)
else:
    # No adjacent same-label rows: keep a 3-D (0, features, channels) array so
    # later per-channel slicing still works on the empty result.
    new_training_examples = np.empty((0, num_features, data.shape[2]),
                                     dtype=data.dtype)
new_labels = np.array(new_labels, dtype=labels.dtype)

In [6]:
# Add Gaussian noise to the generated data for additional independence from
# the measured examples it was spliced together from.

# Per-sensor-type noise scales. The original note says these were eyeballed
# (the rest of that note is cut off). At 0.0 the noise terms are all zero, so
# the generated examples pass through unchanged.
acc_noise_magnitude = 0.0
gyr_noise_magnitude = 0.0

# Split along the channel axis: the first three channels get the accelerometer
# noise scale, the remaining channels the gyroscope noise scale.
accel_channels = new_training_examples[:, :, :3]
gyro_channels = new_training_examples[:, :, 3:]

# Draw the accelerometer noise first, then the gyroscope noise, each scaled by
# its magnitude.
accel_jitter = acc_noise_magnitude * np.random.randn(*accel_channels.shape)
gyro_jitter = gyr_noise_magnitude * np.random.randn(*gyro_channels.shape)

# Re-join the perturbed halves along the channel axis.
noisy_new_training_examples = np.concatenate(
    [accel_channels + accel_jitter, gyro_channels + gyro_jitter],
    axis=2,
)

In [7]:
# Save the generated data in the same file layout as the input: one labels
# file plus one CSV per sensor channel, each prefixed with "generated_" and
# ending in the same suffix as the files that were loaded.

np.savetxt('generated_labels' + train_suffix, new_labels, delimiter=",", fmt="%d")

# Read the channel count from the array shape so this also works when `data`
# has zero rows.
num_channels = data.shape[2]

for i in range(num_channels):
    file_name = 'generated_' + sensor_names[i] + train_suffix
    # Each output file holds one channel as a 2-D (examples x samples) table.
    np.savetxt(file_name, noisy_new_training_examples[:, :, i], delimiter=",")