In [1]:
"""
Window.ipynb

This notebook performs a trailing window on specified time series data with the following parameters:
- window size
- hops
In our context, each movement takes about 1 second on average. Thus, with a window length of 1 s
 and a sampling rate of 50 Hz, each window contains 50 samples.
"""

import pandas as pd
import tensorflow as tf
import numpy as np
import xarray
from tqdm import tqdm

def count_samples(window_size, overlaps, sample_size):
    """Return how many full windows a sliding window yields over a series.

    Consecutive windows start ``window_size - overlaps`` samples apart, and
    only windows that fit entirely inside the series are counted.

    Args:
        window_size: Number of samples in each window (must be positive).
        overlaps: Number of samples shared by consecutive windows; must be
            strictly smaller than ``window_size``.
        sample_size: Total number of samples in the series.

    Returns:
        The number of complete windows, or 0 when the series is shorter
        than a single window.

    Raises:
        ValueError: If ``window_size`` is not positive, or ``overlaps`` is
            not smaller than ``window_size`` (the step between windows
            would be zero or negative).
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive")
    step = window_size - overlaps
    if step <= 0:
        raise ValueError("overlaps must be smaller than window_size")
    if sample_size < window_size:
        # Not even one window fits; the floor division below would go negative.
        return 0
    return 1 + (sample_size - window_size) // step

In [2]:
# Sanity-check count_samples for several (window, overlap) pairs over 272800 samples.
for win, ovl in [(50, 0), (50, 25), (100, 0), (100, 25), (100, 50)]:
    print(count_samples(win, ovl, 272800))

5456
10911
2728
3637
5455


In [3]:
## change sampling parameters here
window, overlap = 100, 50      # samples per window / samples shared by neighbouring windows
num_gestures = 4               # picks which gesture CSVs are loaded (4, 5 or 7 classes)
total_samples = 3100 * 22 * 4  # samples assumed per gesture class
# Number of windows obtained per gesture class.
data_size = count_samples(window, overlap, total_samples)
print(data_size)

5455


In [4]:
'''
Create the output folders if they don't exist
'''
import os
# Output folder for the train/test splits, keyed by the window settings.
location = 'ready_data/window-'+str(window)+'-'+str(overlap)
folder = ['ready_data', location]

for path in tqdm(folder):
    # exist_ok=True makes this a no-op for existing folders and avoids the
    # race between checking for the folder and creating it.
    os.makedirs(path, exist_ok=True)

100%|██████████| 2/2 [00:00<00:00, 2006.36it/s]


# PART 1: Processing and saving data into 1 file
## Skip to PART 2 if you have ready_data/numpy-window.txt and window & overlap are unchanged

In [5]:
"""
Sliding window function and params
"""
from skimage.util.shape import view_as_windows
import warnings

# Raw recordings, one combined CSV per gesture. In the 7-class setup the files
# are used in this order, so csv1 maps to label 0 and csv7 to label 6.
csv1 = 'raw_data/combined_buddhaClap.csv'
csv2 = 'raw_data/combined_crankLeft.csv'
csv3 = 'raw_data/combined_crankRight.csv'
csv4 = 'raw_data/combined_knobLeft.csv'
csv5 = 'raw_data/combined_knobRight.csv'
csv6 = 'raw_data/combined_pushback.csv'
csv7 = 'raw_data/combined_swipe.csv'


def sliding_window(a, L, overlap=1):
    """Cut a 2-D array into fixed-length windows along its first axis.

    Args:
        a: 2-D array of shape (n_rows, n_columns).
        L: Number of rows in each window.
        overlap: Number of rows shared by consecutive windows; must be
            smaller than ``L``. Each window starts ``L - overlap`` rows
            after the previous one.

    Returns:
        Array of shape (n_windows, L, n_columns) taken from
        ``view_as_windows``. Trailing rows that do not fill a whole window
        are left out, and a warning is issued when that happens.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``L``, or ``a`` has
            fewer than ``L`` rows.
    """
    if overlap >= L:
        # A zero or negative step can never advance the window.
        raise ValueError("Overlap arg must be smaller than length of windows")
    if len(a) < L:
        raise ValueError("Input has fewer rows than the window length")
    S = L - overlap
    nd0 = ((len(a)-L)//S)+1
    # The last window starts at (nd0 - 1) * S; if it does not end exactly on
    # the final row, some trailing rows are not part of any window.
    if (nd0 - 1) * S != len(a) - L:
        warnings.warn("Not all elements were covered")
    # The window spans every column, so axis 1 of the result has length 1
    # and is dropped by the [:, 0] index.
    output = view_as_windows(a, (L,a.shape[1]), step=S)[:,0,:,:]
    print(output.shape)
    return output


# Start from an empty float array so the stacked result always has the
# (n_windows, window, 6) layout, and files with another column count fail loudly.
window_arrays = [np.empty((0,window,6))]

## Select relevant gesture files
gesture_files = {
    4: [csv1, csv5, csv6, csv7], #4 classes
    5: [csv1, csv3, csv5, csv6, csv7], # 5 classes
    7: [csv1, csv2, csv3, csv4, csv5, csv6, csv7], #7 classes
}
if num_gestures not in gesture_files:
    # Fail here rather than hit an undefined (or stale) csv_list in the loop.
    raise ValueError("num_gestures must be 4, 5 or 7, got " + str(num_gestures))
csv_list = gesture_files[num_gestures]

print('start!')
for csv_path in tqdm(csv_list):
    # `df` is kept as a top-level name: a later cell inspects the last file read.
    df = pd.read_csv(csv_path)
    window_array = sliding_window(df.to_numpy(), window, overlap=overlap)
    window_arrays.append(window_array)
# Concatenate once at the end; np.append inside the loop re-copied everything
# accumulated so far for every file.
total_array = np.concatenate(window_arrays, axis=0)
print('ok!')
total_array

  0%|          | 0/4 [00:00<?, ?it/s]start!
 25%|██▌       | 1/4 [00:00<00:00,  4.83it/s](5455, 100, 6)
 50%|█████     | 2/4 [00:00<00:00,  4.84it/s]
 75%|███████▌  | 3/4 [00:00<00:00,  4.75it/s]
100%|██████████| 4/4 [00:00<00:00,  4.67it/s](5455, 100, 6)
ok!



array([[[-1.7500e+00,  1.9000e-01, -3.3000e-01,  2.2770e+01,
          2.4999e+02,  2.4999e+02],
        [-1.6500e+00, -1.0000e-02, -1.1000e-01,  9.0100e+01,
          2.4999e+02,  2.4552e+02],
        [-1.3400e+00,  4.0000e-02,  1.3000e-01,  1.2638e+02,
          2.4999e+02,  2.2218e+02],
        ...,
        [-1.6400e+00,  1.7400e+00, -2.0000e+00, -1.2742e+02,
         -2.5000e+02, -1.6490e+02],
        [-1.3800e+00,  1.8000e+00, -2.1800e+00, -7.3120e+01,
         -2.5000e+02, -1.3055e+02],
        [-1.1600e+00,  2.1400e+00, -2.2200e+00, -5.4180e+01,
         -2.1140e+02, -1.1552e+02]],

       [[-1.9400e+00,  2.0700e+00, -2.5700e+00,  3.1700e+00,
          2.4999e+02,  1.3746e+02],
        [-2.2300e+00,  1.8600e+00, -2.1500e+00, -2.0940e+01,
          2.4999e+02,  1.5142e+02],
        [-2.5100e+00,  1.6500e+00, -1.9200e+00, -4.0050e+01,
          2.4999e+02,  1.6670e+02],
        ...,
        [-1.3700e+00,  5.8000e-01, -4.2000e-01, -1.6342e+02,
         -2.5000e+02, -1.5569e+02],
  

In [6]:
# Row count of the last CSV read by the loading loop.
len(df)

272800

In [7]:
# Windows of all selected CSVs stacked along axis 0: (total windows, window, 6).
total_array.shape

(21820, 100, 6)

In [8]:
"""
Save into txt file
"""
arr = total_array

# np.savetxt only accepts 1-D/2-D data, so flatten every window into one row.
arr_reshaped = arr.reshape(len(arr), -1)

# Write the flattened windows, then read them straight back.
txt_path = "ready_data/numpy-window.txt"
np.savetxt(txt_path, arr_reshaped)
loaded_arr = np.loadtxt(txt_path)

# Undo the flattening: keep the rows and split each row's columns back
# into (steps, channels) using the channel count of the original array.
n_rows = loaded_arr.shape[0]
n_channels = arr.shape[2]
n_steps = loaded_arr.shape[1] // n_channels
load_original_arr = loaded_arr.reshape(n_rows, n_steps, n_channels)

# Compare the shapes ...
print("shape of arr: ", arr.shape)
print("shape of load_original_arr: ", load_original_arr.shape)

# ... and the contents of the round-tripped array against the original.
arrays_match = (load_original_arr == arr).all()
print("Yes, both the arrays are same" if arrays_match else "No, both the arrays are not same")

shape of arr:  (21820, 100, 6)
shape of load_original_arr:  (21820, 100, 6)
Yes, both the arrays are same


In [9]:
# 2-D array read back from the text file: one row per window, each row a flattened window.
loaded_arr.shape

(21820, 600)

# PART 2: Loading Data and Splitting

In [10]:
"""
Uncomment and Run if you skipped Part 1
"""
# No manual uncommenting needed: the saved windows are loaded automatically,
# and only when Part 1 did not already create `loaded_arr` in this kernel.
if 'loaded_arr' not in globals():
    print('start!')
    loaded_arr = np.loadtxt("ready_data/numpy-window.txt")


'\nUncomment and Run if you skipped Part 1\n'

In [11]:
x_array = loaded_arr  # feature matrix: one flattened window per row
print(x_array.shape)
print(x_array[:2])

(21820, 600)
[[-1.7500e+00  1.9000e-01 -3.3000e-01 ... -5.4180e+01 -2.1140e+02
  -1.1552e+02]
 [-1.9400e+00  2.0700e+00 -2.5700e+00 ... -1.7601e+02 -2.5000e+02
  -1.5324e+02]]


In [12]:
"""
Create gesture class labels according to index
0: buddha clap
1: crank left
2: crank right
3: knob left
4: knob right
5: pushback
6: swipe
~~~~~~~~~
0: buddha clap
1: crank right
2: knob right
3: pushback
4: swipe
~~~~~~~~~
0: buddha clap
1: knob right
2: pushback
3: swipe
"""
# Class i owns the i-th contiguous run of data_size rows, so its label is
# repeated data_size times; reshape to a column to keep the (n, 1) layout.
y_array = np.repeat(np.arange(num_gestures, dtype=int), data_size).reshape(-1, 1)
if len(y_array) != len(x_array):
    # Labels are assigned purely by position, so a row-count mismatch means
    # the windows and labels would be silently misaligned.
    raise ValueError(
        "expected %d windows (num_gestures * data_size) but x_array has %d rows"
        % (len(y_array), len(x_array))
    )
print(y_array.shape)
y_array

(21820, 1)


array([[0],
       [0],
       [0],
       ...,
       [3],
       [3],
       [3]])

In [13]:
"""
Split into train-test data
"""
train_ratio = 0.7
class_size = data_size ## number of samples per class
split = round(train_ratio*(class_size)) # train rows per class, rounded to an integer
x_train, y_train = [], []
x_test, y_test = [], []
for gesture in tqdm(range(num_gestures)):
    # Each class occupies rows [first, last); the leading `split` rows go to
    # the training set and the remainder to the test set.
    first = gesture * class_size
    cut = first + split
    last = first + class_size
    train_rows, test_rows = slice(first, cut), slice(cut, last)

    x_train.extend(x_array[train_rows])
    y_train.extend(y_array[train_rows])
    x_test.extend(x_array[test_rows])
    y_test.extend(y_array[test_rows])

print()
for part in (x_train, x_test, y_train, y_test):
    print(len(part))


100%|██████████| 4/4 [00:00<00:00, 571.45it/s]
15272
6548
15272
6548



In [14]:
# The split lists hold one NumPy row per window; peek at the first two test rows.
print(type(x_train[0]))
print(x_test[:2])

<class 'numpy.ndarray'>
[array([-5.3000e-01,  3.9000e-01, -3.4000e-01, -7.0370e+01,  1.9668e+02,
       -2.7700e+00, -5.8000e-01,  4.2000e-01,  1.2000e-01, -2.3020e+01,
        2.4710e+02,  2.1870e+01, -4.1000e-01,  4.1000e-01,  1.9000e-01,
        1.1570e+01,  2.4999e+02,  7.9520e+01, -4.6000e-01,  2.4000e-01,
        2.8000e-01, -1.2222e+02,  2.3036e+02,  8.1380e+01, -1.9000e-01,
       -3.0000e-02,  6.5000e-01, -1.9043e+02, -1.1010e+01,  8.0960e+01,
        3.4000e-01, -1.7000e-01,  5.3000e-01, -1.4471e+02,  2.4999e+02,
        9.1680e+01,  4.5000e-01, -2.0000e-02,  1.0000e+00,  2.4999e+02,
        1.5325e+02,  6.0960e+01,  6.6000e-01,  5.3000e-01,  1.6000e+00,
        2.4999e+02,  2.4999e+02,  4.2770e+01, -1.7000e-01,  3.0000e-01,
        2.3000e+00, -1.6920e+02,  2.4999e+02,  5.8530e+01,  3.5000e-01,
       -6.0000e-01,  2.6200e+00, -2.3340e+02,  1.1681e+02,  9.3720e+01,
        9.0000e-01, -1.6600e+00,  4.0000e+00,  1.3821e+02, -2.5000e+02,
        1.0497e+02, -1.1200e+00, -6.800

In [15]:
'''
Turn the four split lists into NumPy arrays
'''
for _ in tqdm(range(1)):  # single-pass loop, only there to show progress/timing
    x_train_arr, y_train_arr = np.array(x_train), np.array(y_train)
    x_test_arr, y_test_arr = np.array(x_test), np.array(y_test)

100%|██████████| 1/1 [00:00<00:00, 20.00it/s]


In [16]:
# Shapes of the four split arrays, then the first two training rows.
for split_arr in (x_train_arr, y_train_arr, x_test_arr, y_test_arr):
    print(split_arr.shape)
print(x_train_arr[:2])

(15272, 600)
(15272, 1)
(6548, 600)
(6548, 1)
[[-1.7500e+00  1.9000e-01 -3.3000e-01 ... -5.4180e+01 -2.1140e+02
  -1.1552e+02]
 [-1.9400e+00  2.0700e+00 -2.5700e+00 ... -1.7601e+02 -2.5000e+02
  -1.5324e+02]]


In [17]:
"""
Save the 2D matrices as text files
"""
for _ in tqdm(range(1)):  # single-pass loop, only there to show progress/timing
    # Each split is written to its own text file inside `location`.
    outputs = (
        ('/x_train.txt', x_train_arr),
        ('/y_train.txt', y_train_arr),
        ('/x_test.txt', x_test_arr),
        ('/y_test.txt', y_test_arr),
    )
    for suffix, split_arr in outputs:
        np.savetxt(location+suffix, split_arr)

100%|██████████| 1/1 [00:17<00:00, 17.65s/it]
