In [1]:
"""
Window.ipynb

This notebook applies a sliding window to the specified time series data with the following parameters:
- window size
- hops (the step between consecutive windows, i.e. window size minus overlap)
In our context, each movement takes about 1 second on average. Thus, setting our window
 length to 1 s at a sampling rate of 50 Hz results in 50 samples per window.
"""

import pandas as pd
import tensorflow as tf
import numpy as np
import xarray

def count_samples(window_size, overlaps, sample_size):
    """Return the number of full windows a sliding window produces.

    Windows are ``window_size`` rows long and consecutive windows start
    ``window_size - overlaps`` rows apart. Only complete windows are counted,
    so trailing rows that do not fill a window are ignored.

    Args:
        window_size: Number of rows in each window.
        overlaps: Number of rows shared by two consecutive windows.
        sample_size: Total number of rows available.

    Returns:
        The window count as an int; 0 when ``sample_size`` is smaller than
        ``window_size``.

    Raises:
        ValueError: If ``overlaps`` is not smaller than ``window_size`` (the
            window would never advance).
    """
    step = window_size - overlaps
    if step <= 0:
        raise ValueError("overlaps must be smaller than window_size")
    if sample_size < window_size:
        # Not even one full window fits; the floor division below would
        # otherwise yield zero or a negative count.
        return 0
    return 1 + (sample_size - window_size) // step

In [2]:
# Window counts for 272800 rows under each (window size, overlap) setting.
for win, ovl in [(50, 0), (50, 25), (100, 0), (100, 25), (100, 50)]:
    print(count_samples(win, ovl, 272800))

5456
10911
2728
3637
5455


In [3]:
## Sampling parameters -- edit these to change the windowing configuration
window = 50        # rows per window
overlap = 25       # rows shared by consecutive windows
num_gestures = 4   # how many gesture classes are loaded
# NOTE(review): used below as the row count of ONE gesture file; confirm what
# the individual factors stand for.
total_samples = 3100 * 22 * 4

# Number of windows produced per gesture class with the settings above.
data_size = count_samples(window_size=window, overlaps=overlap, sample_size=total_samples)
print(data_size)

10911


# PART 1: Processing and saving data into 1 file
## Skip to PART 2 if you have the file

In [4]:
"""
Sliding window function and params
"""
from tqdm import tqdm
from skimage.util.shape import view_as_windows
import warnings

# One combined CSV per gesture; csv1..csv7 follow the order of the names below.
csv1, csv2, csv3, csv4, csv5, csv6, csv7 = (
    'raw_data/combined_' + gesture + '.csv'
    for gesture in (
        'buddhaClap',
        'crankLeft',
        'crankRight',
        'knobLeft',
        'knobRight',
        'pushback',
        'swipe',
    )
)


def sliding_window(a, L, overlap=1):
    """Cut ``a`` into windows of ``L`` consecutive rows along axis 0.

    Consecutive windows start ``L - overlap`` rows apart. Only complete
    windows are produced; a warning is issued when the last window does not
    end exactly on the last row of ``a``.

    Args:
        a: Array-like whose first axis is the one being windowed.
        L: Number of rows per window.
        overlap: Number of rows shared by consecutive windows. Must be
            smaller than ``L``.

    Returns:
        A new array of shape ``(n_windows, L, *a.shape[1:])`` where window
        ``k`` holds rows ``k*(L-overlap)`` to ``k*(L-overlap)+L-1`` of ``a``.
        The result is a copy, so it does not alias ``a``.

    Raises:
        ValueError: If ``L`` is not positive, if ``overlap`` is not smaller
            than ``L``, or if ``a`` has fewer than ``L`` rows.
    """
    if L < 1:
        raise ValueError("Length of windows must be at least 1")
    if overlap >= L:
        # Covers overlap == L (zero step) as well as overlap > L (negative step).
        raise ValueError("Overlap arg must be smaller than length of windows")
    a = np.asarray(a)
    n_rows = len(a)
    if n_rows < L:
        raise ValueError(
            "Input has {} rows, fewer than the window length {}".format(n_rows, L)
        )
    S = L - overlap
    nd0 = ((n_rows - L) // S) + 1
    if nd0 * S - S != n_rows - L:
        warnings.warn("Not all elements were covered")
    # Row k of the index matrix lists the L source rows of window k; indexing
    # with it gathers every window in one step.
    row_index = S * np.arange(nd0)[:, None] + np.arange(L)[None, :]
    output = a[row_index]
    print(output.shape)
    return output


# Seed the list with an empty (0, window, 6) float array so the stacked result
# keeps the same dtype promotion and per-window shape check as before.
per_file_windows = [np.empty((0, window, 6))]

## Select relevant gesture files
gesture_files = {
    4: [csv1, csv5, csv6, csv7],                          # 4 classes
    5: [csv1, csv3, csv5, csv6, csv7],                    # 5 classes
    7: [csv1, csv2, csv3, csv4, csv5, csv6, csv7],        # 7 classes
}
if num_gestures not in gesture_files:
    # Previously an unsupported value left csv_list undefined (or stale from
    # an earlier run); fail with an explicit message instead.
    raise ValueError(
        "num_gestures must be one of {}, got {!r}".format(sorted(gesture_files), num_gestures)
    )
csv_list = gesture_files[num_gestures]

print('start!')
for csv_path in tqdm(csv_list):
    # `df` is intentionally left at module level: a later cell inspects it.
    df = pd.read_csv(csv_path)
    window_array = sliding_window(df.to_numpy(), window, overlap=overlap)
    per_file_windows.append(window_array)
# Concatenate once at the end instead of re-copying the growing array on
# every iteration.
total_array = np.concatenate(per_file_windows, axis=0)
print('ok!')
total_array

  0%|          | 0/4 [00:00<?, ?it/s]start!
 25%|██▌       | 1/4 [00:00<00:00,  5.95it/s](10911, 50, 6)
 50%|█████     | 2/4 [00:00<00:00,  5.88it/s](10911, 50, 6)
(10911, 50, 6)
 75%|███████▌  | 3/4 [00:00<00:00,  5.75it/s](10911, 50, 6)
100%|██████████| 4/4 [00:00<00:00,  5.63it/s]ok!



array([[[-1.7500e+00,  1.9000e-01, -3.3000e-01,  2.2770e+01,
          2.4999e+02,  2.4999e+02],
        [-1.6500e+00, -1.0000e-02, -1.1000e-01,  9.0100e+01,
          2.4999e+02,  2.4552e+02],
        [-1.3400e+00,  4.0000e-02,  1.3000e-01,  1.2638e+02,
          2.4999e+02,  2.2218e+02],
        ...,
        [-7.9000e-01,  2.4900e+00, -3.6100e+00, -9.9390e+01,
          5.4380e+01,  2.3390e+01],
        [-1.1200e+00,  2.1900e+00, -3.4100e+00, -4.0690e+01,
          1.4500e+02,  9.3510e+01],
        [-1.6200e+00,  2.2000e+00, -3.0000e+00,  1.0530e+01,
          2.2304e+02,  1.2435e+02]],

       [[-2.3000e-01,  2.5000e-01,  2.4000e-01, -1.3695e+02,
         -2.0723e+02, -8.3020e+01],
        [-3.9000e-01,  1.4000e-01,  1.1000e-01, -1.1826e+02,
         -2.0531e+02, -9.0680e+01],
        [-5.0000e-01,  8.0000e-02,  5.0000e-02, -8.7340e+01,
         -2.1457e+02, -9.5150e+01],
        ...,
        [-1.8200e+00,  1.4000e-01,  1.2800e+00,  1.1925e+02,
         -8.8540e+01,  1.2853e+02],
  

In [5]:
# Row count of `df`, i.e. of the last CSV read by the loading loop.
len(df.to_numpy())

272800

In [6]:
# Shape of the stacked windows from all selected gesture files.
total_array.shape

(43644, 50, 6)

In [7]:
"""
Save into txt file
"""
arr = total_array

# Flatten every window into a single row so the data is 2-D for the text file.
n_windows = arr.shape[0]
arr_reshaped = arr.reshape(n_windows, -1)

# Write the flattened windows out, then read them straight back.
np.savetxt("numpy-window.txt", arr_reshaped)
loaded_arr = np.loadtxt("numpy-window.txt")

# The loaded data is 2-D; split each row back into (rows per window, channels)
# using the channel count of the original array.
channels = arr.shape[2]
load_original_arr = loaded_arr.reshape(
    loaded_arr.shape[0], loaded_arr.shape[1] // channels, channels)

# Report both shapes.
print("shape of arr: ", arr.shape)
print("shape of load_original_arr: ", load_original_arr.shape)

# Report whether the round trip reproduced every element.
arrays_match = (load_original_arr == arr).all()
print("Yes, both the arrays are same" if arrays_match else "No, both the arrays are not same")

shape of arr:  (43644, 50, 6)
shape of load_original_arr:  (43644, 50, 6)
Yes, both the arrays are same


In [8]:
# Shape of the array read back from numpy-window.txt (2-D, one flattened window per row).
loaded_arr.shape

(43644, 300)

# PART 2: Loading Data and Splitting

In [9]:
# Load data contents
#
# Entry point for PART 2: when the windowed data is not already in memory
# (PART 1 was skipped), read it back from the text file PART 1 saved.
# If PART 1 has just run, `loaded_arr` already exists and the slow reload
# is skipped.
import numpy as np

if 'loaded_arr' not in globals():
    print('start!')
    loaded_arr = np.loadtxt("numpy-window.txt")


In [10]:
# Feature matrix for PART 2: the 2-D array loaded from the text file, used as-is.
x_array = loaded_arr
print(x_array.shape)
print(x_array[0:2])  # peek at the first two rows

(43644, 300)
[[-1.7500e+00  1.9000e-01 -3.3000e-01  2.2770e+01  2.4999e+02  2.4999e+02
  -1.6500e+00 -1.0000e-02 -1.1000e-01  9.0100e+01  2.4999e+02  2.4552e+02
  -1.3400e+00  4.0000e-02  1.3000e-01  1.2638e+02  2.4999e+02  2.2218e+02
  -1.0600e+00 -1.0000e-02  2.6000e-01  1.3048e+02  2.4999e+02  1.8718e+02
  -8.3000e-01 -1.4000e-01  3.0000e-01  1.4704e+02  2.4503e+02  1.6122e+02
  -6.2000e-01 -2.7000e-01  3.3000e-01  1.8921e+02  2.3178e+02  1.3690e+02
  -5.4000e-01 -1.9000e-01  4.0000e-01  2.4999e+02  2.2418e+02  1.1510e+02
  -2.7000e-01 -2.0000e-02  6.3000e-01  2.4999e+02  2.1301e+02  1.0772e+02
   3.0000e-02 -8.0000e-02  9.9000e-01  2.4999e+02  2.1532e+02  1.0066e+02
   1.8000e-01  5.0000e-02  1.3900e+00  2.4999e+02  2.0455e+02  6.7020e+01
   1.4000e-01  5.0000e-01  1.7300e+00  2.0731e+02  1.4995e+02  1.8060e+01
   2.9000e-01  6.0000e-01  1.8300e+00  4.7160e+01  1.0445e+02  6.7800e+00
   1.2000e-01  6.8000e-01  2.2500e+00 -1.1939e+02  7.1970e+01  1.7090e+01
   2.9100e+00  1.3400e+00

In [11]:
"""
Create gesture class labels according to index
7 classes (num_gestures == 7):
0: buddha clap
1: crank left
2: crank right
3: knob left
4: knob right
5: pushback
6: swipe
~~~~~~~~~
5 classes (num_gestures == 5):
0: buddha clap
1: crank right
2: knob right
3: pushback
4: swipe
~~~~~~~~~
4 classes (num_gestures == 4):
0: buddha clap
1: knob right
2: pushback
3: swipe
"""
# One integer label per window, as a column vector: class i owns the i-th run
# of `data_size` consecutive rows. Built in one shot instead of growing the
# array with np.append inside a loop.
y_array = np.repeat(np.arange(num_gestures, dtype=int), data_size).reshape(-1, 1)

# The labels only line up with the features if every class contributed exactly
# `data_size` rows; otherwise rows would be silently mislabelled.
if y_array.shape[0] != x_array.shape[0]:
    raise ValueError(
        "label count {} does not match feature rows {}; check num_gestures and data_size".format(
            y_array.shape[0], x_array.shape[0]
        )
    )
print(y_array.shape)
y_array

(43644, 1)


array([[0],
       [0],
       [0],
       ...,
       [3],
       [3],
       [3]])

In [12]:
"""
Split into train-test data
"""
from tqdm import tqdm
train_ratio = 0.7
class_size = data_size  # rows belonging to each gesture class
split = round(train_ratio * class_size)  # training rows per class, rounded to an integer
x_train, y_train, x_test, y_test = [], [], [], []
for i in tqdm(range(num_gestures)):
    # Class i occupies rows [train_start, test_stop); its first `split` rows
    # go to the training set and the remainder to the test set.
    train_start = i * class_size
    train_stop = train_start + split
    test_stop = train_start + class_size
    train_rows = slice(train_start, train_stop)
    test_rows = slice(train_stop, test_stop)

    x_train.extend(x_array[train_rows])
    y_train.extend(y_array[train_rows])
    x_test.extend(x_array[test_rows])
    y_test.extend(y_array[test_rows])

print()
for part in (x_train, x_test, y_train, y_test):
    print(len(part))


100%|██████████| 4/4 [00:00<00:00, 307.71it/s]
30552
13092
30552
13092



In [13]:
# Inspect the split: element type of the training list and the first two test rows.
print(type(x_train[0]))
print(x_test[0:2])

<class 'numpy.ndarray'>
[array([-7.3000e-01,  3.3000e-01,  1.8000e-01, -2.3380e+01,  2.4999e+02,
        7.8930e+01, -5.7000e-01,  5.0000e-02,  3.0000e-01, -8.9550e+01,
        1.8013e+02,  1.0487e+02, -4.1000e-01, -1.4000e-01,  5.1000e-01,
       -1.0596e+02,  9.8450e+01,  9.9220e+01,  1.3000e-01, -2.1000e-01,
        8.4000e-01, -6.8480e+01,  1.2749e+02,  8.2980e+01, -2.7000e-01,
       -1.4000e-01,  8.6000e-01,  2.4999e+02,  2.4607e+02,  5.2930e+01,
        3.7000e-01,  4.5000e-01,  1.4700e+00,  2.4999e+02,  2.4999e+02,
        3.3070e+01, -1.1000e-01,  3.6000e-01,  2.3500e+00, -1.8182e+02,
        2.4999e+02,  5.9080e+01, -4.0000e+00, -1.6300e+00,  4.0000e+00,
       -2.5000e+02,  2.4999e+02, -7.0020e+01,  4.0000e+00, -2.5600e+00,
       -4.0000e+00,  1.3516e+02, -2.5000e+02,  1.7750e+02, -1.2000e-01,
        4.7000e-01,  1.1300e+00,  2.4999e+02,  2.4999e+02,  5.7100e+00,
       -1.5600e+00, -5.6000e-01,  2.1100e+00, -2.5000e+02, -2.5000e+02,
       -8.5930e+01,  1.9100e+00, -2.020

In [14]:
'''
Convert Lists into Numpy Arrays
'''
# Turn the per-row Python lists into NumPy arrays. The single-pass tqdm loop
# presumably exists only to show how long the conversion takes.
for _ in tqdm(range(1)):
    x_train_arr, y_train_arr, x_test_arr, y_test_arr = (
        np.array(rows) for rows in (x_train, y_train, x_test, y_test)
    )

100%|██████████| 1/1 [00:00<00:00, 14.93it/s]


In [15]:
# Report the shape of each split, then preview the first two training rows.
for split_arr in (x_train_arr, y_train_arr, x_test_arr, y_test_arr):
    print(split_arr.shape)
print(x_train_arr[:2])

(30552, 300)
(30552, 1)
(13092, 300)
(13092, 1)
[[-1.7500e+00  1.9000e-01 -3.3000e-01  2.2770e+01  2.4999e+02  2.4999e+02
  -1.6500e+00 -1.0000e-02 -1.1000e-01  9.0100e+01  2.4999e+02  2.4552e+02
  -1.3400e+00  4.0000e-02  1.3000e-01  1.2638e+02  2.4999e+02  2.2218e+02
  -1.0600e+00 -1.0000e-02  2.6000e-01  1.3048e+02  2.4999e+02  1.8718e+02
  -8.3000e-01 -1.4000e-01  3.0000e-01  1.4704e+02  2.4503e+02  1.6122e+02
  -6.2000e-01 -2.7000e-01  3.3000e-01  1.8921e+02  2.3178e+02  1.3690e+02
  -5.4000e-01 -1.9000e-01  4.0000e-01  2.4999e+02  2.2418e+02  1.1510e+02
  -2.7000e-01 -2.0000e-02  6.3000e-01  2.4999e+02  2.1301e+02  1.0772e+02
   3.0000e-02 -8.0000e-02  9.9000e-01  2.4999e+02  2.1532e+02  1.0066e+02
   1.8000e-01  5.0000e-02  1.3900e+00  2.4999e+02  2.0455e+02  6.7020e+01
   1.4000e-01  5.0000e-01  1.7300e+00  2.0731e+02  1.4995e+02  1.8060e+01
   2.9000e-01  6.0000e-01  1.8300e+00  4.7160e+01  1.0445e+02  6.7800e+00
   1.2000e-01  6.8000e-01  2.2500e+00 -1.1939e+02  7.1970e+01  1

In [16]:
'''
Create folders if it doesn't exist
'''
from tqdm import tqdm
import os
# Every directory the save cell can write to, one per supported
# (window, overlap) setting. Listing only some of them made np.savetxt fail
# for the remaining configurations.
folder = ['ready_data', 'ready_data/window-50-25', 'ready_data/window-100-25', 'ready_data/window-100-50', 'ready_data/no-overlap-50', 'ready_data/no-overlap-100']

for path in tqdm(folder):
    # exist_ok makes the call a no-op for directories that are already there.
    os.makedirs(path, exist_ok=True)

100%|██████████| 3/3 [00:00<00:00, 1496.18it/s]


In [17]:
"""
SAVE 2D Matrices into text & numpy
"""
for _ in tqdm(range(1)):
    # The output directory follows the existing naming scheme:
    #   overlap == 0 -> ready_data/no-overlap-<window>
    #   otherwise    -> ready_data/window-<window>-<overlap>
    # Deriving it from the parameters keeps the five original locations and
    # also covers settings that previously fell through every branch and
    # saved nothing.
    if overlap == 0:
        out_dir = "ready_data/no-overlap-{}".format(window)
    else:
        out_dir = "ready_data/window-{}-{}".format(window, overlap)
    # np.savetxt does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    # Save each 2-D split as <out_dir>/<name>.txt
    for name, split_arr in (
        ("x_train", x_train_arr),
        ("y_train", y_train_arr),
        ("x_test", x_test_arr),
        ("y_test", y_test_arr),
    ):
        np.savetxt("{}/{}.txt".format(out_dir, name), split_arr)

100%|██████████| 1/1 [00:18<00:00, 18.60s/it]
