In [1]:
import scipy.io
import numpy as np
from sklearn.preprocessing import scale, LabelBinarizer
from sklearn.model_selection import train_test_split

Understanding the dataset

In [5]:
def load_data(data_dir="data", days=range(1, 4)):
    """Print a size summary of the wrist EMG feature tables stored per day.

    Reads ``<data_dir>/wrist_fdt_day<day>.mat`` for every entry of ``days``,
    stacks the ``FeatSet`` variable of each file along a new leading axis and
    prints the resulting dimensions.

    Parameters
    ----------
    data_dir : str, optional
        Directory that holds the ``wrist_fdt_day<N>.mat`` files.
    days : iterable of int, optional
        Day numbers to read. The default reads days 1, 2 and 3.

    Returns
    -------
    None
        Only prints. Nothing is returned, so a bare call at the end of a
        notebook cell does not echo the whole array.

    Raises
    ------
    KeyError
        If one of the files has no ``FeatSet`` variable.
    """
    emg_data = []
    for day in days:
        fname = "{}/wrist_fdt_day{}.mat".format(data_dir, day)
        mat = scipy.io.loadmat(fname)
        # Fail here with a clear message instead of stacking a None entry and
        # crashing later on the shape lookups.
        if "FeatSet" not in mat:
            raise KeyError("'FeatSet' variable not found in " + fname)
        emg_data.append(mat["FeatSet"])

    # One table per day, stacked along a new first axis.
    emg_data = np.array(emg_data)
    print('EMG_DATA SHAPE: ', emg_data.shape)

    n_days, n_subjects, n_gestures = emg_data.shape[:3]
    print("Data has:       ", n_days, "Days", n_subjects, "Subjects", n_gestures, "Gestures")
    # days x subjects is the trial count over ALL days, not the count of a single day.
    print("All Days have:  ", n_days, "x", n_subjects, "=", n_days * n_subjects, "Trials")
    # Sizes are read from the first cell only (first day, first row, first column).
    first_cell = emg_data[0, 0, 0]
    print("Each Trial has: ", first_cell.shape[0], "Samples", first_cell.shape[1], "Features")
# Print the dataset summary; load_data() returns nothing, so only the printed lines appear below.
load_data()

EMG_DATA SHAPE:  (3, 43, 16)
Data has:        3 Days 43 Subjects 16 Gestures
Each Day has:    3 x 43 = 129 Trials
Each Trial has:  233 Samples 36 Features


(Option 1) Run the code below if the train/test split is random.
Concatenate the data from all trials into one array. The new data has shape (3*43, 16).

In [6]:
# Stack the per-day FeatSet tables on top of each other (rows from every day, 16 columns).
# The empty (0, 16) seed is kept as the first piece so the result is built exactly
# as if each day had been appended to it in turn.
day_tables = [np.empty((0, 16))]
for day_no in range(1, 4):
    day_file = "data/wrist_fdt_day" + str(day_no) + ".mat"
    day_tables.append(scipy.io.loadmat(day_file).get("FeatSet"))
emg_data = np.concatenate(day_tables, axis=0)

In [7]:
# Table shape, shape of the first cell, and container type - one per line.
print(emg_data.shape, emg_data[0, 0].shape, type(emg_data), sep="\n")

(129, 16)
(233, 36)
<class 'numpy.ndarray'>


In [43]:
# Save the complete table (all 16 gesture columns) under the 'feat' key.
scipy.io.savemat(file_name='data/data_16.mat', mdict={'feat': emg_data})

In [44]:
# Reduced dataset: keep only the first four gesture columns and save them under 'feat'.
partial_emg_data = emg_data[:, :4]
scipy.io.savemat(file_name='data/data_4.mat', mdict={'feat': partial_emg_data})

(129, 4)


(Option 2) Run this if the model is trained on 35 participants and tested on the other 8 participants.

In [3]:
# Subject-wise split, applied to every day's table:
# the first 35 rows go to the training set, the last 8 rows to the test set.
train_parts = [np.empty((0, 16))]
test_parts = [np.empty((0, 16))]
for day_no in range(1, 4):
    day_table = scipy.io.loadmat("data/wrist_fdt_day" + str(day_no) + ".mat").get("FeatSet")
    train_parts.append(day_table[:35, :])
    test_parts.append(day_table[-8:, :])

emg_data_train = np.concatenate(train_parts, axis=0)  # shape is (105, 16)
emg_data_test = np.concatenate(test_parts, axis=0)  # shape is (24, 16)

In [6]:
# Sanity check: expect (105, 16), i.e. 35 training rows from each of the 3 days, 16 gesture columns.
emg_data_train.shape

(105, 16)

In [14]:
# Write the train/test tables twice: once with all 16 gesture columns and once
# with only the first 4 gesture columns. Every file stores its table under 'feat'.
for mat_path, table in (
    ('data/train_16.mat', emg_data_train),
    ('data/test_16.mat', emg_data_test),
    ('data/train_4.mat', emg_data_train[:, 0:4]),
    ('data/test_4.mat', emg_data_test[:, 0:4]),
):
    scipy.io.savemat(mat_path, {'feat': table})


Read each cell of the table. The column index is the class label (gesture).

In [18]:
# Which dataset? Either Option 1 or Option 2. Then pick the variable that matches
# the number of gestures (classes) you want to train on: either 4 or 16.


def cells_to_samples(dataset):
    """Flatten a 2-D table of recordings into parallel sample and label lists.

    Parameters
    ----------
    dataset : numpy.ndarray
        Table indexed as ``[row, column]``; the column index of a cell is used
        as its class label (gesture).

    Returns
    -------
    tuple of (list, list)
        ``samples`` holds every cell of ``dataset`` in the order produced by
        ``np.ndenumerate`` and ``labels`` holds the column index of the
        corresponding cell.
    """
    samples = []
    labels = []
    # ndenumerate yields (index_tuple, cell) pairs; index[1] is the column.
    for index, cell in np.ndenumerate(dataset):
        samples.append(cell)
        labels.append(index[1])
    return samples, labels


# Option 2 (subject-wise split): build the train and test lists.
train_dataset = emg_data_train[:, 0:4]  # or use emg_data_train for using all 16 gestures
trainX, trainY = cells_to_samples(train_dataset)

test_dataset = emg_data_test[:, 0:4]  # or use emg_data_test for using all 16 gestures
testX, testY = cells_to_samples(test_dataset)

# USE THIS for Option 1 (kept as comments: a bare triple-quoted string at the
# end of a cell is evaluated and echoed as the cell output).
# data, labels = cells_to_samples(partial_emg_data)  # or emg_data

'\ndataset = partial_emg_data # or emg_data\ndata = []\nlabels = []\nfor i, entry in np.ndenumerate(dataset):\n    data.append(dataset[i[0],i[1]])\n    labels.append(i[1])\n'

DOES THE DATA NEED NORMALIZATION?

Train and test data when using Option 1 (full dataset)

In [61]:
# Split the data into training and testing sets
# (trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.2)