In [1]:
import os, sys
import pickle
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import pandas as pd

### filename = D<#day>M<#motion/label>T<#trial>.csv
* myo dataset
    * total participants = 5
    * total gestures = 22
    * trials per participant = 4
    * days per participant = 30
    * total sessions = 3
        * 10 days for each session
    * each session = day
   
   
* 3DC dataset
    * total participants = 22
    * total gestures = 11
    * trials per participant = 4
    * days (sessions) per participant = 4
    * dataloaders per participant = 4 
        * second cycle (max activation) is excluded

In [2]:
# --- Locations on disk (absolute, machine-specific paths) ---
data_dir = "/home/laiy/gitrepos/msr_final/Wearable_Sensor_Long-term_sEMG_Dataset/data"
processed_data_dir = "/home/laiy/gitrepos/msr_final/LongTermEMG_myo/Processed_datasets"
code_dir = "/home/laiy/gitrepos/msr_final/LongTermEMG_myo"
save_dir = "/home/laiy/gitrepos/msr_final/LongTermEMG_myo/Results"

# --- Recording protocol ---
day_num = 30      # recording days per participant
sub_num = 5       # participants
mov_num = 22      # gestures (motions / labels)
trial_num = 4     # trials per gesture per day
ch_num = 8        # EMG channels
fs = 200          # presumably the sampling rate in Hz (50 samples == 250 ms below)

# --- Filter settings (presumably cut-off frequency and filter order; not used in this section) ---
fs_pass = 15
fil_order = 5

# --- Windowing ---
win_size = 50     # samples per analysis window (250 ms at fs = 200)
win_inc = 10      # samples between consecutive window starts (50 ms step, i.e. 40 samples overlap)

# Position code (N, I or O) of every recording day, one character per day.
pos = list(
    "NIONIIOONN"      # 1-10th day
    "ONNOOIIINO"      # 11-20th day
    "OIOIINNINO"      # 21-30th day
)

# Numeric version of `pos`, derived so both lists cannot drift apart. N: 1, I: 2, O: 3
pos_label = [{'N': 1, 'I': 2, 'O': 3}[code] for code in pos]

In [3]:
# Group the 1-based day numbers by position label:
# sessions_idx[0] -> days labelled 1 (N), [1] -> days labelled 2 (I), [2] -> days labelled 3 (O)
sessions_idx = [[] for _ in range(3)]
for day, label in enumerate(pos_label, start=1):
    sessions_idx[label - 1].append(day)
sessions_idx

[[1, 4, 9, 10, 12, 13, 19, 26, 27, 29],
 [2, 5, 6, 16, 17, 18, 22, 24, 25, 28],
 [3, 7, 8, 11, 14, 15, 20, 21, 23, 30]]

### Process Data

In [9]:
def format_examples(emg_examples, window_size=50, size_non_overlap=10):
    '''
    Cut a multi-channel EMG recording into fixed-length, overlapping windows.

    emg_examples: 2-D array-like of shape (n_samples, n_channels); each row is one
        sample of the multi-channel emg signal
    window_size: analysis window size, in samples (rows)
    size_non_overlap: length of non-overlap portion between each analysis window,
        i.e. the number of rows between the starts of two consecutive windows

    Returns a list of independent arrays, each of shape (window_size, n_channels).
    Window k covers rows [k * step, k * step + window_size); trailing rows that do
    not fill a complete window are dropped. An empty input gives an empty list.

    Raises ValueError if window_size or size_non_overlap is smaller than 1, or if a
    non-empty input is not 2-D.
    '''
    if window_size < 1 or size_non_overlap < 1:
        raise ValueError("window_size and size_non_overlap must both be >= 1")

    signal = np.asarray(emg_examples)
    if signal.size == 0:
        return []
    if signal.ndim != 2:
        raise ValueError("emg_examples must be 2-D: (n_samples, n_channels)")

    # The row-by-row version never skipped samples: once the step reached the window
    # length it simply produced back-to-back windows. Clamp the step to keep that.
    step = min(size_non_overlap, window_size)

    # Slicing by row index keeps every window 2-D regardless of how window_size
    # compares to the channel count, and avoids re-stacking the buffer for each row.
    last_start = signal.shape[0] - window_size
    return [signal[start:start + window_size].copy()
            for start in range(0, last_start + 1, step)]

In [13]:
def read_files_to_format_training_session(path_folder_examples, day_num,
                                          number_of_cycles, number_of_gestures, window_size,
                                          size_non_overlap):
    """
    Read every trial/gesture CSV of one recording day and window each recording.

    path_folder_examples: folder holding the files D<day_num>M<gesture>T<trial>.csv
    day_num: day number used in the file names
    number_of_cycles: number of trials recorded for each motion (files T1..T<n>)
    number_of_gestures: number of motions (files M1..M<n>)
    window_size: analysis window size
    size_non_overlap: length of non-overlap portion between each analysis window

    Returns (examples_training, labels_training), two lists with one entry per trial:
      * examples_training[t]: windows of all gestures of trial t+1, in gesture order
      * labels_training[t]: the 1-based gesture index of each window, stored as float

    The original notes say each file holds 300 rows of 8-channel signal, which gives
    shape(formated_example) = (26, 50, 8) with a 50-sample window and a step of 10.
    NOTE(review): those notes call 300 rows "15s"; at fs = 200 Hz it would be 1.5 s - confirm.
    """
    examples_training = []
    labels_training = []

    for trial in range(1, number_of_cycles + 1):
        trial_examples = []
        trial_labels = []

        for gesture in range(1, number_of_gestures + 1):
            csv_path = f"{path_folder_examples}/D{day_num}M{gesture}T{trial}.csv"
            # Files have no header row; every row is one multi-channel sample.
            raw_signal = pd.read_csv(csv_path, header=None).to_numpy()

            windows = format_examples(raw_signal,
                                      window_size=window_size,
                                      size_non_overlap=size_non_overlap)

            trial_examples.extend(windows)
            # One float label per window, equal to the gesture index.
            trial_labels.extend(np.full(len(windows), gesture, dtype=np.float64))

        examples_training.append(trial_examples)
        labels_training.append(trial_labels)

    return examples_training, labels_training

In [14]:
# Bind the configuration constants to the argument names used by the loading cells.
path, store_path = data_dir, processed_data_dir
number_of_cycles, number_of_gestures = trial_num, mov_num
window_size, size_non_overlap = win_size, win_inc

In [15]:
# load one participant for now
# Result layout: [session][day * trial] -> list of windows (examples) / list of labels,
# where a "session" is the group of days listed in one entry of sessions_idx.
for index_participant in range(1,2):
    # load one participant data
    folder_participant = "sub" + str(index_participant)
    examples_participant_training_sessions, labels_participant_training_sessions = [], []
    for days_of_current_session in sessions_idx:
        print("process data in days ", days_of_current_session)
        examples_per_session, labels_per_session = [], []
        # The loop variable is deliberately NOT called `day_num`: that name is the
        # configuration constant (number of recording days) and must not be overwritten.
        for current_day in days_of_current_session:
            path_folder_examples = path + "/" + folder_participant + "/day" + str(current_day)

            examples_training, labels_training  = \
                read_files_to_format_training_session(path_folder_examples=path_folder_examples,
                                                      day_num=current_day,
                                                      number_of_cycles=number_of_cycles,
                                                      number_of_gestures=number_of_gestures,
                                                      window_size=window_size,
                                                      size_non_overlap=size_non_overlap)
            # One entry per trial of this day is added to the session.
            examples_per_session.extend(examples_training)
            labels_per_session.extend(labels_training)
        examples_participant_training_sessions.append(examples_per_session)
        labels_participant_training_sessions.append(labels_per_session)
        print("@ traning sessions = ", np.shape(examples_participant_training_sessions))

process data in days  [1, 4, 9, 10, 12, 13, 19, 26, 27, 29]
@ traning sessions =  (1, 40, 572, 50, 8)
process data in days  [2, 5, 6, 16, 17, 18, 22, 24, 25, 28]
@ traning sessions =  (2, 40, 572, 50, 8)
process data in days  [3, 7, 8, 11, 14, 15, 20, 21, 23, 30]
@ traning sessions =  (3, 40, 572, 50, 8)


In [16]:
# store processed data to dictionary
# Only one participant was loaded, so each "datasets" list holds a single entry.

# examples axes:
# participants_num x sessions_num(3) x days_per_session(10)*trial_per_day(4)
#   x #examples_window*#mov(26*22=572) x window_size x channel_num
print('traning examples ', np.shape(examples_participant_training_sessions))
examples_training_sessions_datasets = [examples_participant_training_sessions]
print('all traning examples ', np.shape(examples_training_sessions_datasets))

# labels axes:
# participants_num x sessions_num(3) x days_per_session(10)*trial_per_day(4)
#   x #examples_window*#mov(26*22=572)
print('traning labels ', np.shape(labels_participant_training_sessions))
labels_training_sessions_datasets = [labels_participant_training_sessions]
print('all traning labels ', np.shape(labels_training_sessions_datasets))

# Convert both nested lists to object-dtype arrays before storing them.
dataset_dictionnary = {
    "examples_training": np.array(examples_training_sessions_datasets, dtype=object),
    "labels_training": np.array(labels_training_sessions_datasets, dtype=object),
}

traning examples  (3, 40, 572, 50, 8)
all traning examples  (1, 3, 40, 572, 50, 8)
traning labels  (3, 40, 572)
all traning labels  (1, 3, 40, 572)


In [17]:
# store dictionary to pickle
training_session_dataset_dictionnary = {
    key: dataset_dictionnary[key]
    for key in ("examples_training", "labels_training")
}

# NOTE(review): store_path has no trailing separator and the file name starts with
# "raining", so this writes "<...>/Processed_datasetsraining_session.pickle" next to the
# Processed_datasets folder rather than inside it. Probably meant
# "/training_session.pickle"; if it is changed, every reader of this file must change too.
with open(store_path + "raining_session.pickle", 'wb') as handle:
    pickle.dump(training_session_dataset_dictionnary, handle, pickle.HIGHEST_PROTOCOL)

In [18]:
# check stored pickle
# Reload the file written above and confirm the array shapes survived the round trip.
# (pickle.load executes arbitrary code: only use it on files produced by this notebook.)
with open(store_path + "raining_session.pickle", 'rb') as handle:
    dataset_training = pickle.load(handle)

examples_datasets_train = dataset_training['examples_training']
labels_datasets_train = dataset_training['labels_training']

print('traning examples ', np.shape(examples_datasets_train))
print('traning labels ', np.shape(labels_datasets_train))

traning examples  (1, 3, 40, 572, 50, 8)
traning labels  (1, 3, 40, 572)
