In [1]:
import sys,os,json
import collections,math
import time,datetime,pytz
import numpy as np
import pandas as pd
import pickle
from scipy import interpolate, io
from scipy.io import loadmat
import matplotlib.pyplot as plt
from collections import Counter
from matplotlib.colors import ListedColormap
import seaborn as sns
import operator
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

############################################
########## Plot Style Declaration ##########
# Set the style globally
# Alternatives include bmh, fivethirtyeight, ggplot,
# dark_background, seaborn-deep, etc
# plt.style.use('ggplot')
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so prefer the new name when it exists
# and fall back to the legacy one on older installations.
_white_style = next(
    (name for name in ('seaborn-v0_8-white', 'seaborn-white')
     if name in plt.style.available),
    'seaborn-white',
)
plt.style.use(_white_style)

# Font and size overrides applied on top of the selected style.
plt.rcParams['font.family'] = 'times new roman'
# plt.rcParams['font.serif'] = 'Ubuntu'
# plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 15
plt.rcParams['axes.labelsize'] = 15
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 15
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['legend.fontsize'] = 14
plt.rcParams['figure.titlesize'] = 15
plt.rcParams['lines.linewidth'] = 3
# fonttype 42 embeds TrueType fonts in PDF/PS output instead of Type 3 fonts
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

##################################
########## End of Setup ##########

## Resample DB2 from 2000 Hz to 200 Hz, keeping 8 EMG channels as features

In [2]:
def downSampleEmg(X,y,prev_freq,new_freq):
    """Downsample labelled samples window by window with linear interpolation.

    The samples are scanned in order and split into runs of consecutive
    identical labels. Inside each run, every complete window of ``prev_freq``
    consecutive samples is replaced by ``new_freq`` samples, obtained by
    linearly interpolating the window at ``new_freq`` evenly spaced positions
    between its first and last sample (both included). Samples at the end of
    a run that do not fill a whole window are dropped, and windowing restarts
    at every label change.

    Parameters
    ----------
    X : sequence of rows (list or array)
        Samples, one row per entry of ``y``; must support slicing.
    y : sequence
        Label of each sample.
    prev_freq : int
        Number of input samples per window.
    new_freq : int
        Number of output samples produced per window.

    Returns
    -------
    X_new : list
        Interpolated rows, ``new_freq`` per complete window.
    y_new : list
        Label of each row in ``X_new``.
    """
    X_new = []
    y_new = []
    # nothing to resample; also avoids indexing y[0] on empty input
    if len(y) == 0:
        return X_new, y_new

    # the interpolation grids are identical for every window
    time_old = np.arange(prev_freq)
    time_new = np.linspace(0.0, prev_freq-1, new_freq)

    curr_label = y[0]
    start_i = 0

    for i in range(len(y)):
        # when we encounter a label different from the current one, the
        # leftover samples of the previous run are dropped and windowing
        # restarts at the first sample of the new run
        if y[i] != curr_label:
            curr_label = y[i]
            start_i = i

        # number of samples of the current run seen so far, including sample i
        run_len = i - start_i + 1

        # emit the window as soon as its last sample is seen, so that a run
        # whose length is an exact multiple of prev_freq keeps its final window
        if run_len % prev_freq == 0:
            # perform linear interpolation over the prev_freq samples ending at i
            f = interpolate.interp1d(x=time_old, y=X[i+1-prev_freq:i+1], axis=0)

            # add interpolated data to X_new, with one label per new sample
            X_new.extend(f(time_new))
            y_new.extend([curr_label]*new_freq)

    return X_new, y_new

In [3]:
# Preprocess NinaPro DB2: for each subject, keep only the target gestures,
# downsample, standardize and store the result as a .mat file.
path = 'data/emg/ninapro/db2/'
target_gest_set = {18,21,22,25,26,27,28}
prev_freq = 2000
new_freq = 200
# np.isin expects a sequence; passing the set itself would not test membership
target_gest_l = sorted(target_gest_set)
# savemat does not create missing directories, so make sure the target exists
os.makedirs(path+"processed_c7_f8/", exist_ok=True)
for subject in range(40):
    X_tot = []
    y_tot = []
    # data loading
    for e in [1, 2]:
        data = io.loadmat(path+"DB2_s"+str(subject+1)+"/"+"S"+str(subject+1) + "_E" + str(e) + "_A1.mat")
        # remove gestures other than target_gest_set
        # (vectorized row selection; yields the same indices, in the same
        # order, as testing every row in a Python loop)
        target_idx = np.flatnonzero(np.isin(data['restimulus'][:,0], target_gest_l))
        X = data['emg'][target_idx,:]
        y = data['restimulus'][target_idx,0]
        X_tot.extend(X)
        y_tot.extend(y)

    # resample = downsample from prev_freq=2000hz to new_freq=200hz.
    # downSampleEmg works window by window: each block of prev_freq/10 = 200
    # consecutive samples with the same label becomes new_freq/10 = 20 samples.
    X_new, y_new = downSampleEmg(X=X_tot, y=y_tot, prev_freq=int(prev_freq/10), new_freq=int(new_freq/10))
    # normalize every column to zero mean and unit variance
    X_new = StandardScaler().fit_transform(X_new)
    # keep only the first 8 columns of the EMG matrix as features
    # NOTE(review): the previous comment said columns [8,9] out of 16 are
    # empty; confirm that against the actual channel layout of this database.
    saved_columns_l = [0,1,2,3,4,5,6,7]
    X_new = X_new[:,saved_columns_l]
    # save the preprocessed data
    print('subject {} y_new_Length {}  X_new describe below'.format(subject, len(y_new)))
    print('store data as mat file... ')
    io.savemat(path+"processed_c7_f8/"+"S"+str(subject+1)+"_"+str(new_freq)+"Hz.mat", {
        'emg': X_new,
        'restimulus': y_new
        })

subject 0 y_new_Length 31380  X_new describe below
store data as mat file... 
subject 1 y_new_Length 38540  X_new describe below
store data as mat file... 
subject 2 y_new_Length 47400  X_new describe below
store data as mat file... 
subject 3 y_new_Length 49720  X_new describe below
store data as mat file... 
subject 4 y_new_Length 27840  X_new describe below
store data as mat file... 
subject 5 y_new_Length 30200  X_new describe below
store data as mat file... 
subject 6 y_new_Length 48060  X_new describe below
store data as mat file... 
subject 7 y_new_Length 35860  X_new describe below
store data as mat file... 
subject 8 y_new_Length 40440  X_new describe below
store data as mat file... 
subject 9 y_new_Length 46760  X_new describe below
store data as mat file... 
subject 10 y_new_Length 52440  X_new describe below
store data as mat file... 
subject 11 y_new_Length 45940  X_new describe below
store data as mat file... 
subject 12 y_new_Length 36040  X_new describe below
store data

## Resample DB3 from 2000 Hz to 200 Hz, keeping 8 EMG channels as features

In [4]:
# Preprocess NinaPro DB3: for each subject, keep only the target gestures,
# downsample, standardize and store the result as a .mat file.
path = 'data/emg/ninapro/db3/'
target_gest_set = {18,21,22,25,26,27,28}
prev_freq = 2000
new_freq = 200
# np.isin expects a sequence; passing the set itself would not test membership
target_gest_l = sorted(target_gest_set)
# savemat does not create missing directories, so make sure the target exists
os.makedirs(path+"processed_c7_f8/", exist_ok=True)
for subject in range(11):
    X_tot = []
    y_tot = []
    # data loading
    for e in [1, 2]:
        data = io.loadmat(path+"DB3_s"+str(subject+1)+"/"+"S"+str(subject+1) + "_E" + str(e) + "_A1.mat")
        # remove gestures other than target_gest_set
        # (vectorized row selection; yields the same indices, in the same
        # order, as testing every row in a Python loop)
        target_idx = np.flatnonzero(np.isin(data['restimulus'][:,0], target_gest_l))
        X = data['emg'][target_idx,:]
        y = data['restimulus'][target_idx,0]
        X_tot.extend(X)
        y_tot.extend(y)

    # resample = downsample from prev_freq=2000hz to new_freq=200hz.
    # downSampleEmg works window by window: each block of prev_freq/10 = 200
    # consecutive samples with the same label becomes new_freq/10 = 20 samples.
    X_new, y_new = downSampleEmg(X=X_tot, y=y_tot, prev_freq=int(prev_freq/10), new_freq=int(new_freq/10))
    # normalize every column to zero mean and unit variance
    X_new = StandardScaler().fit_transform(X_new)
    # keep only the first 8 columns of the EMG matrix as features
    # NOTE(review): the previous comment said columns [8,9] out of 16 are
    # empty; confirm that against the actual channel layout of this database.
    saved_columns_l = [0,1,2,3,4,5,6,7]
    X_new = X_new[:,saved_columns_l]
    # save the preprocessed data
    print('subject {} y_new_Length {}  X_new describe below'.format(subject, len(y_new)))
    print('store data as mat file... ')
    io.savemat(path+"processed_c7_f8/"+"S"+str(subject+1)+"_"+str(new_freq)+"Hz.mat", {
        'emg': X_new,
        'restimulus': y_new
        })

subject 0 y_new_Length 38940  X_new describe below
store data as mat file... 
subject 1 y_new_Length 44680  X_new describe below
store data as mat file... 
subject 2 y_new_Length 34280  X_new describe below
store data as mat file... 
subject 3 y_new_Length 54620  X_new describe below
store data as mat file... 
subject 4 y_new_Length 45160  X_new describe below
store data as mat file... 
subject 5 y_new_Length 54240  X_new describe below
store data as mat file... 
subject 6 y_new_Length 52840  X_new describe below
store data as mat file... 
subject 7 y_new_Length 48920  X_new describe below
store data as mat file... 
subject 8 y_new_Length 31280  X_new describe below
store data as mat file... 
subject 9 y_new_Length 47640  X_new describe below
store data as mat file... 
subject 10 y_new_Length 44520  X_new describe below
store data as mat file... 
