# Ninapro DB2 Dataset Preparation Pipeline

In [1]:
import os

import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
from tqdm.notebook import tqdm

In [2]:
# Root folder of the Ninapro DB2 data; the cells below build every input file path from it.
path = "Ninapro_DB2"

In [3]:
# Load the subject information table that sits in the dataset root and show it.
subject_info = pd.read_csv("/".join([path, "Ninapro_DB2_SubjectsInformation.csv"]))
subject_info

Unnamed: 0,Title,Hand,Laterality,Gender,Age,Height,Weight,zip files
0,1,Intact,Right Handed,Male,29,187,75,DB2_s1.zip
1,2,Intact,Right Handed,Male,29,183,75,DB2_s2.zip
2,3,Intact,Right Handed,Male,31,174,69,DB2_s3.zip
3,4,Intact,Left Handed,Female,30,154,50,DB2_s4.zip
4,5,Intact,Right Handed,Male,25,175,70,DB2_s5.zip
5,6,Intact,Right Handed,Male,35,172,79,DB2_s6.zip
6,7,Intact,Right Handed,Male,27,187,92,DB2_s7.zip
7,8,Intact,Right Handed,Male,45,173,73,DB2_s8.zip
8,9,Intact,Right Handed,Male,23,172,63,DB2_s9.zip
9,10,Intact,Right Handed,Male,34,173,84,DB2_s10.zip


# Ninapro Helper Functions

In [4]:
def read_mat(file):
    """Load one recording from a MATLAB ``.mat`` file into a DataFrame.

    Parameters
    ----------
    file : str or file-like
        Passed unchanged to ``scipy.io.loadmat``. The file must provide the
        variables ``inclin`` (at least two columns), ``stimulus``,
        ``repetition`` and ``emg``, all with one row per sample.

    Returns
    -------
    pandas.DataFrame
        One row per sample with the columns ``repetition``, ``inclin_x``,
        ``inclin_y``, ``emg1`` ... ``emgN`` and ``stimulus``, in that order.
        ``N`` is the number of columns of ``emg`` (12 for the files this
        notebook was written for), so recordings with a different channel
        count load as well.
    """
    mat = loadmat(file)
    inclin = mat['inclin']
    emg = mat['emg']

    # Name the EMG channels from the data itself instead of assuming 12.
    emg_col = ['emg' + str(channel) for channel in range(1, emg.shape[1] + 1)]

    # loadmat hands back MATLAB vectors as 2-D arrays; flatten them so every
    # DataFrame column is built from a plain 1-D array.
    data = {
        'repetition': mat['repetition'].reshape(-1),
        'inclin_x': inclin[:, 0],
        'inclin_y': inclin[:, 1],
    }
    for name, samples in zip(emg_col, emg.T):
        data[name] = samples
    data['stimulus'] = mat['stimulus'].reshape(-1)

    # dicts keep insertion order, which fixes the column order documented above.
    return pd.DataFrame(data)

def one_hot(cat_idx,y_all):
    """One-hot encode the labels in ``y_all`` against the categories ``cat_idx``.

    Each label found in ``cat_idx`` becomes a float vector of length
    ``len(cat_idx)`` with a 1 at the position of its first occurrence in
    ``cat_idx``. Labels that are not in ``cat_idx`` are skipped, so the result
    can have fewer rows than ``y_all`` has entries.

    Returns a NumPy array built from the encoded rows.
    """
    categories = list(cat_idx)
    width = len(cat_idx)
    encoded = []
    for label in y_all:
        if label not in cat_idx:
            continue  # unknown label: contributes no row
        row = np.zeros(width)
        row[categories.index(label)] = 1
        encoded.append(row)
    return np.array(encoded)

def extract_move(df,move,with_rest=True,s_rate=2150,move_secs=5,rest_secs=3,n_reps=6):
    """Cut the repetitions of one movement out of a recording.

    For repetition numbers ``1 .. n_reps`` the recording is scanned forward
    for the first sample whose ``stimulus`` equals ``move`` and whose
    ``repetition`` equals the repetition number. A fixed-length window is
    taken from that sample and the scan for the next repetition resumes right
    after the window. Rows inside the windows that belong to neither the
    movement nor (when ``with_rest`` is true) rest are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with ``stimulus`` and ``repetition`` columns, one row per
        sample. Rows are addressed by position; for the default 0..n-1 index
        produced by ``read_mat`` this is the same as addressing by label.
    move : int
        Stimulus id of the movement to extract.
    with_rest : bool, default True
        If true, each window spans ``move_secs + rest_secs`` seconds and rows
        with ``stimulus == 0`` are kept; otherwise the window spans
        ``move_secs`` seconds and only rows with ``stimulus == move`` are kept.
    s_rate : int, default 2150
        Samples per second used to size the windows.
        NOTE(review): the Ninapro DB2 documentation states a 2 kHz sampling
        rate; 2150 is kept as the original default -- confirm the intent.
    move_secs, rest_secs : int, default 5 and 3
        Assumed duration of the movement and of the following rest.
    n_reps : int, default 6
        Number of repetitions to extract.

    Returns
    -------
    pandas.DataFrame
        The kept rows of all windows in recording order, with a fresh 0..n-1
        index and the original row labels in an ``index`` column.

    Raises
    ------
    KeyError
        If one of the repetitions cannot be found after the previous window.
    ValueError
        If ``n_reps`` is smaller than 1.
    """
    if n_reps < 1:
        raise ValueError('n_reps must be at least 1')

    window_secs = move_secs + rest_secs if with_rest else move_secs
    one_rep_steps = int(window_secs * s_rate)

    stim = df['stimulus'].to_numpy()
    rep = df['repetition'].to_numpy()

    # Locate the onset of every repetition; each search starts where the
    # previous window ended.
    onsets = []
    cursor = 0
    for rep_no in range(1, n_reps + 1):
        hits = np.flatnonzero((stim[cursor:] == move) & (rep[cursor:] == rep_no))
        if hits.size == 0:
            raise KeyError(
                'movement %r, repetition %d not found from sample %d on'
                % (move, rep_no, cursor)
            )
        onset = cursor + int(hits[0])
        onsets.append(onset)
        cursor = onset + one_rep_steps

    # Half-open windows: exactly one_rep_steps samples each, so the sample at
    # the window boundary can never be emitted twice.
    all_rep_df = pd.concat([df.iloc[start:start + one_rep_steps] for start in onsets])

    # Keep the movement itself (and rest, if requested); anything else is a
    # neighbouring movement that leaked into a window.
    window_stim = all_rep_df['stimulus'].to_numpy()
    keep = window_stim == move
    if with_rest:
        keep = keep | (window_stim == 0)
    for foreign in pd.unique(window_stim[~keep]):
        # Reported once per foreign stimulus id rather than once per row.
        print('Non Move Found ', foreign)

    return all_rep_df[keep].reset_index()

def extract_multi_moves(file,moves,with_rest=True):
    """Extract several movements from one recording file.

    Parameters
    ----------
    file : str
        Path of the ``.mat`` recording, read with ``read_mat``.
    moves : iterable
        Stimulus ids to extract; each one is handed to ``extract_move``.
    with_rest : bool, default True
        Forwarded to ``extract_move``.

    Returns
    -------
    pandas.DataFrame
        The per-movement frames stacked in the order of ``moves``. The final
        ``reset_index()`` gives the result a fresh 0..n-1 index and keeps each
        row's previous index (its position within its per-movement frame) as
        an additional column.

    Raises
    ------
    ValueError
        If ``moves`` is empty.
    """
    moves = list(moves)
    if not moves:
        raise ValueError('moves must contain at least one movement id')
    df = read_mat(file)
    # Concatenate once; DataFrame.append no longer exists in pandas >= 2.0 and
    # copied the accumulated frame on every call.
    per_move = [extract_move(df, m, with_rest) for m in moves]
    return pd.concat(per_move).reset_index()

def extract_subject_e1(subject,path,moves,with_rest=True):
    """Extract ``moves`` from the file ``S<subject>_E1_A1.mat`` located in ``path``.

    ``moves`` and ``with_rest`` are forwarded to ``extract_multi_moves``, whose
    result is returned unchanged.
    """
    mat_name = 'S' + str(subject) + '_E1_A1.mat'
    return extract_multi_moves(path + '/' + mat_name, moves, with_rest)

def extracts_subjects_e1(subjects,path,moves,with_rest=True):
    """Extract ``moves`` for several subjects and stack the results.

    Parameters
    ----------
    subjects : iterable
        Subject numbers; each one is handed to ``extract_subject_e1``.
    path : str
        Folder containing the recordings.
    moves : iterable
        Stimulus ids to extract for every subject.
    with_rest : bool, default True
        Forwarded to ``extract_subject_e1``.

    Returns
    -------
    pandas.DataFrame
        The per-subject frames stacked in the order of ``subjects``. The index
        is not reset, so index values restart for every subject.

    Raises
    ------
    ValueError
        If ``subjects`` is empty.
    """
    subjects = list(subjects)
    if not subjects:
        raise ValueError('subjects must contain at least one subject number')
    # Concatenate once; DataFrame.append no longer exists in pandas >= 2.0 and
    # copied the accumulated frame on every call.
    per_subject = [extract_subject_e1(s, path, moves, with_rest) for s in subjects]
    return pd.concat(per_subject)
    
# Lift pandas' display limits: frames are rendered with all rows and all columns.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Rest/Supination/Pronation Data Preparation

In [192]:
# Movements 9-12 (rest included) from the E1 recordings of subjects 1-4.
prosup = extracts_subjects_e1(
    subjects=[1, 2, 3, 4],
    path=path,
    moves=[9, 10, 11, 12],
)

In [193]:
# Preview the first rows of the extracted frame.
prosup.head()

Unnamed: 0,level_0,index,repetition,inclin_x,inclin_y,emg1,emg2,emg3,emg4,emg5,emg6,emg7,emg8,emg9,emg10,emg11,emg12,stimulus
0,0,859880,1,60.36375,-2.86875,4e-06,3.576222e-07,-1.224552e-06,9.989841e-07,-4.281166e-07,-7.940087e-07,-1.2e-05,-7e-06,2.541723e-06,-2.8e-05,6e-06,3e-06,9
1,1,859881,1,60.365036,-2.870037,3e-06,-1.458975e-07,-1.56025e-06,4.956104e-07,1.211436e-07,-2.304278e-06,-1.1e-05,-7e-06,6.954194e-07,-2.8e-05,5e-06,2e-06,9
2,2,859882,1,60.368088,-2.873087,3e-06,8.612258e-07,1.182418e-07,1.502881e-06,2.178858e-08,-2.303903e-06,-1.1e-05,-6e-06,-9.830344e-07,-2.5e-05,5e-06,3e-06,9
3,3,859883,1,60.371136,-2.876137,4e-06,8.61253e-07,2.300282e-06,3.852945e-06,-2.602581e-07,-1.296432e-06,-1.1e-05,-3e-06,-1.990092e-06,-1.7e-05,4e-06,3e-06,9
4,4,859884,1,60.374187,-2.879187,6e-06,1.898822e-07,1.628885e-06,5.86731e-06,-3.82263e-07,4.673569e-08,-1.2e-05,-2e-06,-9.829571e-07,-4e-06,2e-06,4e-06,9


In [194]:
# Collapse the stimulus ids into the three class labels that are exported:
# 0 stays 0, 9 and 11 become 1, 10 and 12 become 2.
# NOTE(review): going by the section title these are presumably
# rest / supination / pronation -- confirm against the Ninapro movement table.
stimulus_to_label = {0: 0, 9: 1, 11: 1, 10: 2, 12: 2}

labels = prosup['stimulus']
mapped_labels = labels.map(stimulus_to_label)

# Fail loudly on any stimulus id without a class instead of ending up with a
# label list that is shorter than the frame.
unmapped = mapped_labels.isna().to_numpy()
if unmapped.any():
    unexpected = sorted(set(labels.to_numpy()[unmapped].tolist()))
    raise ValueError('stimulus ids without a label: %s' % unexpected)

# Assign a plain list: prosup's index repeats per subject, so the assignment
# must be positional rather than index-aligned.
n_labels = mapped_labels.astype(int).tolist()
prosup['labels'] = n_labels

# NOTE(review): 't[s]' receives the original sample index of each row, not a
# time in seconds -- confirm whether it should be divided by the sampling rate.
prosup['t[s]'] = prosup['index']

columns = ['t[s]','emg1','emg2','emg3','emg4','emg5','emg6','emg7','emg8','labels']

# Create the output folder so the export also works on a fresh checkout.
os.makedirs('prepared_data', exist_ok=True)
prosup[columns].to_csv('prepared_data/restprosup_nina_raw.csv',index=False)