# Formatting Functions

In [1]:
import scipy.io as sio
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from skimage.feature import hog
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import os
import scipy.sparse as sp
import scipy.io as sio
import time
import cuml
from cuml.svm import SVC as cuSVC
import cudf
import scipy.stats
import librosa
import cupy as cp
import h5py
import gc
from scipy.stats import mode
import string

def rolling_window(arr, window_len, step):

    num_windows = (len(arr) - window_len) // step + 1
    windows = np.zeros((num_windows, window_len), dtype=arr.dtype)

    for i in range(num_windows):
        start = i * step
        end = start + window_len
        windows[i] = arr[start:end]

    return windows

def rolling_window_electrodes(arr, window_len, step):

    num_windows = (arr.shape[1] - window_len) // step + 1
    windows = np.zeros((arr.shape[0], num_windows, window_len), dtype=arr.dtype)

    for i in range(num_windows):
      start = i * step
      end = start + window_len
      windows[:, i] = arr[:, start:end]  # Slice along columns

    return windows

def one_hot_encoder(labels, gestures):

        one_hot = np.zeros((len(labels), gestures))

        for index, value in enumerate(labels):
            label_encode = int(value)
            one_hot[index][label_encode] = 1

        return one_hot


# To run

In [2]:
import numpy as np
from google.colab import drive

# ONLY DB1 evaulated here
database = 'DB1'

# CHANGE AS APPRORIATE
evaluation = 'STFT'

# ok, so E2 and E3 adjusted means to adjust them gesture back to 1-N gestures, otherwise it is say 18-40 but depending on what gesture set went first
data_dict = {
        'DB1': {'E1': 13, 'E2': 18, 'E3': 24, 'E1_adjusted': 0, 'E2_adjusted': 0, 'E3_adjusted': 0, 'fs': 100, 'electrodes': 10, 'subjects': 27, 'train': [1, 3, 4, 6, 7, 8, 9], 'test': [2, 5, 10], 'window length': 20, 'step': 1},
        'DB2': {'E1': 18, 'E2': 24, 'E3': 10, 'E1_adjusted': 0, 'E2_adjusted': -17, 'E3_adjusted': -40, 'fs': 100, 'electrodes': 12, 'subjects': 40, 'train': [1, 3, 4, 6], 'test': [2, 5], 'window length': 20, 'step': 1},
        'DB3': {'E1': 18, 'E2': 24, 'E3': 10, 'E1_adjusted': 0, 'E2_adjusted': -17, 'E3_adjusted': -40, 'fs': 200, 'electrodes': 12, 'subjects': 11, 'train': [1, 3, 4, 6], 'test': [2, 5], 'window length': 40, 'step': 2},
        'DB4': {'E1': 13, 'E2': 18, 'E3': 24, 'E1_adjusted': 0, 'E2_adjusted': 0, 'E3_adjusted': 0,'fs': 200, 'electrodes': 12, 'subjects': 10, 'train': [1, 3, 4, 6], 'test': [2, 5], 'window length': 40, 'step': 2},
        'DB5': {'E1': 13, 'E2': 18, 'E3': 24, 'E1_adjusted': 0, 'E2_adjusted': 0, 'E3_adjusted': 0,'fs': 200, 'electrodes': 16, 'subjects': 10, 'train': [1, 3, 4, 6], 'test': [2, 5], 'window length': 40, 'step': 2}
        }

num_subjects = data_dict[database]['subjects']
fs = data_dict[database]['fs']
num_electrodes = data_dict[database]['electrodes']

train_trials =  data_dict[database]['train']
test_trials = data_dict[database]['test']
M, step = data_dict[database]['window length'], data_dict[database]['step']
num_freq_bins = int((fs / 2) / (1 / (1/fs * M)))
freq_bins = np.linspace(0, fs/2, num_freq_bins)

drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# javasript to simulate button click
%%javascript
function ClickConnect(){
    console.log("Clicked on connect button");
    document.querySelector("colab-connect-button").click()
}
setInterval(ClickConnect,60000)

<IPython.core.display.Javascript object>

# Training

In [4]:
import gc
from sklearn.metrics import accuracy_score
import seaborn as sns
import scipy.sparse as sp

tot_num_gestures = data_dict[database]['E1'] + data_dict[database]['E2'] + data_dict[database]['E3'] - 2
combined_cm = np.zeros((tot_num_gestures, tot_num_gestures), dtype=int)
accuracy_dict = {'E1': [], 'E2': [], 'E3': []}

for exercise in ['E1', 'E2', 'E3']:

  label_dict = {f'S{num}': [] for num in range(1, num_subjects+1)}
  features_dict = {f'S{num}': [] for num in range(1, num_subjects+1)}
  num_gestures = data_dict[database][exercise]

  # for all subjects
  for subject in range(1,(num_subjects+1)):
      gc.collect()

      # load EMG data
      file = sio.loadmat(f'/content/drive/My Drive/uni/{database}/Electrode Data/S{subject}_A1_{exercise}.mat')
      label = file['restimulus'].flatten()
      trials = np.int8(file['rerepetition']).flatten()


      windowed_trials = rolling_window(trials, M, step)
      trials_arr = [np.max(arr) for arr in windowed_trials]

      train_feature_path = f'/content/drive/My Drive/uni/{database}/Features_down/DATA2_{exercise}_S{subject}_features.h5'
      with h5py.File(train_feature_path, 'r') as feature_file:

          mav = feature_file['MAV'][:]
          mavs = feature_file['MAVS'][:]
          wap = feature_file['WAP'][:]
          zcr = feature_file['ZC'][:]
          ar1 = feature_file['ar1'][:]
          ar2 = feature_file['ar2'][:]
          ar3 = feature_file['ar3'][:]
          ar4 = feature_file['ar4'][:]
          wl = feature_file['WL'][:]
          ssc = feature_file['SSC'][:]
          var = feature_file['VAR'][:]
          iemg = feature_file['IEMG'][:]
          rms = feature_file['RMS'][:]

      if evaluation == 'HHT':
          hht_train_path = f'/content/drive/My Drive/uni/{database}/HHT/{exercise}_S{subject}_hht.h5'
      elif evaluation == 'STFT':
          hht_train_path = f'/content/drive/My Drive/uni/{database}/STFT/{exercise}_S{subject}_stft.h5'

      with h5py.File(hht_train_path, 'r') as hht_file:
          mean_freq = hht_file['mean freq'][:]
          skew_freq = hht_file['skew freq'][:]
          psr = hht_file['psr'][:]
          #train_imfs = hht_file['num imfs'][:]
          peak_freq = hht_file['peak freq'][:]
          mean_power = hht_file['mean power'][:]
          kurt_freq = hht_file['kurt freq'][:]
          var_freq = hht_file['var freq'][:]

      if evaluation == 'HHT':
          mean_power = np.squeeze(mean_power)
          mean_freq = np.squeeze(mean_freq)
          psr = np.squeeze(psr)
          mean_power = np.squeeze(mean_power)
          mean_freq = np.squeeze(mean_freq)
          psr = np.squeeze(psr)

      # feature set 1
      #features = np.concatenate([mean_freq.T, psr.T, wl.T], axis=1)

      # feature set 2
      #features = np.concatenate([mean_power.T, wl.T], axis=1)

      # feature set 3
      #features = np.concatenate([mean_power.T, wl.T, mav.T], axis=1)

      # feature set 4
      #features = np.concatenate([iemg.T, var.T, wap.T, wl.T, ssc.T, zcr.T, mean_power.T], axis=1)

      # feature set 5
      #features = np.concatenate([iemg.T, var.T, wap.T, wl.T, ssc.T, zcr.T], axis=1)

      # feature set 6
      #features = np.concatenate([mav.T, wl.T, ssc.T, zcr.T], axis=1)

      # feature set 7
      #features = np.concatenate([mav.T, wl.T, ssc.T, zcr.T, mean_power.T], axis=1)

      # feature set 8
      features = np.concatenate([mav.T, mavs.T,  wap.T, zcr.T, ar1.T, ar2.T, ar3.T, ar4.T, wl.T, mean_freq.T, psr.T], axis=1)

      # window labels
      label_arr = rolling_window(label, M, step)
      label_arr = np.array([np.max(arr) for arr in label_arr])

      # split training / test trials
      train_index = []
      test_index = []
      for index, val in enumerate(trials):
        if val in train_trials:
          train_index.append(index)
        elif val in test_trials:
          test_index.append(index)

      train_features = features[train_index, :]
      train_label_one = label_arr[train_index]

      test_features = features[test_index, :]
      test_label_one = label_arr[test_index]

      print(train_features.shape, test_features.shape)

      # adjust gestures back to 1-N - it's a list
      print("fitting model now")

      # SVM
      X_cudf = cudf.DataFrame(train_features, dtype=np.float32)
      y_cudf = cudf.Series(train_label_one, dtype=np.float32)

      clf = cuSVC(kernel='rbf', C=5.0)
      clf.fit(X_cudf, y_cudf)
      x_test = cudf.DataFrame(test_features, dtype=np.float32)
      y_test = cudf.Series(test_label_one, dtype=np.float32)

      # SVM predicition
      label_prediction = clf.predict(x_test)
      accuracy = accuracy_score(y_test.to_numpy(), label_prediction.to_numpy())
      print(f"{exercise}, S{subject} Accuracy: {(accuracy*100):.2f}")

      accuracy_dict[exercise].append(accuracy*100)
      del clf

for key in accuracy_dict.keys():
  print(f'Average accuracy for {key}: {np.mean(accuracy_dict[key])}, and std: {np.std(accuracy_dict[key], ddof=1)}')
  print(f'num elements {len(accuracy_dict[key])}')

ye = []
for val in accuracy_dict.values():
  ye.extend(val)
# print overall accuracy and sample standard deviation
print(f'Overall average: {np.mean(ye)} and std: {np.std(ye, ddof=1)}')


(26052, 110) (11648, 110)
fitting model now
E1, S1 Accuracy: 67.27
(28780, 110) (13011, 110)
fitting model now
E1, S2 Accuracy: 59.90
(26997, 110) (11277, 110)
fitting model now


KeyboardInterrupt: 