In [535]:
import os
import scipy.io
from scipy import signal
import statistics as st
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold, cross_val_score,cross_validate

In [536]:
# Path to a single MATLAB recording file (despite the name, this is a file,
# not a directory). NOTE(review): the file name suggests subject 2,
# exercise 3 of a "DB1" dataset -- confirm against the dataset documentation.
data_dir = "Dataset/DB1_s2/S2_A1_E3.mat"
# loadmat returns a dict-like object keyed by the MATLAB variable names.
mat = scipy.io.loadmat(data_dir)

In [537]:
# Pull the two variables used by the rest of the notebook out of the .mat
# contents: the electrode signals and the per-sample stimulus label.
emg, stimulus = mat['emg'], mat['stimulus']

In [538]:
# Lookup table for the stimulus labels of this recording, one tuple per label:
#   (stimulus id, human-readable name, selection flag)
# The flag is "X" for rows marked as selected and "" otherwise; it is only
# used for display in the summary table built further down.
labels_E3 = [
    (0, "Rest", ""),
    (1, "Large diameter grasp", ""),
    (2, "Small diameter grasp (power grip)", ""),
    (3, "Fixed hook grasp", ""),
    (4, "Index finger extension grasp", ""),
    (5, "Medium wrap", "X"),
    (6, "Ring grasp", ""),
    (7, "Prismatic four fingers grasp", ""),
    (8, "Stick grasp", ""),
    (9, "Writing tripod grasp", "X"),
    (10, "Power sphere grasp", ""),
    (11, "Three finger sphere grasp", ""),
    (12, "Precision sphere grasp", ""),
    (13, "Tripod grasp", ""),
    (14, "Prismatic pinch grasp", ""),
    (15, "Tip pinch grasp", ""),
    (16, "Quadpod grasp", ""),
    (17, "Lateral grasp", ""),
    (18, "Parallel extension grasp", ""),
    (19, "Extension type grasp", ""),
    (20, "Power disk grasp", ""),
    (21, "Open a bottle with a tripod grasp", "X"),
    # Closing parenthesis was missing in this label.
    (22, "Turn a screw (grasp the screwdriver with a stick grasp)", ""),
    (23, "Cut something (grasp the knife with an index finger extension grasp)", "X"),
]

In [539]:
# Flatten the stimulus array to one label per sample and count how many
# samples carry each label (value_counts orders by frequency, not by label).
df = pd.DataFrame({'values': stimulus.ravel()})
counts = df['values'].value_counts()

# Same counts as a two-column table, then ordered by label id ('Nome').
# Note that sort_values keeps the original (frequency-ordered) row index.
df_count = pd.DataFrame({'Nome': counts.index, 'Contagem': counts.to_numpy()})
df_sorted = df_count.sort_values(by='Nome')

In [540]:
# Summary table: one row per entry of labels_E3 plus its sample count.
dict_data = {
    'Index': [x[0] for x in labels_E3],
    'Exercício C (3)': [x[1] for x in labels_E3],
    'Selecionados': [x[2] for x in labels_E3],
}
# Look the count up by label id instead of relying on the sorted counts
# lining up positionally with labels_E3: a label that never occurs in the
# recording now shows 0 instead of breaking the table construction.
dict_data['Quantidade'] = counts.reindex(dict_data['Index'], fill_value=0).to_numpy()
df = pd.DataFrame(dict_data)

In [541]:
# Allow long label names to be shown in full.
pd.set_option('max_colwidth', 1000)

# Left-align both the cells and the header cells, and blank out the row
# index labels so only the table columns are visible.
style = (
    df.style
    .set_properties(**{'text-align': 'left'})
    .set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}])
    .format_index(lambda v: "")
)
display(style)

Unnamed: 0,Index,Exercício C (3),Selecionados,Quantidade
,0,Rest,,110312
,1,Large diameter grasp,,5138
,2,Small diameter grasp (power grip),,5176
,3,Fixed hook grasp,,5136
,4,Index finger extension grasp,,5165
,5,Medium wrap,X,5147
,6,Ring grasp,,5134
,7,Prismatic four fingers grasp,,5169
,8,Stick grasp,,5178
,9,Writing tripod grasp,X,5176


In [542]:
# One column per electrode ('Electrode 1' .. 'Electrode 10'), one row per
# sample, plus the stimulus label recorded for that sample.
col_names = [f'Electrode {i}' for i in range(1, 11)]
df = pd.DataFrame(emg, columns=col_names)
df['Stimulus'] = stimulus
display(df)

Unnamed: 0,Electrode 1,Electrode 2,Electrode 3,Electrode 4,Electrode 5,Electrode 6,Electrode 7,Electrode 8,Electrode 9,Electrode 10,Stimulus
0,0.0977,0.1758,0.0146,0.0049,0.0024,0.0024,0.0024,0.0928,0.0024,0.1636,0
1,0.1099,0.2319,0.0171,0.0024,0.0024,0.0024,0.0024,0.0952,0.0024,0.1514,0
2,0.1147,0.2832,0.0195,0.0024,0.0024,0.0024,0.0024,0.0879,0.0024,0.1294,0
3,0.1172,0.2856,0.0146,0.0049,0.0024,0.0024,0.0024,0.0781,0.0024,0.1050,0
4,0.1196,0.2856,0.0171,0.0024,0.0024,0.0024,0.0024,0.0684,0.0024,0.0879,0
...,...,...,...,...,...,...,...,...,...,...,...
229079,0.1440,0.3955,0.0293,0.0024,0.0024,0.0024,0.0024,0.0098,0.0024,0.0024,0
229080,0.1392,0.3809,0.0244,0.0024,0.0024,0.0024,0.0024,0.0073,0.0024,0.0024,0
229081,0.1465,0.3955,0.0220,0.0049,0.0024,0.0024,0.0024,0.0098,0.0024,0.0024,0
229082,0.1367,0.3662,0.0146,0.0024,0.0024,0.0024,0.0024,0.0098,0.0024,0.0024,0


In [543]:
# Keep only the samples recorded during the stimuli under study.
# NOTE(review): labels_E3 flags ids 5, 9, 21 and 23 as selected ("X"), while
# this filter keeps 4, 8, 21 and 23 -- confirm which set is intended.
filtered_df = df.loc[df['Stimulus'].isin([4, 8, 21, 23])]

In [544]:
# Show the filtered samples; the original row index is kept, so it jumps
# wherever rows with other stimulus labels were removed.
filtered_df

Unnamed: 0,Electrode 1,Electrode 2,Electrode 3,Electrode 4,Electrode 5,Electrode 6,Electrode 7,Electrode 8,Electrode 9,Electrode 10,Stimulus
29408,0.2954,0.5469,0.0635,0.0024,0.0024,0.0024,0.0024,0.0366,0.0024,0.0024,4
29409,0.2759,0.5469,0.0610,0.0024,0.0024,0.0024,0.0024,0.0342,0.0024,0.0024,4
29410,0.2734,0.6348,0.0708,0.0024,0.0024,0.0024,0.0024,0.0391,0.0024,0.0024,4
29411,0.2661,0.6567,0.0732,0.0024,0.0024,0.0024,0.0024,0.0415,0.0024,0.0024,4
29412,0.2881,0.6372,0.0781,0.0024,0.0024,0.0024,0.0024,0.0415,0.0024,0.0024,4
...,...,...,...,...,...,...,...,...,...,...,...
228753,1.1304,0.1831,0.0293,0.0903,0.0317,0.0464,0.1074,0.7153,0.1636,0.2637,23
228754,1.0669,0.1636,0.0244,0.0708,0.0269,0.0415,0.1392,0.6934,0.1294,0.3345,23
228755,0.9741,0.1392,0.0171,0.0537,0.0171,0.0366,0.1611,0.6445,0.0977,0.4150,23
228756,0.8789,0.1099,0.0098,0.0366,0.0073,0.0293,0.1880,0.6079,0.0708,0.5029,23


In [545]:
# Sampling frequency of the signal (assumed to be the same for every
# electrode). The conversion below divides by 1000 to go from ms to s, so
# fs is expressed in samples per second.
fs = 100
window_size = 200  # window length in ms
overlap = 0.5  # fraction of a window shared with the next one (50%)

# Window length and overlap expressed as a number of samples.
window_samples = int(fs * window_size / 1000)
overlap_samples = int(overlap * window_samples)

In [546]:
# Row positions in filtered_df at which each analysis window begins.
# * The stride between windows is the window length minus the overlap; the
#   overlap length itself is only the right stride at exactly 50% overlap.
# * The stop is `len - window_samples + 1` because np.arange excludes its stop
#   value; without the `+ 1` the last complete window can be dropped.
# NOTE(review): filtered_df concatenates rows taken from separate stretches of
# the recording, so a window can span two of those stretches -- confirm
# whether such windows should be discarded.
window_starts = np.arange(0, len(filtered_df) - window_samples + 1, window_samples - overlap_samples)
window_starts

array([    0,    10,    20, ..., 20620, 20630, 20640])

In [547]:
# Slice filtered_df into consecutive windows of window_samples rows, one per
# start position (all columns, including 'Stimulus', are kept).
windows = [filtered_df.iloc[start:start + window_samples] for start in window_starts]

In [549]:
# Root-mean-square over the samples of each window (axis 1), computed for
# every column at once; the result has one row per window. The last column
# is the 'Stimulus' column and is overwritten with the window label later.
rms_amplitude = np.sqrt(np.square(windows).mean(axis=1))

In [550]:
# Replace the (meaningless) RMS of the label column with the most frequent
# stimulus label inside each window. `feature_row` is a view into
# rms_amplitude, so the assignment updates the array in place.
for feature_row, window in zip(rms_amplitude, windows):
    feature_row[-1] = st.mode(window['Stimulus'])

In [551]:
# Per-window feature table: RMS of each electrode plus the window's label.
col_names = [f'Electrode {i}' for i in range(1, 11)] + ['Stimulus']
df = pd.DataFrame(rms_amplitude, columns=col_names)
# The label went through a float array; store it as an integer again.
df = df.astype({'Stimulus': int})

In [552]:
# Show the per-window RMS features together with their stimulus label.
df

Unnamed: 0,Electrode 1,Electrode 2,Electrode 3,Electrode 4,Electrode 5,Electrode 6,Electrode 7,Electrode 8,Electrode 9,Electrode 10,Stimulus
0,0.292768,0.702768,0.074078,0.002400,0.002400,0.002400,0.002400,0.035068,0.002400,0.002400,4
1,0.303104,0.781821,0.072701,0.002400,0.002400,0.002400,0.002400,0.037080,0.002400,0.002400,4
2,0.320383,0.777988,0.078659,0.002400,0.002400,0.002400,0.003205,0.043490,0.002400,0.002400,4
3,0.327390,0.759015,0.074595,0.002400,0.002400,0.002400,0.005345,0.049486,0.002400,0.002400,4
4,0.313659,0.715810,0.058101,0.002400,0.002583,0.002400,0.005087,0.051509,0.002400,0.002583,4
...,...,...,...,...,...,...,...,...,...,...,...
2060,1.403181,0.625892,0.200507,0.086042,0.039411,0.038174,0.145815,0.743947,0.348754,0.540981,23
2061,1.128202,0.615232,0.144349,0.054505,0.041073,0.014084,0.051262,0.466063,0.246385,0.291739,23
2062,0.933705,0.536881,0.075672,0.019235,0.030662,0.002400,0.010067,0.262457,0.165188,0.223429,23
2063,0.889268,0.384560,0.184791,0.144383,0.023323,0.008156,0.024390,0.301682,0.441135,0.242161,23


In [553]:
# Features are every column but the last; the last column is the label.
X = df.iloc[:, :-1].to_numpy()
Y = df.iloc[:, -1].to_numpy()

In [554]:
# Standardise each feature column to zero mean and unit standard deviation.
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Fit a two-component PCA on the standardised features and project onto it.
pca = PCA(n_components=2).fit(X)
X_pca = pca.transform(X)

# Share of the total variance explained by each principal component.
print(pca.explained_variance_ratio_)

# Loadings: one row per principal component, one column per electrode.
eletrodos_pca = pca.components_

print(eletrodos_pca)

[0.69434316 0.13050357]
[[ 0.32059405  0.33876404  0.31513587  0.29973327  0.20269624  0.34597552
   0.33586196  0.33510369  0.30420509  0.33894605]
 [-0.3268467  -0.16696231 -0.14883701  0.46776398  0.68456651  0.07857945
  -0.15164933 -0.13251182  0.24188502 -0.22465066]]


In [555]:
# For every principal component, pick the electrode (1-based number) with the
# largest loading magnitude. The absolute value matters: a strongly negative
# loading contributes as much as a positive one, and the sign of a component
# is arbitrary. A set removes duplicates when several components pick the same
# electrode, which would otherwise select the same column more than once.
best_elet = sorted({int(np.argmax(np.abs(component))) + 1 for component in eletrodos_pca})

In [556]:
def cross_validation(model, _X, _y, _cv=4):
    """Cross-validate ``model`` and summarise training/validation metrics.

    Parameters
    ----------
    model : estimator
        Passed to ``cross_validate`` as ``estimator``.
    _X : array-like
        Feature matrix, passed to ``cross_validate`` as ``X``.
    _y : array-like
        Target vector, passed to ``cross_validate`` as ``y``.
    _cv : int or cross-validation splitter, default 4
        Passed to ``cross_validate`` as ``cv``.

    Returns
    -------
    dict
        For accuracy, precision, recall and F1: the per-fold score arrays and
        their means, for both the training and the validation folds. The mean
        accuracies are multiplied by 100 (percent); every other value is left
        as returned by ``cross_validate``.

    Notes
    -----
    The plain ``'precision'``, ``'recall'`` and ``'f1'`` scorers only support
    binary targets and fail on a multiclass ``_y`` (error or NaN scores,
    depending on the scikit-learn version). When ``_y`` holds more than two
    distinct values the macro-averaged scorers are used instead; binary
    targets keep the original scorers. The keys of the result are the same in
    both cases.
    """
    # Metric name -> scorer name. The metric name drives the 'train_<name>' /
    # 'test_<name>' keys of the cross_validate result.
    average_suffix = "_macro" if np.unique(_y).size > 2 else ""
    _scoring = {
        'accuracy': 'accuracy',
        'precision': 'precision' + average_suffix,
        'recall': 'recall' + average_suffix,
        'f1': 'f1' + average_suffix,
    }
    results = cross_validate(estimator=model,
                             X=_X,
                             y=_y,
                             cv=_cv,
                             scoring=_scoring,
                             return_train_score=True)

    return {"Training Accuracy scores": results['train_accuracy'],
            "Mean Training Accuracy": results['train_accuracy'].mean()*100,
            "Training Precision scores": results['train_precision'],
            "Mean Training Precision": results['train_precision'].mean(),
            "Training Recall scores": results['train_recall'],
            "Mean Training Recall": results['train_recall'].mean(),
            "Training F1 scores": results['train_f1'],
            "Mean Training F1 Score": results['train_f1'].mean(),
            "Validation Accuracy scores": results['test_accuracy'],
            "Mean Validation Accuracy": results['test_accuracy'].mean()*100,
            "Validation Precision scores": results['test_precision'],
            "Mean Validation Precision": results['test_precision'].mean(),
            "Validation Recall scores": results['test_recall'],
            "Mean Validation Recall": results['test_recall'].mean(),
            "Validation F1 scores": results['test_f1'],
            "Mean Validation F1 Score": results['test_f1'].mean()
            }


In [557]:
def plot_result(x_label, y_label, plot_title, train_data, val_data):
    '''Plot a grouped bar chart of per-fold training and validation scores.

    Parameters
    ----------
    x_label : str
        Label of the x axis, e.g. the name of the algorithm ('Decision Tree').
    y_label : str
        Label of the y axis, e.g. the name of the metric ('Accuracy').
    plot_title : str
        Title of the plot, e.g. 'Accuracy Plot'.
    train_data : list or array
        One training score per fold.
    val_data : list or array
        One validation score per fold; must have as many entries as
        ``train_data``.

    Returns
    -------
    None
        The chart is shown with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``train_data`` and ``val_data`` have different lengths.
    '''
    n_folds = len(train_data)
    if len(val_data) != n_folds:
        raise ValueError(
            f"train_data has {n_folds} folds but val_data has {len(val_data)}")

    def _ordinal(k):
        # 11th, 12th and 13th are the exceptions to the 1st / 2nd / 3rd rule.
        if 10 <= k % 100 <= 20:
            return f"{k}th"
        return f"{k}" + {1: "st", 2: "nd", 3: "rd"}.get(k % 10, "th")

    # One tick per fold actually supplied. The labels used to be hard-coded
    # for five folds, which made plt.bar fail for any other fold count.
    labels = [f"{_ordinal(k)} Fold" for k in range(1, n_folds + 1)]
    X_axis = np.arange(n_folds)

    # Set size of plot
    plt.figure(figsize=(12, 6))
    # Fixed y range: bars below 0.4 are clipped at the bottom of the axes.
    plt.ylim(0.40000, 1)
    # Two bars of width 0.4 per fold, centred around the fold's tick.
    plt.bar(X_axis - 0.2, train_data, 0.4, color='blue', label='Training')
    plt.bar(X_axis + 0.2, val_data, 0.4, color='red', label='Validation')
    plt.title(plot_title, fontsize=30)
    plt.xticks(X_axis, labels)
    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.legend()
    plt.grid(True)
    plt.show()

In [558]:
# Model inputs: only the electrodes picked from the PCA loadings.
X = df.loc[:, [f'Electrode {i}' for i in best_elet]].to_numpy()
# Target: the stimulus label, stored in the last column.
y = df.iloc[:, -1].to_numpy()

In [559]:
# Map the stimulus ids onto consecutive integer class codes.
label_encoder = LabelEncoder()
encoded_y = label_encoder.fit_transform(y)

# Original stimulus id -> encoded class code, kept for reference.
label_encoder_name_mapping = {
    original: code
    for original, code in zip(label_encoder.classes_,
                              label_encoder.transform(label_encoder.classes_))
}

In [560]:
# Show the id -> code mapping and the encoded target, each under its heading.
print("Mapping of Label Encoded Classes")
print(label_encoder_name_mapping)
print("Label Encoded Target Variable")
print(encoded_y)

Mapping of Label Encoded Classes
{4: 0, 8: 1, 21: 2, 23: 3}
Label Encoded Target Variable
[0 0 0 ... 3 3 3]


In [570]:
# 4-fold cross-validation with shuffling; the seed fixes the fold assignment.
# NOTE(review): the windows overlap by 50%, so with shuffled folds neighbouring
# windows that share samples can land in both the training and the validation
# split -- confirm whether a grouped or unshuffled split is more appropriate.
kf = KFold(n_splits=4, shuffle=True, random_state=42)
model = xgb.XGBClassifier(
    n_estimators=300,
    max_depth=10,
    learning_rate=0.01,
)
scores = cross_val_score(estimator=model, X=X, y=encoded_y, cv=kf, scoring='accuracy')

# Mean accuracy across folds and its standard deviation.
print(f"Accuracy: {np.mean(scores):.2f} (+/- {np.std(scores):.2f})")

Accuracy: 0.52 (+/- 0.01)
