# Organizing the Data

## Set up Label Folders

In [1]:
from pathlib import Path

import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
master_folder = Path("EMG_data_for_gestures-master")
output_master_folder = master_folder / "Processed_Data"

In [3]:
labels = {
    1: "1_hand_at_rest",
    2: "2_hand_clenched_in_a_fist",
    3: "3_wrist_flexion",
    4: "4_wrist_extension",
    5: "5_radial_deviations",
    6: "6_ulnar_deviations",
    7: "7_extended_palm"
}

In [4]:
# Create output folders
output_master_folder.mkdir(exist_ok=True)
for label_name in labels.values():
    (output_master_folder / label_name).mkdir(exist_ok=True)

### Cleaning up Processed Data Folders

In [34]:
import shutil

# Step to clean up output folders before processing
def clean_output_folders(output_master_folder):
    if output_master_folder.exists():
        for label in labels.values():
            label_folder = output_master_folder / label
            if label_folder.exists():
                # Remove all files in the label folder
                for file in label_folder.iterdir():
                    if file.is_file():
                        file.unlink()  # Delete file
    else:
        # Create the master folder if it doesn't exist
        output_master_folder.mkdir(parents=True)

# Clean the output folders
clean_output_folders(output_master_folder)

# Recreate empty output folders for each label
for label in labels.values():
    (output_master_folder / label).mkdir(exist_ok=True)

In [20]:
for i in range(1, 37):
    f_num = i
    subfolder = os.path.join(master_folder, str(f_num))
    if f_num < 10:
        subfolder = os.path.join(master_folder, "0" + str(f_num))

    for filename in os.listdir(subfolder):
        if filename.endswith(".txt"):
            filepath = os.path.join(subfolder, filename)
            # Step 1: Read the file
            df = pd.read_csv(filepath, delim_whitespace=True)

            # Step 2: Remove rows with class label NaN and 0
            df = df[df["class"].notna()]
            df = df[df["class"] != 0]

            # Step 3: Group data by contiguous blocks of the same label
            current_label = None
            segment_data = []
            segment_count = {label: 0 for label in labels.values()}

            for _, row in df.iterrows():
                if row["class"] != current_label:
                    # Save the previous segment if it exists
                    if segment_data:
                        segment_df = pd.DataFrame(segment_data, columns=df.columns)
                        label_name = labels[int(current_label)]
                        segment_count[label_name] += 1
                        segment_path = output_master_folder / label_name / f"subject_{i}_segment_{segment_count[label_name]}.csv"
                        segment_df.to_csv(segment_path, index=False)
                        segment_data = []
                    current_label = row["class"]

                segment_data.append(row)

            # Save the final segment
            if segment_data:
                segment_df = pd.DataFrame(segment_data, columns=df.columns)
                label_name = labels[int(current_label) - 1]
                segment_count[label_name] += 1
                segment_path = output_master_folder / label_name / f"subject_{i}_segment_{segment_count[label_name]}.csv"
                segment_df.to_csv(segment_path, index=False)

## Preprocessing Data

In [21]:
def loadData(folder):
    data = []
    labels = []
    
    for class_folder in os.listdir(folder):
        class_path = os.path.join(output_master_folder, class_folder)
        if os.path.isdir(class_path):
            class_label = int(class_folder.split("_")[0])
            for file in os.listdir(class_path):
                file_path = os.path.join(class_path, file)
                if file.endswith(".csv"):
                    trial = pd.read_csv(file_path)
                    # Drop 'time' column for now; focus on channels and class
                    data.append(trial.drop(columns=["time", "class"]).to_numpy())
                    labels.append(class_label)
    
    return data, labels

In [22]:
# Load data from processed data folder
all_trials, all_labels = loadData(output_master_folder)

In [26]:
# Remap labels to be zero-based
all_labels = [label - 1 for label in all_labels]

In [27]:
# Split trials (randomized at the trial level)
train_trials, test_trials, train_labels, test_labels = train_test_split(
    all_trials, all_labels, test_size=0.2, random_state=42
)
train_trials, val_trials, train_labels, val_labels = train_test_split(
    train_trials, train_labels, test_size=0.25, random_state=42  # 60% train, 20% val, 20% test
)

In [28]:
# Creating Sliding Windows
def create_sliding_windows(data, labels, window_size, step_size):
    X, y = [], []

    for trial, label in zip(data, labels):
        num_samples = trial.shape[0]
        for start in range(0, num_samples - window_size + 1, step_size):
            end = start + window_size
            X.append(trial[start:end])
            y.append(label)
            
    return np.array(X), np.array(y)

In [29]:
window_size = 100 # Number of timesteps per window
step_size = 50

# Create windows for training, validation, and testing
X_train, y_train = create_sliding_windows(train_trials, train_labels, window_size, step_size)
X_val, y_val = create_sliding_windows(val_trials, val_labels, window_size, step_size)
X_test, y_test = create_sliding_windows(test_trials, test_labels, window_size, step_size)

# Convert labels to one-hot encoding
num_classes = len(set(all_labels))  # Number of unique classifications
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Print shapes for sanity check
print("X_train shape:", X_train.shape)  # (num_windows, window_size, num_channels)
print("y_train shape:", y_train.shape)  # (num_windows, num_classes)

X_train shape: (8609, 100, 8)
y_train shape: (8609, 7)


## Building and Training the LSTM Model

In [30]:
def build_lstm_model(input_shape, num_classes):
    model = Sequential([
        LSTM(64, return_sequences=False, input_shape=input_shape),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [31]:
input_shape = (window_size, X_train.shape[2])  # (timesteps, num_channels)
model = build_lstm_model(input_shape, num_classes)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                18688     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 7)                 231       
                                                                 
Total params: 20999 (82.03 KB)
Trainable params: 20999 (82.03 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [32]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


## Evaluation

In [33]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Test Loss: 1.8373682498931885
Test Accuracy: 0.21227364242076874
