In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import layers, regularizers, backend as K

In [8]:
# Threading setup (must come before any TensorFlow usage)
# A single constant per setting feeds both the environment variables and the
# explicit tf.config calls, so the two can never disagree.
# NOTE(review): 22 intra-op threads is the value two of the three original
# settings used (the third used 24) — confirm against the target machine.
# NOTE(review): tensorflow is imported in the first cell, i.e. before these
# environment variables are written — verify they are still picked up, or
# move them above `import tensorflow`.
INTRA_OP_THREADS = 22
INTER_OP_THREADS = 2
os.environ["OMP_NUM_THREADS"] = str(INTRA_OP_THREADS)
os.environ["TF_NUM_INTRAOP_THREADS"] = str(INTRA_OP_THREADS)
os.environ["TF_NUM_INTEROP_THREADS"] = str(INTER_OP_THREADS)
tf.config.set_visible_devices([], 'GPU')  # expose no GPU: everything runs on CPU
tf.config.threading.set_intra_op_parallelism_threads(INTRA_OP_THREADS)
tf.config.threading.set_inter_op_parallelism_threads(INTER_OP_THREADS)
# --- CONFIG ---
# Training recordings: subject 1, left side, trials 12, 10, 8, 6 and 4.
train_files = [
    f"parsed_emg/subject_1/trial_{trial}(left).csv"
    for trial in (12, 10, 8, 6, 4)
]
# Trial 2 of the same subject is held out for the final evaluation.
test_file = "parsed_emg/subject_1/trial_2(left).csv"

# Data layout
label_col = 5                     # positional index of the label column read by load_csv
sequence_length, num_classes = 40, 3  # samples per window / size of the softmax output

# Optimisation
batch_size, epochs = 64, 10
learning_rate = 3e-4

# --- LOAD + NORMALIZE + SEQUENCE ---
def load_csv(path):
    df = pd.read_csv(path, skiprows=1)
    X = df.iloc[:, 1:5].values.astype("float32") * 1_000_000  # amplify signal
    y = df.iloc[:, label_col].values.astype("int32")
    return X, y

def create_sequences(X, y, seq_len):
    """Cut a recording into overlapping windows with next-sample labels.

    Window ``i`` holds samples ``X[i : i + seq_len]`` and is labelled with
    ``y[i + seq_len]``, i.e. the label of the sample immediately after the
    window. Consecutive windows are shifted by one sample, so an input of
    ``n`` samples yields ``n - seq_len`` windows.

    Args:
        X: Array-like of shape ``(n, ...)``; the first axis is the sample axis.
        y: Array-like with at least ``n`` entries along its first axis.
        seq_len: Number of samples per window; must be >= 1.

    Returns:
        ``(X_seq, y_seq)`` where ``X_seq`` has shape
        ``(n - seq_len, seq_len, ...)`` and ``y_seq`` has ``n - seq_len``
        entries. When the input is too short for a single window, both arrays
        are empty but keep their full rank and dtype (``X_seq`` is
        ``(0, seq_len, ...)``), so downstream shape checks still hold.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
        IndexError: If ``y`` has fewer entries than ``X``.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    X = np.asarray(X)
    y = np.asarray(y)
    n_windows = len(X) - seq_len

    if n_windows <= 0:
        # Not enough samples for a window plus its label.
        empty_X = np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    if len(y) < len(X):
        raise IndexError(
            f"y has {len(y)} entries but X has {len(X)}; every sample needs a label"
        )

    # sliding_window_view appends the window axis last and yields one window
    # more than needed (the final one has no following label), hence the slice.
    windows = np.lib.stride_tricks.sliding_window_view(X, seq_len, axis=0)[:n_windows]
    # Move the window axis next to the batch axis and materialise a writable,
    # C-contiguous copy (the view itself is read-only and strided).
    X_seq = np.moveaxis(windows, -1, 1).copy()
    y_seq = y[seq_len:seq_len + n_windows].copy()
    return X_seq, y_seq

def focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function for integer (sparse) class labels.

    Per sample the loss is ``alpha * (1 - p_t) ** gamma * CE``, where ``p_t``
    is the predicted probability of the true class and ``CE`` is the
    categorical cross-entropy between the one-hot label and ``y_pred``.

    Args:
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
        alpha: Constant factor applied to every sample's loss.

    Returns:
        A ``loss_fn(y_true, y_pred)`` callable returning one loss value per
        sample. ``y_true`` holds integer class ids; ``y_pred`` must hold class
        probabilities, because the cross-entropy is computed with its default
        ``from_logits=False``.
    """
    def loss_fn(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, tf.int32)
        # Some Keras versions hand sparse labels to the loss as (batch, 1)
        # instead of (batch,). Without this squeeze, one_hot would yield a
        # rank-3 tensor that silently broadcasts against y_pred.
        if (
            y_true.shape.rank is not None
            and y_true.shape.rank == y_pred.shape.rank
            and y_true.shape[-1] == 1
        ):
            y_true = tf.squeeze(y_true, axis=-1)
        y_true_one_hot = tf.one_hot(y_true, depth=tf.shape(y_pred)[-1])
        cross_entropy = tf.keras.losses.categorical_crossentropy(y_true_one_hot, y_pred)
        # Probability the model assigned to the true class.
        probs = tf.reduce_sum(y_true_one_hot * y_pred, axis=-1)
        focal_factor = alpha * tf.pow(1. - probs, gamma)
        return focal_factor * cross_entropy
    return loss_fn

In [None]:
# --- TRAINING DATA ---
# Load every training trial on its own so the trial boundaries stay known.
train_trials = [load_csv(f) for f in train_files]
trial_lengths = [len(y_trial) for _, y_trial in train_trials]
raw_X_all = np.concatenate([X_trial for X_trial, _ in train_trials])
raw_y_all = np.concatenate([y_trial for _, y_trial in train_trials])

# Normalize training data (statistics come from the training trials only)
scaler = StandardScaler()
raw_X_all = scaler.fit_transform(raw_X_all)

# Create sequences trial by trial: windowing the concatenated array would
# produce windows that start in one recording and end in the next.
trial_edges = np.cumsum(trial_lengths)[:-1]
X_parts, y_parts = [], []
for X_trial, y_trial in zip(np.split(raw_X_all, trial_edges),
                            np.split(raw_y_all, trial_edges)):
    if len(X_trial) <= sequence_length:
        continue  # too short to yield a single window
    X_part, y_part = create_sequences(X_trial, y_trial, sequence_length)
    X_parts.append(X_part)
    y_parts.append(y_part)
if not X_parts:
    raise ValueError("No training trial is longer than sequence_length")
X_train_seq = np.concatenate(X_parts)
y_train_seq = np.concatenate(y_parts)

# Compute class weights, keyed by the actual class ids (not by their position
# in the sorted unique list, which is only the same when ids are 0..k-1).
present_classes = np.unique(y_train_seq)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=y_train_seq
)
class_weight_dict = {int(c): float(w) for c, w in zip(present_classes, class_weights)}

# Train/val split
# NOTE(review): windows are shifted by one sample, so a random split puts
# near-duplicate windows on both sides and validation scores will look
# optimistic — consider holding out a whole trial or a contiguous tail.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_seq, y_train_seq, test_size=0.1, random_state=42, stratify=y_train_seq
)

# Datasets
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(10000).batch(batch_size)
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

# --- TEST SET ---
# The held-out trial is scaled with the statistics fitted on the training data.
X_test_raw, y_test_raw = load_csv(test_file)
X_test_raw = scaler.transform(X_test_raw)
X_test, y_test = create_sequences(X_test_raw, y_test_raw, sequence_length)

# --- MODEL ---
def _conv_block(filters):
    """Two Conv1D -> BatchNorm -> ReLU stages, then 2x max-pooling and 25% dropout."""
    block = []
    for _ in range(2):
        block.extend([
            layers.Conv1D(filters, kernel_size=3, padding='same'),
            layers.BatchNormalization(),
            layers.Activation('relu'),
        ])
    block.extend([layers.MaxPooling1D(pool_size=2), layers.Dropout(0.25)])
    return block


def _dense_relu(units):
    """Fully connected ReLU layer with L2 (1e-3) kernel regularisation."""
    return layers.Dense(units, activation='relu', kernel_regularizer=regularizers.l2(1e-3))


# Layers are listed (and therefore created) in the order data flows through them.
model = tf.keras.Sequential([
    layers.Input(shape=(sequence_length, 4)),

    # Convolutional feature extractor: 64-filter block, then 128-filter block
    *_conv_block(64),
    *_conv_block(128),

    # Bidirectional LSTM stack; only the second LSTM collapses the time axis
    layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(32, return_sequences=False)),
    layers.Dropout(0.5),

    # Dense head ending in a softmax over the classes
    _dense_relu(128),
    layers.Dropout(0.3),
    _dense_relu(64),
    layers.Dropout(0.2),
    _dense_relu(32),
    layers.Dense(num_classes, activation='softmax'),
])

adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=adam, loss=focal_loss(gamma=2.0, alpha=0.25), metrics=['accuracy'])

# --- TRAINING ---
# Fit on the shuffled training batches, validating after every epoch; the
# class weights rescale each sample's loss according to its label.
fit_options = {
    "validation_data": val_ds,
    "epochs": epochs,
    "class_weight": class_weight_dict,
}
history = model.fit(train_ds, **fit_options)

# --- EVALUATION ---
# evaluate() returns the loss followed by the compiled metrics (accuracy only).
test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_acc = test_metrics
print(f"\nTest accuracy: {test_acc:.2f}, Test loss: {test_loss:.2f}")

# --- PLOTS ---
# 1) Loss curves
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history.history['loss'], label='train loss')
ax_loss.plot(history.history['val_loss'], label='val loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training vs. Validation Loss')
ax_loss.legend()
plt.show()

# 2) Confusion matrix
# Predicted class = index of the largest softmax output per test window.
y_pred = model.predict(X_test).argmax(axis=1)
class_labels = list(range(num_classes))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=class_labels)
# Draw into an explicit Axes: calling plt.figure() first and then disp.plot()
# without `ax` leaves an extra, empty figure in the output.
fig_cm, ax_cm = plt.subplots()
disp.plot(ax=ax_cm, cmap=plt.cm.Blues, values_format='d')
ax_cm.set_title('Confusion Matrix on Test Set')
plt.show()

Epoch 1/10
[1m33636/33636[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m449s[0m 13ms/step - accuracy: 0.5773 - loss: 0.1044 - val_accuracy: 0.7429 - val_loss: 0.0571
Epoch 2/10
[1m33636/33636[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 13ms/step - accuracy: 0.6938 - loss: 0.0644 - val_accuracy: 0.7611 - val_loss: 0.0497
Epoch 3/10
[1m12242/33636[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m4:53[0m 14ms/step - accuracy: 0.7186 - loss: 0.0586