LIBSVM

In [3]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from libsvm.svmutil import *

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_samples, num_features)
    - X_test: 测试集特征，形状 (test_samples, num_features)
    - y_train: 训练集标签，形状 (train_samples,)
    - y_test: 测试集标签，形状 (test_samples,)
    """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **展平数据 (num_batches, num_windows, 15, num_channels) -> (num_batches, num_windows * 15 * num_channels)**
    num_batches, num_windows, height, num_channels = X.shape
    X = X.reshape(num_batches, -1)  # 变成 (num_batches, num_windows * 15 * num_channels)

    # **划分训练集和测试集**
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **数据路径**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\G\windowed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)

# **转换数据格式，适应 LIBSVM**
X_train_list = [dict(enumerate(x, 1)) for x in X_train]  # 将 NumPy 数组转换为 LIBSVM 格式的字典列表
X_test_list = [dict(enumerate(x, 1)) for x in X_test]  # 同样转换测试集

# **训练 SVM 模型**
svm_model = svm_train(y_train.tolist(), X_train_list, '-s 0 -t 2 -c 1')  # -s 0: C-SVC, -t 2: RBF 核, -c 1: 惩罚参数C

# **在测试集上评估**
pred_labels, acc, vals = svm_predict(y_test.tolist(), X_test_list, svm_model)

print(f"测试集准确率: {acc[0]:.2f}%")


Loaded features from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\feature_matrix.npy, shape: (390, 49, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 7350), y_train: (312,)
Testing set: X_test: (78, 7350), y_test: (78,)
Accuracy = 8.97436% (7/78) (classification)
测试集准确率: 8.97%


In [6]:
!pip install libsvm

Collecting libsvm
  Downloading libsvm-3.23.0.4.tar.gz (170 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: libsvm
  Building wheel for libsvm (setup.py): started
  Building wheel for libsvm (setup.py): finished with status 'done'
  Created wheel for libsvm: filename=libsvm-3.23.0.4-py3-none-any.whl size=150399 sha256=83d0191b1006ddf8067560b38067847a4fa28a20494ab01843bd192d07d7df0b
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\c1\ce\25\0d50035499973fcbcc407fcb897d53e47b6eb4601308789aa6
Successfully built libsvm
Installing collected packages: libsvm
Successfully installed libsvm-3.23.0.4


KNN

In [8]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

def load_data(data_folder, test_size=0.2, random_state=42):
    """ 加载 MyoWare EMG 特征数据，并拆分训练集和测试集 """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到！")

    X = np.load(feature_path)  # 形状: (num_batches, num_windows, 15, num_channels)

    # X = X[:,:,:,:6]
    y = np.load(label_path)    # 形状: (num_batches,)

    print(f"加载特征矩阵: {X.shape}")
    print(f"加载标签: {y.shape}")

    # **展平数据**: (num_batches, num_windows, 15, num_channels) → (num_batches, num_windows * 15 * num_channels)
    num_batches, num_windows, num_rows, num_channels = X.shape
    X = X.reshape(num_batches, -1)  # 变成 (num_batches, num_features)

    # **确保每个类别都有足够样本**
    num_classes = len(np.unique(y))
    test_size = max(num_classes, int(len(y) * test_size))  # 至少保证每个类别有样本

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    print(f"训练集: {X_train.shape}, 测试集: {X_test.shape}")
    return X_train, X_test, y_train, y_test

# **加载数据**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\G\windowed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)

# **检查并去除 NaN**
print(f"NaNs in X_train: {np.isnan(X_train).sum()} / {X_train.size}")
print(f"NaNs in X_test: {np.isnan(X_test).sum()} / {X_test.size}")

# 过滤 NaN 样本
mask_train = ~np.isnan(X_train).any(axis=1)
mask_test = ~np.isnan(X_test).any(axis=1)

X_train, y_train = X_train[mask_train], y_train[mask_train]
X_test, y_test = X_test[mask_test], y_test[mask_test]
# print(X_train.shape, y_train.shape)
# **再次检查 NaN**
print(f"NaNs in X_train (after cleaning): {np.isnan(X_train).sum()} / {X_train.size}")
print(f"NaNs in X_test (after cleaning): {np.isnan(X_test).sum()} / {X_test.size}")

# **KNN 训练**
knn = KNeighborsClassifier(n_neighbors=5)  # k=5
knn.fit(X_train, y_train)

# **KNN 预测**
y_pred = knn.predict(X_test)

# **评估**
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN 测试集准确率: {accuracy:.2f}")

# **打印分类报告**
print("分类报告:\n", classification_report(y_test, y_pred))


加载特征矩阵: (390, 49, 15, 10)
加载标签: (390,)
训练集: (312, 7350), 测试集: (78, 7350)
NaNs in X_train: 255200 / 2293200
NaNs in X_test: 65240 / 573300
NaNs in X_train (after cleaning): 0 / 360150
NaNs in X_test (after cleaning): 0 / 80850
KNN 测试集准确率: 0.00
分类报告:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       1.0
           3       0.00      0.00      0.00       0.0
           4       0.00      0.00      0.00       1.0
           5       0.00      0.00      0.00       1.0
           6       0.00      0.00      0.00       1.0
           8       0.00      0.00      0.00       1.0
          10       0.00      0.00      0.00       1.0
          11       0.00      0.00      0.00       1.0
          15       0.00      0.00      0.00       1.0
          22       0.00      0.00      0.00       1.0
          24       0.00      0.00      0.00       1.0

    accuracy                           0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RNN

In [7]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def load_data(data_folder, test_size=0.2, random_state=42):
    """ 加载 MyoWare EMG 特征数据，并拆分训练集和测试集 """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到！")

    X = np.load(feature_path)  # 形状: (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)    # 形状: (num_batches,)

    print(f"加载特征矩阵: {X.shape}")
    print(f"加载标签: {y.shape}")

    # **调整数据格式 (num_batches, num_windows, 15, num_channels) → (num_batches, num_windows, 15 * num_channels)**
    num_batches, num_windows, num_rows, num_channels = X.shape
    X = X.reshape(num_batches, num_windows, num_rows * num_channels)  # (num_batches, num_windows, num_features)

    # **归一化数据**
    scaler = StandardScaler()
    X = np.array([scaler.fit_transform(x) for x in X])

    # **转换标签为 one-hot 编码**
    num_classes = len(np.unique(y))
    y = to_categorical(y, num_classes)

    # **确保每个类别都有足够样本**
    test_size = max(num_classes, int(len(y) * test_size))  # 至少保证每个类别有样本
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y.argmax(axis=1)
    )

    print(f"训练集: {X_train.shape}, 测试集: {X_test.shape}")
    return X_train, X_test, y_train, y_test, num_classes

# **加载数据**
data_folder = r"E:\MSC\AML\AML-Project\data\G\windowed_data"
X_train, X_test, y_train, y_test, num_classes = load_data(data_folder)


# **构建 RNN（LSTM）模型**
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    BatchNormalization(),
    Dropout(0.3),

    LSTM(32, return_sequences=False),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax')  # 多分类任务
])

# **编译模型**
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# **训练 RNN**
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# **评估 RNN**
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nRNN 测试集准确率: {accuracy:.2f}")

# **打印分类报告**
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

from sklearn.metrics import classification_report
print("\n分类报告:\n", classification_report(y_test_classes, y_pred_classes))


FileNotFoundError: 特征文件或标签文件未找到！