In [21]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    遍历 input_folder 下的所有 CSV 文件，处理 EMG 和 IMU 数据，并保存特征矩阵和标签到 output_folder。
    """
    # 设定采样率
    fs = 1000  # 采样率 Hz
    cycle_duration = 10  # 每个周期 10 秒
    skip_seconds = 5  # 跳过前 4 秒
    use_seconds = 5  # 需要保留的秒数

    cycle_samples = fs * cycle_duration  # 10 秒数据点数 = 10000
    skip_samples = fs * skip_seconds  # 跳过 4 秒 = 4000
    use_samples = fs * use_seconds  # 取后 6 秒 = 6000

    # 滑动窗口参数
    window_size = 200  # 200ms = 200 采样点
    step_size = 100  # 100ms = 100 采样点
    num_windows = (use_samples - window_size) // step_size + 1  # 计算窗口数

    # 创建存储文件夹
    os.makedirs(output_folder, exist_ok=True)

    # **读取 shuffle_order.xlsx**
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    print(shuffle_df.shape)
    
    if shuffle_df.shape[0] < 15:
        raise ValueError("标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。")

    # 存储所有数据和标签
    all_features = []
    all_labels = []

    # **遍历 CSV 文件**
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        # **读取数据**
        raw_data = pd.read_csv(file_path)

        # **检查通道数（忽略时间列）**
        num_channels = raw_data.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        # **归一化（Z-score 标准化）**
        data = raw_data.iloc[:, 1:]  # 去掉时间列
        mean_vals = data.mean(axis=0)
        std_vals = data.std(axis=0)
        normalized_data = (data - mean_vals) / (std_vals + 1e-10)  # 避免除零错误

        # **更新原始数据**
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        segments = []  # 存储所有分割后的数据
        labels = shuffle_df.iloc[file_idx].values.tolist()  # 获取该文件对应的 26 组标签
        num_cycles = 26  # 每个文件固定 26 段（每段 10s）

        for i in range(num_cycles):
            start_idx = i * cycle_samples + skip_samples  # 跳过前 4 秒
            end_idx = start_idx + use_samples  # 取后 6 秒

            if end_idx > len(raw_data):  # 处理不足 6000 采样点的情况
                segment = raw_data.iloc[start_idx:].values  # 取剩余数据
                pad_size = use_samples - len(segment)  # 计算填充数
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)  # 填充 0
            else:
                segment = raw_data.iloc[start_idx:end_idx].values  # 正常提取数据

            # **滑动窗口**
            windows = [
                segment[j:j + window_size, 1:]  # 取 200 采样点，忽略时间列
                for j in range(0, use_samples - window_size + 1, step_size)  # 滑动步长 100
            ]
            segments.append(np.array(windows))

        # **转换为 NumPy 数组**
        segments_array = np.array(segments)  # 形状 (26, num_windows, 200, num_channels)

        # **计算特征**
        features_batches = []
        for batch_idx in range(segments_array.shape[0]):  # 26 个 batch
            batch_features = []  # 存储当前 batch 的所有窗口特征
            for window_idx in range(segments_array.shape[1]):  # 计算每个窗口
                window = segments_array[batch_idx, window_idx]  # (200, num_channels)
                features = extract_features(window)  # 计算 (15, 10)
                batch_features.append(features)  # 存入 batch

            features_batches.append(np.array(batch_features))

        features_array = np.array(features_batches)  # (26, num_windows, 15, 10)

        # **存储数据**
        all_features.append(features_array)
        all_labels.extend(labels)  # 每个文件 26个 cycle，取 shuffle_order.xlsx 里的前 26 个标签

    # **最终转换为 NumPy 数组**
    all_features = np.vstack(all_features)  # 合并所有 batch，形状 (总 batch, num_windows, 15, 10)
    all_labels = np.array(all_labels)  # (总 batch,)

    # **保存**
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")


def extract_features(segment):
    """
    计算 EMG 和 IMU 的 15 个特征，并拼接成 (15, 10)
    """
    # **分离 EMG (前四列) 和 IMU (后六列)**
    emg_signals = segment[:, :4]  # 4 个 EMG 通道 (200, 4)
    imu_signals = segment[:, 4:]  # 6 个 IMU 通道 (200, 6)

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 10)

# **运行函数**
root = r"data\G"
input_folder = root
output_folder = os.path.join(root, "windowed_data")
shuffle_order_file = os.path.join(root,"shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)


(15, 26)
Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       -3.775955
2       -0.214248
3       -0.214248
4       -0.214248
           ...   
45715   -0.005459
45716   -0.017740
45717   -0.017740
45718   -0.017740
45719   -0.017740
Name: EMG1, Length: 45720, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.768217
2        0.054180
3        0.076403
4        0.054180
           ...   
45715   -0.012488
45716   -0.012488
45717   -0.034711
45718   -0.056934
45719   -0.034711
Name: EMG2, Length: 45720, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.806194
2       -0.135337
3       -0.135337
4       -0.135337
           ...   
45715   -0.023345
45716   -0.023345
45717   -0.023345
45718   -0.035788
45719   -0.023345
Name: EMG3, Length: 45720, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data10.csv (2/15)
Detected 10 channels (excluding time column).


1       -5.279856
2       -0.008469
3       -0.008469
4       -0.008469
           ...   
46195   -0.043035
46196   -0.043035
46197   -0.043035
46198   -0.112168
46199   -0.146735
Name: EMG1, Length: 46200, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.967664
2        0.052054
3        0.025846
4        0.025846
           ...   
46195   -0.052779
46196   -0.052779
46197   -0.026571
46198   -0.000362
46199   -0.000362
Name: EMG2, Length: 46200, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.075739
2        0.028550
3        0.045340
4        0.028550
           ...   
46195   -0.072193
46196   -0.122564
46197   -0.172935
46198   -0.206516
46199   -0.206516
Name: EMG3, Length: 46200, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data11.csv (3/15)
Detected 10 channels (excluding time column).


1       -5.222709
2       -0.004360
3       -0.004360
4       -0.004360
           ...   
46091    0.080907
46092    0.063854
46093    0.063854
46094    0.046800
46095    0.063854
Name: EMG1, Length: 46096, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.467259
2       -0.052592
3       -0.028041
4       -0.052592
           ...   
46091   -0.003489
46092   -0.003489
46093   -0.003489
46094   -0.003489
46095   -0.003489
Name: EMG2, Length: 46096, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.279730
2       -0.110086
3       -0.110086
4       -0.110086
           ...   
46091   -0.054305
46092   -0.054305
46093   -0.054305
46094   -0.054305
46095   -0.054305
Name: EMG3, Length: 46096, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data12.csv (4/15)
Detected 10 channels (excluding time column).


1       -5.727696
2        0.218083
3        0.218083
4        0.218083
           ...   
45838    1.287198
45839    1.249685
45840    1.193416
45841    1.118391
45842    1.005852
Name: EMG1, Length: 45843, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.407128
2       -0.008183
3       -0.008183
4       -0.008183
           ...   
45838    0.040494
45839    0.137849
45840    0.186526
45841    0.259542
45842    0.332558
Name: EMG2, Length: 45843, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.519855
2       -0.167930
3       -0.167930
4       -0.167930
           ...   
45838   -0.349729
45839   -0.349729
45840   -0.372454
45841   -0.395179
45842   -0.406541
Name: EMG3, Length: 45843, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data13.csv (5/15)
Detected 10 channels (excluding time column).


1       -5.565778
2        0.018900
3        0.018900
4        0.018900
           ...   
45864    0.000650
45865   -0.017601
45866   -0.035851
45867   -0.017601
45868   -0.054102
Name: EMG1, Length: 45869, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.123906
2       -0.060378
3       -0.060378
4       -0.033678
           ...   
45864   -0.006978
45865   -0.033678
45866   -0.033678
45867   -0.060378
45868   -0.060378
Name: EMG2, Length: 45869, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.184999
2       -0.099384
3       -0.089200
4       -0.099384
           ...   
45864   -0.099384
45865   -0.099384
45866   -0.089200
45867   -0.089200
45868   -0.089200
Name: EMG3, Length: 45869, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data14.csv (6/15)
Detected 10 channels (excluding time column).


1       -6.540702
2       -0.013084
3       -0.034556
4       -0.034556
           ...   
45710    0.029861
45711    0.051334
45712    0.051334
45713    0.051334
45714    0.029861
Name: EMG1, Length: 45715, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.784970
2       -0.057567
3       -0.057567
4       -0.031980
           ...   
45710   -0.057567
45711   -0.057567
45712   -0.057567
45713   -0.031980
45714   -0.031980
Name: EMG2, Length: 45715, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.021728
2       -0.101993
3       -0.111629
4       -0.101993
           ...   
45710   -0.140537
45711   -0.130901
45712   -0.130901
45713   -0.130901
45714   -0.130901
Name: EMG3, Length: 45715, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data15.csv (7/15)
Detected 10 channels (excluding time column).


1       -6.199383
2        1.348968
3        1.328622
4        1.328622
           ...   
46094    0.046826
46095    0.026480
46096    0.026480
46097    0.046826
46098    0.026480
Name: EMG1, Length: 46099, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -10.013532
2        -0.132160
3        -0.099222
4        -0.099222
           ...    
46094     0.032529
46095     0.065467
46096     0.065467
46097     0.065467
46098     0.065467
Name: EMG2, Length: 46099, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.404701
2        0.075113
3        0.089471
4        0.075113
           ...   
46094   -0.054112
46095   -0.054112
46096   -0.039754
46097   -0.025396
46098   -0.025396
Name: EMG3, Length: 46099, dtype: float64' has dtype incompatible with

Processing sensor_data2.csv (8/15)
Detected 10 channels (excluding time column).


1       -4.256927
2        0.379596
3        0.379596
4        0.379596
           ...   
45993   -0.064622
45994   -0.050740
45995   -0.064622
45996   -0.064622
45997   -0.064622
Name: EMG1, Length: 45998, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.987406
2       -0.300576
3       -0.280899
4       -0.280899
           ...   
45993   -0.044767
45994   -0.044767
45995   -0.064445
45996   -0.064445
45997   -0.064445
Name: EMG2, Length: 45998, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.257649
2        1.129313
3        1.129313
4        1.129313
           ...   
45993   -0.011931
45994   -0.032681
45995   -0.011931
45996   -0.011931
45997   -0.011931
Name: EMG3, Length: 45998, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data3.csv (9/15)
Detected 10 channels (excluding time column).


1       -3.960794
2        0.018730
3        0.005851
4        0.018730
           ...   
45586    0.005851
45587    0.018730
45588   -0.019907
45589   -0.019907
45590   -0.019907
Name: EMG1, Length: 45591, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.883248
2       -0.027118
3       -0.027118
4       -0.011092
           ...   
45586   -0.027118
45587   -0.043145
45588   -0.043145
45589   -0.043145
45590   -0.011092
Name: EMG2, Length: 45591, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.122701
2       -0.013994
3       -0.013994
4       -0.013994
           ...   
45586    0.019951
45587    0.019951
45588    0.019951
45589    0.036923
45590    0.036923
Name: EMG3, Length: 45591, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data4.csv (10/15)
Detected 10 channels (excluding time column).


1       -4.545249
2       -0.049926
3       -0.064762
4       -0.049926
           ...   
45895    0.024254
45896    0.039090
45897    0.024254
45898    0.024254
45899    0.024254
Name: EMG1, Length: 45900, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.977079
2        0.367423
3        0.367423
4        0.387066
           ...   
45895   -0.005783
45896    0.013860
45897    0.013860
45898    0.013860
45899    0.033502
Name: EMG2, Length: 45900, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.222593
2        0.291772
3        0.264482
4        0.264482
           ...   
45895   -0.008414
45896   -0.008414
45897   -0.008414
45898   -0.035704
45899   -0.008414
Name: EMG3, Length: 45900, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data5.csv (11/15)
Detected 10 channels (excluding time column).


1       -4.327092
2        0.049459
3        0.049459
4        0.049459
           ...   
45743    0.021314
45744    0.007242
45745    0.021314
45746    0.007242
45747    0.007242
Name: EMG1, Length: 45748, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.380283
2       -0.082293
3       -0.082293
4       -0.082293
           ...   
45743    0.027979
45744    0.027979
45745    0.027979
45746    0.027979
45747    0.000411
Name: EMG2, Length: 45748, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.213789
2       -0.058468
3       -0.058468
4       -0.034537
           ...   
45743   -0.106329
45744   -0.130260
45745   -0.130260
45746   -0.106329
45747   -0.130260
Name: EMG3, Length: 45748, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data6.csv (12/15)
Detected 10 channels (excluding time column).


1       -3.744446
2       -0.025337
3       -0.025337
4       -0.025337
           ...   
45962   -0.001108
45963   -0.001108
45964   -0.025337
45965   -0.025337
45966   -0.013223
Name: EMG1, Length: 45967, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.152634
2        0.021264
3        0.041440
4        0.021264
           ...   
45962   -0.119969
45963   -0.119969
45964   -0.119969
45965   -0.099793
45966   -0.099793
Name: EMG2, Length: 45967, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.079366
2        0.001534
3        0.001534
4       -0.018602
           ...   
45962    0.061940
45963    0.041804
45964    0.041804
45965    0.041804
45966    0.041804
Name: EMG3, Length: 45967, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data7.csv (13/15)
Detected 10 channels (excluding time column).


1       -4.453065
2       -0.034803
3       -0.034803
4       -0.034803
           ...   
46164   -0.107234
46165   -0.107234
46166   -0.107234
46167   -0.107234
46168   -0.092748
Name: EMG1, Length: 46169, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.632306
2       -0.203781
3       -0.203781
4       -0.181990
           ...   
46164   -0.051240
46165   -0.051240
46166   -0.029448
46167   -0.029448
46168   -0.007657
Name: EMG2, Length: 46169, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.749679
2        0.023593
3        0.045947
4        0.045947
           ...   
46164    0.001239
46165    0.001239
46166    0.023593
46167    0.023593
46168    0.023593
Name: EMG3, Length: 46169, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data8.csv (14/15)
Detected 10 channels (excluding time column).


1       -4.303589
2       -0.032001
3       -0.032001
4       -0.032001
           ...   
45996   -0.088206
45997   -0.088206
45998   -0.088206
45999   -0.102257
46000   -0.088206
Name: EMG1, Length: 46001, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.680219
2       -0.269486
3       -0.269486
4       -0.269486
           ...   
45996    0.037878
45997    0.015923
45998    0.015923
45999    0.015923
46000   -0.006031
Name: EMG2, Length: 46001, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.888249
2       -0.000472
3       -0.000472
4       -0.000472
           ...   
45996   -0.039464
45997   -0.039464
45998   -0.039464
45999   -0.039464
46000   -0.039464
Name: EMG3, Length: 46001, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data9.csv (15/15)
Detected 10 channels (excluding time column).


1       -4.303005
2        0.506601
3        0.562690
4        0.604756
           ...   
46333   -0.096353
46334   -0.110375
46335   -0.096353
46336   -0.124397
46337   -0.096353
Name: EMG1, Length: 46338, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.636328
2       -0.081067
3       -0.030866
4       -0.005765
           ...   
46333   -0.005765
46334    0.019335
46335    0.019335
46336    0.044436
46337    0.044436
Name: EMG2, Length: 46338, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.654441
2        0.534974
3        0.534974
4        0.534974
           ...   
46333    0.049798
46334    0.027745
46335    0.027745
46336    0.027745
46337    0.027745
Name: EMG3, Length: 46338, dtype: float64' has dtype incompatible with int64, pl

Feature extraction complete! Shape: (390, 49, 15, 10)
Labels saved: (390,)
Feature matrix saved at: data\G\windowed_data/feature_matrix.npy
Labels saved at: data\G\windowed_data/labels.npy


In [15]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    遍历 input_folder 下的所有 CSV 文件，处理 EMG 和 IMU 数据，并保存特征矩阵和标签到 output_folder。
    """
    # 设定采样率
    original_fs = 1000  # 原采样率 Hz
    target_fs = 200  # 目标采样率 Hz
    downsample_factor = original_fs // target_fs  # 降采样因子
    cycle_duration = 10  # 每个周期 10 秒
    skip_seconds = 5  # 跳过前 4 秒
    use_seconds = 5  # 需要保留的秒数

    cycle_samples = (original_fs * cycle_duration) // downsample_factor  # 2000
    skip_samples = (original_fs * skip_seconds) // downsample_factor  # 1000
    use_samples = (original_fs * use_seconds) // downsample_factor  # 1000

    # 滑动窗口参数
    window_size = 100  # 200ms = 40 采样点 (原 200, 降采样后 40)
    step_size = 50  # 100ms = 20 采样点 (原 100, 降采样后 20)
    num_windows = (use_samples - window_size) // step_size + 1  # 计算窗口数

    # 创建存储文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[0] < 15:
        raise ValueError("标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        # 降采样
        raw_data = raw_data.iloc[::downsample_factor, :].reset_index(drop=True)
        
        # 归一化
        data = raw_data.iloc[:, 1:]
        mean_vals = data.mean(axis=0)
        std_vals = data.std(axis=0)
        normalized_data = (data - mean_vals) / (std_vals + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        segments = []
        labels = shuffle_df.iloc[file_idx].values.tolist()
        num_cycles = 26

        for i in range(num_cycles):
            start_idx = i * cycle_samples + skip_samples
            end_idx = start_idx + use_samples

            if end_idx > len(raw_data):
                segment = raw_data.iloc[start_idx:].values
                pad_size = use_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            else:
                segment = raw_data.iloc[start_idx:end_idx].values

            windows = [
                segment[j:j + window_size, 1:]
                for j in range(0, use_samples - window_size + 1, step_size)
            ]
            segments.append(np.array(windows))

        segments_array = np.array(segments)
        
        features_batches = []
        for batch_idx in range(segments_array.shape[0]):
            batch_features = []
            for window_idx in range(segments_array.shape[1]):
                window = segments_array[batch_idx, window_idx]
                features = extract_features(window)
                batch_features.append(features)

            features_batches.append(np.array(batch_features))

        features_array = np.array(features_batches)

        all_features.append(features_array)
        all_labels.extend(labels)

    all_features = np.vstack(all_features)
    all_labels = np.array(all_labels)

    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    emg_signals = segment[:, :4]
    imu_signals = segment[:, 4:]

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=200, nperseg=40)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=200, nperseg=40)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T

    return np.concatenate((emg_features, imu_features), axis=1)

root = r"data\G"
input_folder = root
output_folder = os.path.join(root, "windowed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

ValueError: 标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。

In [5]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_batches, num_windows, 15, num_channels)
    - X_test: 测试集特征，形状 (test_batches, num_windows, 15, num_channels)
    - y_train: 训练集标签，形状 (train_batches,)
    - y_test: 测试集标签，形状 (test_batches,)
    """
    # **加载数据**
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    # **数据基本信息**
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **划分训练集和测试集**
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # **打印数据划分信息**
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\G\windowed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)



Loaded features from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\feature_matrix.npy, shape: (390, 19, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 19, 15, 10), y_train: (312,)
Testing set: X_test: (78, 19, 15, 10), y_test: (78,)


----------------------------

# 处理分开的数据

In [23]:
import numpy as np
import pandas as pd
import os
import scipy.stats as stats
from scipy.signal import welch

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    处理 EMG 数据文件，先按照 shuffle_order.xlsx 划分手势数据，
    然后进行窗口切分，并提取特征。
    """
    fs = 1000  # 采样率 1000Hz，每秒 1000 个数据点
    target_samples = 5000  # 每个手势的固定采样点数
    window_size = 200  # 窗口大小 400 采样点（0.4s）
    step_size = 100  # 窗口滑动步长 200 采样点（0.2s）

    # 创建输出文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[1] < 26:
        raise ValueError("标签文件数据不足 26 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx + 1}/{len(csv_files)})")

        # 读取数据
        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1  # 去掉时间列
        print(f"Detected {num_channels} channels (excluding time column).")

        # 归一化数据
        data = raw_data.iloc[:, 1:]
        normalized_data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        # 按 shuffle_order.xlsx 划分 26 组手势
        valid_segments = []
        for i in range(26):
            start_idx = shuffle_df.iloc[file_idx, i]  # 该手势起始索引
            if start_idx + target_samples <= len(raw_data):
                segment = raw_data.iloc[start_idx:start_idx + target_samples, 1:].values
            else:
                segment = raw_data.iloc[start_idx:, 1:].values
                pad_size = target_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            valid_segments.append(segment)

        valid_segments = np.array(valid_segments)  # 形状 (26, 5000, num_channels)

        # 进行窗口划分和特征提取
        features_batches = []
        for seg in valid_segments:
            windows = [seg[j:j + window_size, :] for j in range(0, target_samples - window_size + 1, step_size)]
            batch_features = np.array([extract_features(window) for window in windows])  # (num_windows, 15, num_channels)
            features_batches.append(batch_features)

        processed_segments = np.array(features_batches)  # (26, num_windows, 15, num_channels)

        if processed_segments.shape[0] != 26:
            print(f"Warning: {file_name} 提取到 {processed_segments.shape[0]} 个手势，不是 26 个。")

        # 存储数据
        all_features.append(processed_segments)
        all_labels.extend(shuffle_df.iloc[file_idx, :26].values.tolist())

    # 最终转换为 NumPy 数组
    all_features = np.vstack(all_features)  # (总手势数, num_windows, 15, num_channels)
    all_labels = np.array(all_labels)  # (总手势数,)

    # 保存数据
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    """
    计算 EMG 和 IMU 的特征。
    """
    emg_signals = segment[:, :4]  # 4 个 EMG 通道
    imu_signals = segment[:, 4:]  # 6 个 IMU 通道

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 10)

# 运行代码
root = r"data\FZH"
input_folder = root
output_folder = os.path.join(root, "processed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       -0.033795
2       -0.033795
3       -0.033795
4       -0.033795
           ...   
22275   -0.033795
22276   -0.033795
22277   -0.033795
22278   -0.033795
22279   -0.033795
Name: 303, Length: 22280, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.132256
2       -0.114027
3       -0.123142
4       -0.132256
           ...   
22275    1.079998
22276    1.079998
22277    1.061769
22278    1.061769
22279    1.070883
Name: 293, Length: 22280, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.262535
2       -0.227800
3       -0.253851
4       -0.271218
           ...   
22275    0.223756
22276    0.258491
22277    0.293226
22278    0.362697
22279    0.414799
Name: 271, Length: 22280, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data10.csv (2/15)
Detected 10 channels (excluding time column).


1       -0.333118
2       -1.149433
3       -1.965747
4       -0.333118
           ...   
22307    2.115824
22308    2.115824
22309    2.115824
22310    2.115824
22311    2.115824
Name: 305, Length: 22312, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.964646
2       -1.178968
3       -1.250408
4       -1.321849
           ...   
22307    1.921554
22308    2.078723
22309    2.193028
22310    2.293045
22311    2.407350
Name: 272, Length: 22312, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.142334
2        0.028957
3       -0.068224
4       -0.116814
           ...   
22307    0.223318
22308    0.223318
22309    0.239515
22310    0.271908
22311    0.304302
Name: 323, Length: 22312, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data11.csv (3/15)
Detected 10 channels (excluding time column).


1       -0.832400
2       -0.832400
3       -0.832400
4       -1.554049
           ...   
22312   -1.554049
22313   -1.554049
22314   -1.554049
22315   -1.554049
22316   -1.554049
Name: 305, Length: 22317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.078749
2        0.066240
3        0.066240
4        0.078749
           ...   
22312    1.117040
22313    1.092021
22314    1.016964
22315    0.954416
22316    0.854340
Name: 317, Length: 22317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.542718
2       -0.444232
3       -0.378574
4       -0.280087
           ...   
22312   -0.001042
22313   -0.050285
22314   -0.083114
22315   -0.083114
22316   -0.115943
Name: 266, Length: 22317, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data12.csv (4/15)
Detected 10 channels (excluding time column).


1       -2.270786
2       -2.270786
3       -2.270786
4       -1.618301
           ...   
22304    0.339156
22305   -0.965815
22306   -0.965815
22307   -0.313330
22308   -0.313330
Name: 304, Length: 22309, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.041594
2        0.041594
3        0.041594
4        0.030784
           ...   
22304   -0.044889
22305   -0.012457
22306   -0.001647
22307   -0.012457
22308   -0.001647
Name: 312, Length: 22309, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.018306
2        0.010806
3       -0.018306
4       -0.032862
           ...   
22304    0.039918
22305    0.039918
22306    0.025362
22307   -0.018306
22308   -0.018306
Name: 303, Length: 22309, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data13.csv (5/15)
Detected 10 channels (excluding time column).


1       -0.080190
2       -0.080190
3       -0.080190
4       -0.491295
           ...   
22260   -0.902400
22261   -0.491295
22262   -1.313505
22263   -1.313505
22264   -0.902400
Name: 306, Length: 22265, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        1.287102
2        1.428335
3        1.569569
4        1.651956
           ...   
22260    1.192946
22261    0.757475
22262    0.439698
22263    0.086614
22264   -0.266471
Name: 395, Length: 22265, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.173559
2        0.290442
3        0.407325
4        0.465766
           ...   
22260    0.217390
22261    0.217390
22262    0.232000
22263    0.275831
22264    0.275831
Name: 300, Length: 22265, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data14.csv (6/15)
Detected 10 channels (excluding time column).


1       -1.962066
2       -0.660634
3       -0.660634
4        0.640797
           ...   
22301   -0.660634
22302   -0.660634
22303    0.640797
22304   -0.660634
22305   -0.660634
Name: 304, Length: 22306, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.391314
2        0.430486
3        0.469657
4        0.456600
           ...   
22301    0.025714
22302   -0.000400
22303   -0.026515
22304   -0.000400
22305   -0.000400
Name: 329, Length: 22306, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.603585
2       -0.625398
3       -0.647210
4       -0.669023
           ...   
22301   -0.036461
22302   -0.058274
22303   -0.036461
22304    0.007164
22305   -0.014649
Name: 284, Length: 22306, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data15.csv (7/15)
Detected 10 channels (excluding time column).


1        0.709376
2        0.709376
3        0.709376
4       -1.018934
           ...   
22456   -1.018934
22457    0.709376
22458    0.709376
22459   -1.018934
22460    0.709376
Name: 305, Length: 22461, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.429730
2       -0.367977
3       -0.330925
4       -0.306224
           ...   
22456   -0.022162
22457   -0.046863
22458   -0.034513
22459   -0.034513
22460   -0.046863
Name: 269, Length: 22461, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.205028
2        0.205028
3        0.156892
4        0.132824
           ...   
22456    0.012484
22457   -0.011585
22458    0.036552
22459    0.036552
22460    0.012484
Name: 310, Length: 22461, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data2.csv (8/15)
Detected 10 channels (excluding time column).


1        1.176720
2       -0.123785
3        1.176720
4        1.176720
           ...   
22121   -0.123785
22122   -0.123785
22123   -0.123785
22124   -0.123785
22125   -0.123785
Name: 306, Length: 22126, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.103353
2        0.021503
3       -0.019423
4        0.062428
           ...   
22121    0.072659
22122    0.093122
22123    0.082891
22124    0.103353
22125    0.134047
Name: 313, Length: 22126, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.071769
2        0.023573
3        0.023573
4        0.071769
           ...   
22121    0.059720
22122    0.083818
22123    0.095866
22124    0.119964
22125    0.144062
Name: 302, Length: 22126, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data3.csv (9/15)
Detected 10 channels (excluding time column).


1        1.855306
2       -0.073787
3       -0.073787
4       -0.073787
           ...   
22296    1.855306
22297   -0.073787
22298   -0.073787
22299   -2.002879
22300   -0.073787
Name: 304, Length: 22301, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.004799
2        0.004799
3        0.029941
4        0.029941
           ...   
22296   -0.083195
22297   -0.083195
22298   -0.070625
22299   -0.070625
22300   -0.108337
Name: 305, Length: 22301, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.042013
2        0.042013
3        0.053538
4        0.065062
           ...   
22296    0.088111
22297    0.076587
22298    0.099635
22299    0.076587
22300    0.065062
Name: 305.1, Length: 22301, dtype: float64' has dtype incompatible with int64, ple

Processing sensor_data4.csv (10/15)
Detected 10 channels (excluding time column).


1       -1.382926
2       -0.338009
3       -0.338009
4       -0.338009
           ...   
22219   -1.382926
22220   -1.382926
22221   -0.338009
22222   -0.338009
22223   -1.382926
Name: 304, Length: 22224, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.082638
2        0.082638
3        0.106773
4        0.138955
           ...   
22219    1.064162
22220    1.056117
22221    1.048071
22222    1.023935
22223    0.975664
Name: 318, Length: 22224, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.229847
2        0.003291
3       -0.212967
4       -0.408629
           ...   
22219    0.301932
22220    0.271039
22221    0.250443
22222    0.229847
22223    0.198953
Name: 364, Length: 22224, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data5.csv (11/15)
Detected 10 channels (excluding time column).


1        0.489099
2        0.489099
3        0.489099
4        0.489099
           ...   
22107    0.489099
22108   -1.385894
22109   -1.385894
22110   -1.385894
22111    0.489099
Name: 303, Length: 22112, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        5.955064
2        6.160061
3        6.321901
4        6.397427
           ...   
22107    0.344603
22108    0.376971
22109    0.376971
22110    0.366182
22111    0.355392
Name: 804, Length: 22112, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        3.733359
2        3.474104
3        3.153848
4        2.894594
           ...   
22107   -0.612969
22108   -0.536718
22109   -0.490967
22110   -0.445216
22111   -0.353714
Name: 590, Length: 22112, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data6.csv (12/15)
Detected 10 channels (excluding time column).


1       -1.809805
2       -0.800597
3        0.208610
4       -0.800597
           ...   
22181    1.217818
22182    0.208610
22183    1.217818
22184    0.208610
22185    0.208610
Name: 303, Length: 22186, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.066430
2        0.042341
3        0.030296
4        0.066430
           ...   
22181    0.090519
22182    0.078474
22183    0.066430
22184    0.078474
22185    0.066430
Name: 310, Length: 22186, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.003142
2       -0.009509
3       -0.009509
4        0.003142
           ...   
22181    0.116995
22182    0.104345
22183    0.104345
22184    0.129646
22185    0.129646
Name: 300, Length: 22186, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data7.csv (13/15)
Detected 10 channels (excluding time column).


1       -0.212562
2        1.007459
3       -0.212562
4        1.007459
           ...   
22081    1.007459
22082    1.007459
22083    1.007459
22084   -0.212562
22085    1.007459
Name: 305, Length: 22086, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.004996
2       -0.006915
3       -0.006915
4       -0.042649
           ...   
22081   -0.030738
22082   -0.006915
22083   -0.006915
22084   -0.006915
22085   -0.006915
Name: 310, Length: 22086, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.001635
2        0.009678
3        0.020991
4        0.009678
           ...   
22081    0.020991
22082    0.032304
22083    0.020991
22084    0.020991
22085    0.032304
Name: 301, Length: 22086, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data8.csv (14/15)
Detected 10 channels (excluding time column).


1       -0.105940
2       -0.105940
3       -0.105940
4       -0.105940
           ...   
22182    0.969801
22183   -0.105940
22184   -0.105940
22185   -0.105940
22186   -0.105940
Name: 306, Length: 22187, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.085506
2        0.071930
3        0.071930
4        0.099082
           ...   
22182    0.126235
22183    0.112659
22184    0.112659
22185    0.139811
22186    0.139811
Name: 309, Length: 22187, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.221709
2       -0.276146
3       -0.258001
4       -0.221709
           ...   
22182    0.050477
22183    0.014186
22184    0.032331
22185    0.068623
22186    0.086769
Name: 292, Length: 22187, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data9.csv (15/15)
Detected 10 channels (excluding time column).


1       -0.327510
2       -0.327510
3        0.968289
4       -0.327510
           ...   
22377   -0.327510
22378   -0.327510
22379   -0.327510
22380    0.968289
22381    0.968289
Name: 306, Length: 22382, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.091994
2       -0.091994
3       -0.091994
4       -0.079637
           ...   
22377   -0.549196
22378   -0.598623
22379   -0.623337
22380   -0.635694
22381   -0.685121
Name: 300, Length: 22382, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.163314
2        0.146247
3        0.112113
4        0.077980
           ...   
22377   -2.618566
22378   -2.550299
22379   -2.499099
22380   -2.328432
22381   -2.140697
Name: 310, Length: 22382, dtype: float64' has dtype incompatible with int64, pleas

Feature extraction complete! Shape: (390, 49, 15, 10)
Labels saved: (390,)
Feature matrix saved at: data\FZH\processed_data/feature_matrix.npy
Labels saved at: data\FZH\processed_data/labels.npy


In [12]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_batches, num_windows, 15, num_channels)
    - X_test: 测试集特征，形状 (test_batches, num_windows, 15, num_channels)
    - y_train: 训练集标签，形状 (train_batches,)
    - y_test: 测试集标签，形状 (test_batches,)
    """
    # **加载数据**
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    # **数据基本信息**
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **划分训练集和测试集**
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # **打印数据划分信息**
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)



Loaded features from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\feature_matrix.npy, shape: (390, 49, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 49, 15, 10), y_train: (312,)
Testing set: X_test: (78, 49, 15, 10), y_test: (78,)


KNN

In [14]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

def load_data(data_folder, test_size=0.2, random_state=42):
    """ 加载 MyoWare EMG 特征数据，并拆分训练集和测试集 """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到！")

    X = np.load(feature_path)  # 形状: (num_batches, num_windows, 15, num_channels)

    # X = X[:,:,:,:6]
    y = np.load(label_path)    # 形状: (num_batches,)

    print(f"加载特征矩阵: {X.shape}")
    print(f"加载标签: {y.shape}")

    # **展平数据**: (num_batches, num_windows, 15, num_channels) → (num_batches, num_windows * 15 * num_channels)
    num_batches, num_windows, num_rows, num_channels = X.shape
    X = X.reshape(num_batches, -1)  # 变成 (num_batches, num_features)

    # **确保每个类别都有足够样本**
    num_classes = len(np.unique(y))
    test_size = max(num_classes, int(len(y) * test_size))  # 至少保证每个类别有样本

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    print(f"训练集: {X_train.shape}, 测试集: {X_test.shape}")
    return X_train, X_test, y_train, y_test

# **加载数据**
# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)

# **检查并去除 NaN**
print(f"NaNs in X_train: {np.isnan(X_train).sum()} / {X_train.size}")
print(f"NaNs in X_test: {np.isnan(X_test).sum()} / {X_test.size}")

# 过滤 NaN 样本
mask_train = ~np.isnan(X_train).any(axis=1)
mask_test = ~np.isnan(X_test).any(axis=1)

X_train, y_train = X_train[mask_train], y_train[mask_train]
X_test, y_test = X_test[mask_test], y_test[mask_test]
# print(X_train.shape, y_train.shape)
# **再次检查 NaN**
print(f"NaNs in X_train (after cleaning): {np.isnan(X_train).sum()} / {X_train.size}")
print(f"NaNs in X_test (after cleaning): {np.isnan(X_test).sum()} / {X_test.size}")

# **KNN 训练**
knn = KNeighborsClassifier(n_neighbors=5)  # k=5
knn.fit(X_train, y_train)

# **KNN 预测**
y_pred = knn.predict(X_test)

# **评估**
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN 测试集准确率: {accuracy:.2f}")

# **打印分类报告**
print("分类报告:\n", classification_report(y_test, y_pred))


加载特征矩阵: (390, 49, 15, 10)
加载标签: (390,)
训练集: (312, 7350), 测试集: (78, 7350)
NaNs in X_train: 0 / 2293200
NaNs in X_test: 0 / 573300
NaNs in X_train (after cleaning): 0 / 2293200
NaNs in X_test (after cleaning): 0 / 573300
KNN 测试集准确率: 0.00
分类报告:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       3.0
           2       0.00      0.00      0.00       3.0
           3       0.00      0.00      0.00       3.0
           4       0.00      0.00      0.00       3.0
           5       0.00      0.00      0.00       3.0
           6       0.00      0.00      0.00       3.0
           7       0.00      0.00      0.00       3.0
           8       0.00      0.00      0.00       3.0
           9       0.00      0.00      0.00       3.0
          10       0.00      0.00      0.00       3.0
          11       0.00      0.00      0.00       3.0
          12       0.00      0.00      0.00       3.0
          13       0.00      0.00      0.00       3.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Libsvm

In [15]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from libsvm.svmutil import *

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_samples, num_features)
    - X_test: 测试集特征，形状 (test_samples, num_features)
    - y_train: 训练集标签，形状 (train_samples,)
    - y_test: 测试集标签，形状 (test_samples,)
    """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **展平数据 (num_batches, num_windows, 15, num_channels) -> (num_batches, num_windows * 15 * num_channels)**
    num_batches, num_windows, height, num_channels = X.shape
    X = X.reshape(num_batches, -1)  # 变成 (num_batches, num_windows * 15 * num_channels)

    # **划分训练集和测试集**
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **数据路径**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)

# **转换数据格式，适应 LIBSVM**
X_train_list = [dict(enumerate(x, 1)) for x in X_train]  # 将 NumPy 数组转换为 LIBSVM 格式的字典列表
X_test_list = [dict(enumerate(x, 1)) for x in X_test]  # 同样转换测试集

# **训练 SVM 模型**
svm_model = svm_train(y_train.tolist(), X_train_list, '-s 0 -t 2 -c 1')  # -s 0: C-SVC, -t 2: RBF 核, -c 1: 惩罚参数C

# **在测试集上评估**
pred_labels, acc, vals = svm_predict(y_test.tolist(), X_test_list, svm_model)

print(f"测试集准确率: {acc[0]:.2f}%")


Loaded features from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\feature_matrix.npy, shape: (390, 49, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 7350), y_train: (312,)
Testing set: X_test: (78, 7350), y_test: (78,)
Accuracy = 0% (0/78) (classification)
测试集准确率: 0.00%


LSTM

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Conv1D, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling1D


# **加载数据**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)
# X_train, X_test, y_train, y_test = load_and_split_data(data_folder, selected_labels=[24, 25, 26])
X_train = np.nan_to_num(X_train, nan=0.0)
X_test = np.nan_to_num(X_test, nan=0.0)
y_train = np.nan_to_num(y_train, nan = 0.0)
y_test = np.nan_to_num(y_test, nan = 0.0)
# **获取数据形状**
num_batches = X_train.shape[0]  # batch 维度
num_windows = X_train.shape[1]  # 时间步（窗口数 59）
num_features = X_train.shape[2]  # 特征数（15）
num_channels = X_train.shape[3]  # 通道数（1）

# **检测类别数量**
unique_classes = np.unique(y_train)
num_classes = len(unique_classes)  # 确保类别数正确

print(f"Corrected Classes: {num_classes}, Batches: {num_batches}, Windows: {num_windows}, Features: {num_features}, Channels: {num_channels}")

# **保持 X 形状**
X_train = X_train.reshape(num_batches, num_windows, num_features, num_channels)
X_test = X_test.reshape(X_test.shape[0], num_windows, num_features, num_channels)
X_train = X_train[:,:,:,:]
X_test = X_test[:,:,:,:]

# print(X_train.shape)

num_features = X_train.shape[2] 
num_channels = X_train.shape[3]
# **标签编码**
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)  # 转换成 0,1,2
y_test = encoder.transform(y_test)

# **转换为 One-Hot**
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")  # (batch, 59, 15, 1) (batch, 3)
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")  # (batch, 59, 15, 1) (batch, 3)


# model = Sequential([
#     TimeDistributed(Flatten(), input_shape=(num_windows, 4, 10)),  # 把 (4, 10) 展平成 40
#     LSTM(64, return_sequences=True),
#     Dropout(0.3),
#     LSTM(32, return_sequences=False),
#     Dropout(0.3),
#     Dense(32, activation='relu'),
#     Dense(num_classes, activation='softmax')  # 多分类
# ])

model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same', input_shape=(num_windows, num_features * num_channels)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=256, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')  # 多分类输出
])

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], -1)  # (batch_size, 29, 40)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], -1)  

# # print(num_features)
# # **构建 LSTM 处理通道的模型**

# model = Sequential([
#     TimeDistributed(Conv1D(filters=16, kernel_size=3, activation='relu'), input_shape=(num_windows, num_features, num_channels)),  
#     TimeDistributed(GlobalAveragePooling1D()),  # 只对特征维度池化，不影响时间维度
#     LSTM(64, return_sequences=True),  # 保持时间序列结构
#     Dropout(0.2),
#     LSTM(128, return_sequences=False),  # 输出 2D (batch, 128)
#     Dropout(0.2),
#     Dense(64, activation='relu'),
#     Dropout(0.2),
#     Dense(num_classes, activation='softmax')  # 最终分类
# ])


# **编译模型**
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# **训练模型**
epochs = 40

batch_size = 8

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1
)

# **测试模型**
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# **计算准确率**
accuracy = accuracy_score(y_test_classes, y_pred_classes)
print(f"Test Accuracy: {accuracy:.4f}")

# **保存模型**
# model.save("rnn_emg_model.h5")
# print("Model saved as rnn_emg_model.h5")


Loaded features from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\feature_matrix.npy, shape: (390, 49, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 49, 15, 10), y_train: (312,)
Testing set: X_test: (78, 49, 15, 10), y_test: (78,)
Corrected Classes: 26, Batches: 312, Windows: 49, Features: 15, Channels: 10
X_train shape: (312, 49, 15, 10), y_train shape: (312, 26)
X_test shape: (78, 49, 15, 10), y_test shape: (78, 26)
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Test Acc

———————————————————————————————————

WXR

In [62]:
import numpy as np
import pandas as pd
import os
import scipy.stats as stats
from scipy.signal import welch

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    处理 EMG 数据文件，自动选择 `shuffle_order.xlsx` 中的 5 个手势类别，
    然后进行窗口切分，并提取特征。
    """
    fs = 1000  # 采样率 1000Hz
    target_samples = 5000  # 每个手势的固定采样点数
    window_size = 200  # 窗口大小 200 采样点（0.2s）
    step_size = 100  # 窗口滑动步长 100 采样点（0.1s）

    # 创建输出文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")

    # **检查 shuffle_order.xlsx 是否有 NaN**
    if shuffle_df.isnull().values.any():
        print("Warning: `shuffle_order.xlsx` 解析后包含 NaN 值，已填充为 -1")
        shuffle_df = shuffle_df.fillna(-1)  # 避免 NaN 影响索引匹配

    # **自动提取 5 个唯一的类别**
    unique_labels = shuffle_df.iloc[:, :].values.flatten()
    unique_labels = np.unique(unique_labels)  # 获取所有唯一的类别
    unique_labels = unique_labels[unique_labels != -1]  # 移除 -1 (填充的无效数据)
    unique_labels = unique_labels.astype(int)  # 转换为整数
    
    if len(unique_labels) < 5:
        raise ValueError("数据中可用的手势类别不足 5 个，请检查 `shuffle_order.xlsx`。")
    
    selected_classes = unique_labels[:5]  # 选择前 5 个类别
    print(f"自动选择的 5 个手势类别: {selected_classes}")

    all_features = []
    all_labels = []
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        if file_idx >= len(shuffle_df):
            print(f"Warning: `shuffle_order.xlsx` 只有 {len(shuffle_df)} 行，但尝试访问第 {file_idx} 行。跳过该文件。")
            continue  # 跳过越界文件

        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx + 1}/{len(csv_files)})")

        # 读取数据
        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1  # 去掉时间列
        print(f"Detected {num_channels} channels (excluding time column).")

        # **确保数据是数值型**
        raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)

        # 归一化数据
        data = raw_data.iloc[:, 1:]
        normalized_data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        # 仅保留选定类别的数据
        valid_segments = []
        valid_labels = []
        for i in selected_classes:
            start_idx = np.where(shuffle_df.iloc[file_idx, :].values == i)[0]  # 找到匹配的索引
            if len(start_idx) == 0:
                print(f"Warning: 文件 {file_name} 未找到类别 {i}，跳过该类别。")
                continue  # 该 CSV 文件可能没有这个类别，跳过

            start_idx = shuffle_df.iloc[file_idx, start_idx[0]]  # 获取具体索引
            if start_idx + target_samples <= len(raw_data):
                segment = raw_data.iloc[start_idx:start_idx + target_samples, 1:].values
            else:
                segment = raw_data.iloc[start_idx:, 1:].values
                pad_size = target_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)

            # **确保 segment 仅包含数值**
            segment = segment.astype(np.float64)

            valid_segments.append(segment)
            valid_labels.append(i)  # 记录手势类别

        if len(valid_segments) == 0:
            print(f"Warning: {file_name} 没有匹配的手势数据，跳过该文件。")
            continue

        valid_segments = np.array(valid_segments)  # 形状 (5, 5000, num_channels)

        # 进行窗口划分和特征提取
        features_batches = []
        for seg in valid_segments:
            windows = [seg[j:j + window_size, :] for j in range(0, target_samples - window_size + 1, step_size)]
            batch_features = np.array([extract_features(window) for window in windows])  # (num_windows, 15, num_channels)
            features_batches.append(batch_features)

        processed_segments = np.array(features_batches)  # (5, num_windows, 15, num_channels)

        # 存储数据
        all_features.append(processed_segments)
        all_labels.extend(valid_labels)

    # 最终转换为 NumPy 数组
    if len(all_features) == 0:
        raise ValueError("未提取到任何手势数据，请检查 `shuffle_order.xlsx` 和 CSV 文件。")

    all_features = np.vstack(all_features)  # (总手势数, num_windows, 15, num_channels)
    all_labels = np.array(all_labels)  # (总手势数,)

    # 保存数据
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

# 运行代码
root = r"data\WXR_5"
input_folder = root
output_folder = os.path.join(root, "processed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")

# 自动选择 `shuffle_order.xlsx` 中的 5 个类别
process_emg_folder(input_folder, output_folder, shuffle_order_file)


自动选择的 5 个手势类别: [1 2 3 4 5]
Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       332.0
2       330.0
3       329.0
4       330.0
        ...  
4221    346.0
4222    343.0
4223    336.0
4224    328.0
4225    325.0
Name: 334, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       304.0
2       303.0
3       304.0
4       305.0
        ...  
4221    268.0
4222    271.0
4223    271.0
4224    272.0
4225    275.0
Name: 302, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       327.0
2       328.0
3       330.0
4       332.0
        ...  
4221    328.0
4222    321.0
4223    313.0
4224    305.0
4225    300.0
Name: 322, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data10.csv (2/15)
Detected 10 channels (excluding time column).


1       305.0
2       304.0
3       306.0
4       307.0
        ...  
4312    312.0
4313    311.0
4314    311.0
4315    311.0
4316    311.0
Name: 304, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       304.0
2       295.0
3       300.0
4       306.0
        ...  
4312    283.0
4313    274.0
4314    269.0
4315    275.0
4316    275.0
Name: 296, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       303.0
2       302.0
3       303.0
4       303.0
        ...  
4312    302.0
4313    303.0
4314    303.0
4315    306.0
4316    307.0
Name: 303, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data11.csv (3/15)
Detected 10 channels (excluding time column).


1       302.0
2       304.0
3       303.0
4       303.0
        ...  
4265    301.0
4266    301.0
4267    302.0
4268    303.0
4269    302.0
Name: 304, Length: 4270, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       326.0
2       337.0
3       337.0
4       332.0
        ...  
4265    331.0
4266    325.0
4267    331.0
4268    337.0
4269    331.0
Name: 328, Length: 4270, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       301.0
2       302.0
3       301.0
4       300.0
        ...  
4265    309.0
4266    309.0
4267    310.0
4268    310.0
4269    309.0
Name: 302, Length: 4270, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data12.csv (4/15)
Detected 10 channels (excluding time column).


1       293.0
2       292.0
3       295.0
4       294.0
        ...  
4312    306.0
4313    306.0
4314    307.0
4315    306.0
4316    306.0
Name: 293, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       285.0
2       278.0
3       284.0
4       287.0
        ...  
4312    307.0
4313    308.0
4314    314.0
4315    306.0
4316    303.0
Name: 282, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       313.0
2       313.0
3       315.0
4       315.0
        ...  
4312    301.0
4313    301.0
4314    301.0
4315    299.0
4316    300.0
Name: 312, Length: 4317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data13.csv (5/15)
Detected 10 channels (excluding time column).


1       307.0
2       308.0
3       309.0
4       307.0
        ...  
4291    291.0
4292    289.0
4293    291.0
4294    290.0
4295    289.0
Name: 310, Length: 4296, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       292.0
2       296.0
3       302.0
4       294.0
        ...  
4291    259.0
4292    257.0
4293    268.0
4294    275.0
4295    271.0
Name: 300, Length: 4296, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       301.0
2       302.0
3       302.0
4       301.0
        ...  
4291    290.0
4292    287.0
4293    287.0
4294    286.0
4295    284.0
Name: 303, Length: 4296, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data14.csv (6/15)
Detected 10 channels (excluding time column).


1       301.0
2       299.0
3       300.0
4       298.0
        ...  
4269    304.0
4270    304.0
4271    304.0
4272    304.0
4273    301.0
Name: 303, Length: 4274, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       310.0
2       310.0
3       321.0
4       320.0
        ...  
4269    305.0
4270    303.0
4271    314.0
4272    314.0
4273    307.0
Name: 311, Length: 4274, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       304.0
2       304.0
3       306.0
4       305.0
        ...  
4269    298.0
4270    297.0
4271    298.0
4272    298.0
4273    298.0
Name: 304, Length: 4274, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data15.csv (7/15)
Detected 10 channels (excluding time column).


1       296.0
2       296.0
3       294.0
4       295.0
        ...  
4318    300.0
4319    303.0
4320    300.0
4321    300.0
4322    302.0
Name: 296, Length: 4323, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       286.0
2       293.0
3       283.0
4       286.0
        ...  
4318    285.0
4319    296.0
4320    289.0
4321    283.0
4322    295.0
Name: 286, Length: 4323, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       300.0
2       299.0
3       297.0
4       297.0
        ...  
4318    298.0
4319    300.0
4320    299.0
4321    299.0
4322    299.0
Name: 300, Length: 4323, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data2.csv (8/15)
Detected 10 channels (excluding time column).


1       311.0
2       310.0
3       312.0
4       314.0
        ...  
4179    283.0
4180    279.0
4181    279.0
4182    280.0
4183    279.0
Name: 307, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       336.0
2       330.0
3       325.0
4       321.0
        ...  
4179    294.0
4180    292.0
4181    290.0
4182    290.0
4183    287.0
Name: 335, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       297.0
2       297.0
3       300.0
4       305.0
        ...  
4179    312.0
4180    311.0
4181    310.0
4182    311.0
4183    310.0
Name: 295, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data3.csv (9/15)
Detected 10 channels (excluding time column).


1       255.0
2       248.0
3       244.0
4       242.0
        ...  
4287    298.0
4288    296.0
4289    296.0
4290    298.0
4291    298.0
Name: 259, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       421.0
2       418.0
3       416.0
4       421.0
        ...  
4287    292.0
4288    291.0
4289    291.0
4290    293.0
4291    293.0
Name: 414, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       368.0
2       362.0
3       356.0
4       349.0
        ...  
4287    306.0
4288    302.0
4289    300.0
4290    297.0
4291    292.0
Name: 372, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data4.csv (10/15)
Detected 10 channels (excluding time column).


1       315.0
2       314.0
3       313.0
4       315.0
        ...  
4284    288.0
4285    292.0
4286    297.0
4287    302.0
4288    307.0
Name: 314, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       297.0
2       291.0
3       283.0
4       280.0
        ...  
4284    331.0
4285    330.0
4286    330.0
4287    325.0
4288    321.0
Name: 299, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       285.0
2       291.0
3       294.0
4       299.0
        ...  
4284    315.0
4285    311.0
4286    307.0
4287    302.0
4288    297.0
Name: 275, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data5.csv (11/15)
Detected 10 channels (excluding time column).


1       273.0
2       272.0
3       274.0
4       273.0
        ...  
4278    298.0
4279    301.0
4280    303.0
4281    304.0
4282    307.0
Name: 274, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       296.0
2       291.0
3       294.0
4       292.0
        ...  
4278    336.0
4279    339.0
4280    330.0
4281    325.0
4282    328.0
Name: 299, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       304.0
2       306.0
3       310.0
4       311.0
        ...  
4278    332.0
4279    332.0
4280    330.0
4281    329.0
4282    329.0
Name: 300, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data6.csv (12/15)
Detected 10 channels (excluding time column).


1       278.0
2       281.0
3       282.0
4       283.0
        ...  
4292    304.0
4293    303.0
4294    301.0
4295    302.0
4296    302.0
Name: 274, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       206.0
2       214.0
3       218.0
4       214.0
        ...  
4292    312.0
4293    308.0
4294    299.0
4295    299.0
4296    299.0
Name: 209, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       294.0
2       295.0
3       295.0
4       294.0
        ...  
4292    329.0
4293    327.0
4294    324.0
4295    322.0
4296    318.0
Name: 295, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Processing sensor_data7.csv (13/15)
Detected 10 channels (excluding time column).
Processing sensor_data8.csv (14/15)
Detected 10 channels (excluding time column).


1       281.0
2       284.0
3       286.0
4       285.0
        ...  
4240    306.0
4241    307.0
4242    308.0
4243    307.0
4244    306.0
Name: 281, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       241.0
2       245.0
3       250.0
4       242.0
        ...  
4240    311.0
4241    314.0
4242    320.0
4243    314.0
4244    313.0
Name: 247, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1       278.0
2       281.0
3       284.0
4       284.0
        ...  
4240    307.0
4241    309.0
4242    309.0
4243    308.0
4244    308.0
Name: 280, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = raw_data.iloc[:, 1:].astype(np.float64)
1

Feature extraction complete! Shape: (70, 49, 15, 10)
Labels saved: (70,)
Feature matrix saved at: data\WXR_5\processed_data/feature_matrix.npy
Labels saved at: data\WXR_5\processed_data/labels.npy


_________________________________________________

In [74]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy`，并拆分训练集和测试集。
    只保留自动选定的 5 个类别的数据。
    """
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到！")

    # 加载数据
    X = np.load(feature_path)  # 形状: (num_samples, num_windows, 15, num_channels)
    # X = X[:,:,:,:]
    y = np.load(label_path)    # 形状: (num_samples,)

    print(f"加载特征矩阵: {X.shape}")
    print(f"加载标签: {y.shape}")

    # **检查 NaN**
    total_nan = np.isnan(X).sum()
    print(f"数据集中 NaN 总数: {total_nan}")

    if total_nan == X.size:
        raise ValueError("所有数据都是 NaN，检查 `feature_matrix.npy` 是否正确。")

    # **用 0 替换 NaN**
    X = np.nan_to_num(X, nan=0.0)  # 把 NaN 变成 0，避免过滤掉所有数据

    # **展平数据**: (num_samples, num_windows, 15, num_channels) → (num_samples, num_features)
    num_samples, num_windows, num_rows, num_channels = X.shape
    X = X.reshape(num_samples, -1)  # 变成 (num_samples, num_features)

    # **确保每个类别都有足够样本**
    unique_classes, class_counts = np.unique(y, return_counts=True)
    print(f"数据集中唯一类别: {unique_classes}")
    print(f"每个类别的样本数量: {dict(zip(unique_classes, class_counts))}")

    if len(unique_classes) < 5:
        raise ValueError("数据类别不足 5 个，请检查 `shuffle_order.xlsx` 是否正确。")

    # **避免类别样本太少导致 stratify 失败**
    min_class_count = min(class_counts)
    if min_class_count < 2:
        print("Warning: 某些类别样本太少，禁用 `stratify`")
        stratify = None
    else:
        stratify = y

    # **拆分训练集和测试集**
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=stratify
    )

    print(f"训练集: {X_train.shape}, 测试集: {X_test.shape}")

    # **检查数据是否仍然为空**
    if X_train.shape[0] == 0 or X_test.shape[0] == 0:
        raise ValueError("训练集或测试集为空，可能因为数据中存在过多 NaN，请检查 `feature_matrix.npy`。")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"data\WXR_5\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)

# **KNN 训练**
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# **KNN 预测**
y_pred = knn.predict(X_test)

# **评估**
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN 测试集准确率: {accuracy:.2f}")

# **打印分类报告**
print("分类报告:\n", classification_report(y_test, y_pred))


In [76]:
# **转换数据格式，适应 LIBSVM**
X_train_list = [dict(enumerate(x, 1)) for x in X_train]  # 将 NumPy 数组转换为 LIBSVM 格式的字典列表
X_test_list = [dict(enumerate(x, 1)) for x in X_test]  # 同样转换测试集

# **训练 SVM 模型**
svm_model = svm_train(y_train.tolist(), X_train_list, '-s 0 -t 2 -c 1')  # -s 0: C-SVC, -t 2: RBF 核, -c 1: 惩罚参数C

# **在测试集上评估**
pred_labels, acc, vals = svm_predict(y_test.tolist(), X_test_list, svm_model)

print(f"测试集准确率: {acc[0]:.2f}%")


Accuracy = 7.14286% (1/14) (classification)
测试集准确率: 7.14%


In [None]:
# **转换数据格式，适应 LIBSVM**
X_train_list = [dict(enumerate(x, 1)) for x in X_train]  # 将 NumPy 数组转换为 LIBSVM 格式的字典列表
X_test_list = [dict(enumerate(x, 1)) for x in X_test]  # 同样转换测试集

# **训练 SVM 模型**
svm_model = svm_train(y_train.tolist(), X_train_list, '-s 0 -t 2 -c 1')  # -s 0: C-SVC, -t 2: RBF 核, -c 1: 惩罚参数C

# **在测试集上评估**
pred_labels, acc, vals = svm_predict(y_test.tolist(), X_test_list, svm_model)

print(f"测试集准确率: {acc[0]:.2f}%")


Accuracy = 7.14286% (1/14) (classification)
测试集准确率: 7.14%
