In [21]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    遍历 input_folder 下的所有 CSV 文件，处理 EMG 和 IMU 数据，并保存特征矩阵和标签到 output_folder。
    """
    # 设定采样率
    fs = 1000  # 采样率 Hz
    cycle_duration = 10  # 每个周期 10 秒
    skip_seconds = 5  # 跳过前 4 秒
    use_seconds = 5  # 需要保留的秒数

    cycle_samples = fs * cycle_duration  # 10 秒数据点数 = 10000
    skip_samples = fs * skip_seconds  # 跳过 4 秒 = 4000
    use_samples = fs * use_seconds  # 取后 6 秒 = 6000

    # 滑动窗口参数
    window_size = 200  # 200ms = 200 采样点
    step_size = 100  # 100ms = 100 采样点
    num_windows = (use_samples - window_size) // step_size + 1  # 计算窗口数

    # 创建存储文件夹
    os.makedirs(output_folder, exist_ok=True)

    # **读取 shuffle_order.xlsx**
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    print(shuffle_df.shape)
    
    if shuffle_df.shape[0] < 15:
        raise ValueError("标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。")

    # 存储所有数据和标签
    all_features = []
    all_labels = []

    # **遍历 CSV 文件**
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        # **读取数据**
        raw_data = pd.read_csv(file_path)

        # **检查通道数（忽略时间列）**
        num_channels = raw_data.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        # **归一化（Z-score 标准化）**
        data = raw_data.iloc[:, 1:]  # 去掉时间列
        mean_vals = data.mean(axis=0)
        std_vals = data.std(axis=0)
        normalized_data = (data - mean_vals) / (std_vals + 1e-10)  # 避免除零错误

        # **更新原始数据**
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        segments = []  # 存储所有分割后的数据
        labels = shuffle_df.iloc[file_idx].values.tolist()  # 获取该文件对应的 26 组标签
        num_cycles = 26  # 每个文件固定 26 段（每段 10s）

        for i in range(num_cycles):
            start_idx = i * cycle_samples + skip_samples  # 跳过前 4 秒
            end_idx = start_idx + use_samples  # 取后 6 秒

            if end_idx > len(raw_data):  # 处理不足 6000 采样点的情况
                segment = raw_data.iloc[start_idx:].values  # 取剩余数据
                pad_size = use_samples - len(segment)  # 计算填充数
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)  # 填充 0
            else:
                segment = raw_data.iloc[start_idx:end_idx].values  # 正常提取数据

            # **滑动窗口**
            windows = [
                segment[j:j + window_size, 1:]  # 取 200 采样点，忽略时间列
                for j in range(0, use_samples - window_size + 1, step_size)  # 滑动步长 100
            ]
            segments.append(np.array(windows))

        # **转换为 NumPy 数组**
        segments_array = np.array(segments)  # 形状 (26, num_windows, 200, num_channels)

        # **计算特征**
        features_batches = []
        for batch_idx in range(segments_array.shape[0]):  # 26 个 batch
            batch_features = []  # 存储当前 batch 的所有窗口特征
            for window_idx in range(segments_array.shape[1]):  # 计算每个窗口
                window = segments_array[batch_idx, window_idx]  # (200, num_channels)
                features = extract_features(window)  # 计算 (15, 10)
                batch_features.append(features)  # 存入 batch

            features_batches.append(np.array(batch_features))

        features_array = np.array(features_batches)  # (26, num_windows, 15, 10)

        # **存储数据**
        all_features.append(features_array)
        all_labels.extend(labels)  # 每个文件 26个 cycle，取 shuffle_order.xlsx 里的前 26 个标签

    # **最终转换为 NumPy 数组**
    all_features = np.vstack(all_features)  # 合并所有 batch，形状 (总 batch, num_windows, 15, 10)
    all_labels = np.array(all_labels)  # (总 batch,)

    # **保存**
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")


def extract_features(segment):
    """
    计算 EMG 和 IMU 的 15 个特征，并拼接成 (15, 10)
    """
    # **分离 EMG (前四列) 和 IMU (后六列)**
    emg_signals = segment[:, :4]  # 4 个 EMG 通道 (200, 4)
    imu_signals = segment[:, 4:]  # 6 个 IMU 通道 (200, 6)

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 10)

# **运行函数**
root = r"data\G"
input_folder = root
output_folder = os.path.join(root, "windowed_data")
shuffle_order_file = os.path.join(root,"shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)


(15, 26)
Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       -3.775955
2       -0.214248
3       -0.214248
4       -0.214248
           ...   
45715   -0.005459
45716   -0.017740
45717   -0.017740
45718   -0.017740
45719   -0.017740
Name: EMG1, Length: 45720, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.768217
2        0.054180
3        0.076403
4        0.054180
           ...   
45715   -0.012488
45716   -0.012488
45717   -0.034711
45718   -0.056934
45719   -0.034711
Name: EMG2, Length: 45720, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.806194
2       -0.135337
3       -0.135337
4       -0.135337
           ...   
45715   -0.023345
45716   -0.023345
45717   -0.023345
45718   -0.035788
45719   -0.023345
Name: EMG3, Length: 45720, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data10.csv (2/15)
Detected 10 channels (excluding time column).


1       -5.279856
2       -0.008469
3       -0.008469
4       -0.008469
           ...   
46195   -0.043035
46196   -0.043035
46197   -0.043035
46198   -0.112168
46199   -0.146735
Name: EMG1, Length: 46200, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.967664
2        0.052054
3        0.025846
4        0.025846
           ...   
46195   -0.052779
46196   -0.052779
46197   -0.026571
46198   -0.000362
46199   -0.000362
Name: EMG2, Length: 46200, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.075739
2        0.028550
3        0.045340
4        0.028550
           ...   
46195   -0.072193
46196   -0.122564
46197   -0.172935
46198   -0.206516
46199   -0.206516
Name: EMG3, Length: 46200, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data11.csv (3/15)
Detected 10 channels (excluding time column).


1       -5.222709
2       -0.004360
3       -0.004360
4       -0.004360
           ...   
46091    0.080907
46092    0.063854
46093    0.063854
46094    0.046800
46095    0.063854
Name: EMG1, Length: 46096, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.467259
2       -0.052592
3       -0.028041
4       -0.052592
           ...   
46091   -0.003489
46092   -0.003489
46093   -0.003489
46094   -0.003489
46095   -0.003489
Name: EMG2, Length: 46096, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.279730
2       -0.110086
3       -0.110086
4       -0.110086
           ...   
46091   -0.054305
46092   -0.054305
46093   -0.054305
46094   -0.054305
46095   -0.054305
Name: EMG3, Length: 46096, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data12.csv (4/15)
Detected 10 channels (excluding time column).


1       -5.727696
2        0.218083
3        0.218083
4        0.218083
           ...   
45838    1.287198
45839    1.249685
45840    1.193416
45841    1.118391
45842    1.005852
Name: EMG1, Length: 45843, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.407128
2       -0.008183
3       -0.008183
4       -0.008183
           ...   
45838    0.040494
45839    0.137849
45840    0.186526
45841    0.259542
45842    0.332558
Name: EMG2, Length: 45843, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.519855
2       -0.167930
3       -0.167930
4       -0.167930
           ...   
45838   -0.349729
45839   -0.349729
45840   -0.372454
45841   -0.395179
45842   -0.406541
Name: EMG3, Length: 45843, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data13.csv (5/15)
Detected 10 channels (excluding time column).


1       -5.565778
2        0.018900
3        0.018900
4        0.018900
           ...   
45864    0.000650
45865   -0.017601
45866   -0.035851
45867   -0.017601
45868   -0.054102
Name: EMG1, Length: 45869, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.123906
2       -0.060378
3       -0.060378
4       -0.033678
           ...   
45864   -0.006978
45865   -0.033678
45866   -0.033678
45867   -0.060378
45868   -0.060378
Name: EMG2, Length: 45869, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.184999
2       -0.099384
3       -0.089200
4       -0.099384
           ...   
45864   -0.099384
45865   -0.099384
45866   -0.089200
45867   -0.089200
45868   -0.089200
Name: EMG3, Length: 45869, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data14.csv (6/15)
Detected 10 channels (excluding time column).


1       -6.540702
2       -0.013084
3       -0.034556
4       -0.034556
           ...   
45710    0.029861
45711    0.051334
45712    0.051334
45713    0.051334
45714    0.029861
Name: EMG1, Length: 45715, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.784970
2       -0.057567
3       -0.057567
4       -0.031980
           ...   
45710   -0.057567
45711   -0.057567
45712   -0.057567
45713   -0.031980
45714   -0.031980
Name: EMG2, Length: 45715, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -3.021728
2       -0.101993
3       -0.111629
4       -0.101993
           ...   
45710   -0.140537
45711   -0.130901
45712   -0.130901
45713   -0.130901
45714   -0.130901
Name: EMG3, Length: 45715, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data15.csv (7/15)
Detected 10 channels (excluding time column).


1       -6.199383
2        1.348968
3        1.328622
4        1.328622
           ...   
46094    0.046826
46095    0.026480
46096    0.026480
46097    0.046826
46098    0.026480
Name: EMG1, Length: 46099, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -10.013532
2        -0.132160
3        -0.099222
4        -0.099222
           ...    
46094     0.032529
46095     0.065467
46096     0.065467
46097     0.065467
46098     0.065467
Name: EMG2, Length: 46099, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.404701
2        0.075113
3        0.089471
4        0.075113
           ...   
46094   -0.054112
46095   -0.054112
46096   -0.039754
46097   -0.025396
46098   -0.025396
Name: EMG3, Length: 46099, dtype: float64' has dtype incompatible with

Processing sensor_data2.csv (8/15)
Detected 10 channels (excluding time column).


1       -4.256927
2        0.379596
3        0.379596
4        0.379596
           ...   
45993   -0.064622
45994   -0.050740
45995   -0.064622
45996   -0.064622
45997   -0.064622
Name: EMG1, Length: 45998, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.987406
2       -0.300576
3       -0.280899
4       -0.280899
           ...   
45993   -0.044767
45994   -0.044767
45995   -0.064445
45996   -0.064445
45997   -0.064445
Name: EMG2, Length: 45998, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.257649
2        1.129313
3        1.129313
4        1.129313
           ...   
45993   -0.011931
45994   -0.032681
45995   -0.011931
45996   -0.011931
45997   -0.011931
Name: EMG3, Length: 45998, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data3.csv (9/15)
Detected 10 channels (excluding time column).


1       -3.960794
2        0.018730
3        0.005851
4        0.018730
           ...   
45586    0.005851
45587    0.018730
45588   -0.019907
45589   -0.019907
45590   -0.019907
Name: EMG1, Length: 45591, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -4.883248
2       -0.027118
3       -0.027118
4       -0.011092
           ...   
45586   -0.027118
45587   -0.043145
45588   -0.043145
45589   -0.043145
45590   -0.011092
Name: EMG2, Length: 45591, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.122701
2       -0.013994
3       -0.013994
4       -0.013994
           ...   
45586    0.019951
45587    0.019951
45588    0.019951
45589    0.036923
45590    0.036923
Name: EMG3, Length: 45591, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data4.csv (10/15)
Detected 10 channels (excluding time column).


1       -4.545249
2       -0.049926
3       -0.064762
4       -0.049926
           ...   
45895    0.024254
45896    0.039090
45897    0.024254
45898    0.024254
45899    0.024254
Name: EMG1, Length: 45900, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.977079
2        0.367423
3        0.367423
4        0.387066
           ...   
45895   -0.005783
45896    0.013860
45897    0.013860
45898    0.013860
45899    0.033502
Name: EMG2, Length: 45900, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.222593
2        0.291772
3        0.264482
4        0.264482
           ...   
45895   -0.008414
45896   -0.008414
45897   -0.008414
45898   -0.035704
45899   -0.008414
Name: EMG3, Length: 45900, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data5.csv (11/15)
Detected 10 channels (excluding time column).


1       -4.327092
2        0.049459
3        0.049459
4        0.049459
           ...   
45743    0.021314
45744    0.007242
45745    0.021314
45746    0.007242
45747    0.007242
Name: EMG1, Length: 45748, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -8.380283
2       -0.082293
3       -0.082293
4       -0.082293
           ...   
45743    0.027979
45744    0.027979
45745    0.027979
45746    0.027979
45747    0.000411
Name: EMG2, Length: 45748, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.213789
2       -0.058468
3       -0.058468
4       -0.034537
           ...   
45743   -0.106329
45744   -0.130260
45745   -0.130260
45746   -0.106329
45747   -0.130260
Name: EMG3, Length: 45748, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data6.csv (12/15)
Detected 10 channels (excluding time column).


1       -3.744446
2       -0.025337
3       -0.025337
4       -0.025337
           ...   
45962   -0.001108
45963   -0.001108
45964   -0.025337
45965   -0.025337
45966   -0.013223
Name: EMG1, Length: 45967, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.152634
2        0.021264
3        0.041440
4        0.021264
           ...   
45962   -0.119969
45963   -0.119969
45964   -0.119969
45965   -0.099793
45966   -0.099793
Name: EMG2, Length: 45967, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.079366
2        0.001534
3        0.001534
4       -0.018602
           ...   
45962    0.061940
45963    0.041804
45964    0.041804
45965    0.041804
45966    0.041804
Name: EMG3, Length: 45967, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data7.csv (13/15)
Detected 10 channels (excluding time column).


1       -4.453065
2       -0.034803
3       -0.034803
4       -0.034803
           ...   
46164   -0.107234
46165   -0.107234
46166   -0.107234
46167   -0.107234
46168   -0.092748
Name: EMG1, Length: 46169, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.632306
2       -0.203781
3       -0.203781
4       -0.181990
           ...   
46164   -0.051240
46165   -0.051240
46166   -0.029448
46167   -0.029448
46168   -0.007657
Name: EMG2, Length: 46169, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.749679
2        0.023593
3        0.045947
4        0.045947
           ...   
46164    0.001239
46165    0.001239
46166    0.023593
46167    0.023593
46168    0.023593
Name: EMG3, Length: 46169, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data8.csv (14/15)
Detected 10 channels (excluding time column).


1       -4.303589
2       -0.032001
3       -0.032001
4       -0.032001
           ...   
45996   -0.088206
45997   -0.088206
45998   -0.088206
45999   -0.102257
46000   -0.088206
Name: EMG1, Length: 46001, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.680219
2       -0.269486
3       -0.269486
4       -0.269486
           ...   
45996    0.037878
45997    0.015923
45998    0.015923
45999    0.015923
46000   -0.006031
Name: EMG2, Length: 46001, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -5.888249
2       -0.000472
3       -0.000472
4       -0.000472
           ...   
45996   -0.039464
45997   -0.039464
45998   -0.039464
45999   -0.039464
46000   -0.039464
Name: EMG3, Length: 46001, dtype: float64' has dtype incompatible with int64, pl

Processing sensor_data9.csv (15/15)
Detected 10 channels (excluding time column).


1       -4.303005
2        0.506601
3        0.562690
4        0.604756
           ...   
46333   -0.096353
46334   -0.110375
46335   -0.096353
46336   -0.124397
46337   -0.096353
Name: EMG1, Length: 46338, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -7.636328
2       -0.081067
3       -0.030866
4       -0.005765
           ...   
46333   -0.005765
46334    0.019335
46335    0.019335
46336    0.044436
46337    0.044436
Name: EMG2, Length: 46338, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -6.654441
2        0.534974
3        0.534974
4        0.534974
           ...   
46333    0.049798
46334    0.027745
46335    0.027745
46336    0.027745
46337    0.027745
Name: EMG3, Length: 46338, dtype: float64' has dtype incompatible with int64, pl

Feature extraction complete! Shape: (390, 49, 15, 10)
Labels saved: (390,)
Feature matrix saved at: data\G\windowed_data/feature_matrix.npy
Labels saved at: data\G\windowed_data/labels.npy


In [15]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    遍历 input_folder 下的所有 CSV 文件，处理 EMG 和 IMU 数据，并保存特征矩阵和标签到 output_folder。
    """
    # 设定采样率
    original_fs = 1000  # 原采样率 Hz
    target_fs = 200  # 目标采样率 Hz
    downsample_factor = original_fs // target_fs  # 降采样因子
    cycle_duration = 10  # 每个周期 10 秒
    skip_seconds = 5  # 跳过前 4 秒
    use_seconds = 5  # 需要保留的秒数

    cycle_samples = (original_fs * cycle_duration) // downsample_factor  # 2000
    skip_samples = (original_fs * skip_seconds) // downsample_factor  # 1000
    use_samples = (original_fs * use_seconds) // downsample_factor  # 1000

    # 滑动窗口参数
    window_size = 100  # 200ms = 40 采样点 (原 200, 降采样后 40)
    step_size = 50  # 100ms = 20 采样点 (原 100, 降采样后 20)
    num_windows = (use_samples - window_size) // step_size + 1  # 计算窗口数

    # 创建存储文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[0] < 15:
        raise ValueError("标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        # 降采样
        raw_data = raw_data.iloc[::downsample_factor, :].reset_index(drop=True)
        
        # 归一化
        data = raw_data.iloc[:, 1:]
        mean_vals = data.mean(axis=0)
        std_vals = data.std(axis=0)
        normalized_data = (data - mean_vals) / (std_vals + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        segments = []
        labels = shuffle_df.iloc[file_idx].values.tolist()
        num_cycles = 26

        for i in range(num_cycles):
            start_idx = i * cycle_samples + skip_samples
            end_idx = start_idx + use_samples

            if end_idx > len(raw_data):
                segment = raw_data.iloc[start_idx:].values
                pad_size = use_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            else:
                segment = raw_data.iloc[start_idx:end_idx].values

            windows = [
                segment[j:j + window_size, 1:]
                for j in range(0, use_samples - window_size + 1, step_size)
            ]
            segments.append(np.array(windows))

        segments_array = np.array(segments)
        
        features_batches = []
        for batch_idx in range(segments_array.shape[0]):
            batch_features = []
            for window_idx in range(segments_array.shape[1]):
                window = segments_array[batch_idx, window_idx]
                features = extract_features(window)
                batch_features.append(features)

            features_batches.append(np.array(batch_features))

        features_array = np.array(features_batches)

        all_features.append(features_array)
        all_labels.extend(labels)

    all_features = np.vstack(all_features)
    all_labels = np.array(all_labels)

    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    emg_signals = segment[:, :4]
    imu_signals = segment[:, 4:]

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=200, nperseg=40)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=200, nperseg=40)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T

    return np.concatenate((emg_features, imu_features), axis=1)

root = r"data\G"
input_folder = root
output_folder = os.path.join(root, "windowed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

ValueError: 标签文件数据不足 15 组！请检查 `shuffle_order.xlsx`。

In [5]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_batches, num_windows, 15, num_channels)
    - X_test: 测试集特征，形状 (test_batches, num_windows, 15, num_channels)
    - y_train: 训练集标签，形状 (train_batches,)
    - y_test: 测试集标签，形状 (test_batches,)
    """
    # **加载数据**
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    # **数据基本信息**
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **划分训练集和测试集**
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # **打印数据划分信息**
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\G\windowed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)



Loaded features from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\feature_matrix.npy, shape: (390, 19, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\G\windowed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 19, 15, 10), y_train: (312,)
Testing set: X_test: (78, 19, 15, 10), y_test: (78,)


----------------------------

# 处理分开的数据

In [23]:
import numpy as np
import pandas as pd
import os
import scipy.stats as stats
from scipy.signal import welch

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    处理 EMG 数据文件，先按照 shuffle_order.xlsx 划分手势数据，
    然后进行窗口切分，并提取特征。
    """
    fs = 1000  # 采样率 1000Hz，每秒 1000 个数据点
    target_samples = 5000  # 每个手势的固定采样点数
    window_size = 200  # 窗口大小 400 采样点（0.4s）
    step_size = 100  # 窗口滑动步长 200 采样点（0.2s）

    # 创建输出文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[1] < 26:
        raise ValueError("标签文件数据不足 26 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx + 1}/{len(csv_files)})")

        # 读取数据
        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1  # 去掉时间列
        print(f"Detected {num_channels} channels (excluding time column).")

        # 归一化数据
        data = raw_data.iloc[:, 1:]
        normalized_data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        # 按 shuffle_order.xlsx 划分 26 组手势
        valid_segments = []
        for i in range(26):
            start_idx = shuffle_df.iloc[file_idx, i]  # 该手势起始索引
            if start_idx + target_samples <= len(raw_data):
                segment = raw_data.iloc[start_idx:start_idx + target_samples, 1:].values
            else:
                segment = raw_data.iloc[start_idx:, 1:].values
                pad_size = target_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            valid_segments.append(segment)

        valid_segments = np.array(valid_segments)  # 形状 (26, 5000, num_channels)

        # 进行窗口划分和特征提取
        features_batches = []
        for seg in valid_segments:
            windows = [seg[j:j + window_size, :] for j in range(0, target_samples - window_size + 1, step_size)]
            batch_features = np.array([extract_features(window) for window in windows])  # (num_windows, 15, num_channels)
            features_batches.append(batch_features)

        processed_segments = np.array(features_batches)  # (26, num_windows, 15, num_channels)

        if processed_segments.shape[0] != 26:
            print(f"Warning: {file_name} 提取到 {processed_segments.shape[0]} 个手势，不是 26 个。")

        # 存储数据
        all_features.append(processed_segments)
        all_labels.extend(shuffle_df.iloc[file_idx, :26].values.tolist())

    # 最终转换为 NumPy 数组
    all_features = np.vstack(all_features)  # (总手势数, num_windows, 15, num_channels)
    all_labels = np.array(all_labels)  # (总手势数,)

    # 保存数据
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    """
    计算 EMG 和 IMU 的特征。
    """
    emg_signals = segment[:, :4]  # 4 个 EMG 通道
    imu_signals = segment[:, 4:]  # 6 个 IMU 通道

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 10)

# 运行代码
root = r"data\FZH"
input_folder = root
output_folder = os.path.join(root, "processed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       -0.033795
2       -0.033795
3       -0.033795
4       -0.033795
           ...   
22275   -0.033795
22276   -0.033795
22277   -0.033795
22278   -0.033795
22279   -0.033795
Name: 303, Length: 22280, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.132256
2       -0.114027
3       -0.123142
4       -0.132256
           ...   
22275    1.079998
22276    1.079998
22277    1.061769
22278    1.061769
22279    1.070883
Name: 293, Length: 22280, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.262535
2       -0.227800
3       -0.253851
4       -0.271218
           ...   
22275    0.223756
22276    0.258491
22277    0.293226
22278    0.362697
22279    0.414799
Name: 271, Length: 22280, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data10.csv (2/15)
Detected 10 channels (excluding time column).


1       -0.333118
2       -1.149433
3       -1.965747
4       -0.333118
           ...   
22307    2.115824
22308    2.115824
22309    2.115824
22310    2.115824
22311    2.115824
Name: 305, Length: 22312, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.964646
2       -1.178968
3       -1.250408
4       -1.321849
           ...   
22307    1.921554
22308    2.078723
22309    2.193028
22310    2.293045
22311    2.407350
Name: 272, Length: 22312, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.142334
2        0.028957
3       -0.068224
4       -0.116814
           ...   
22307    0.223318
22308    0.223318
22309    0.239515
22310    0.271908
22311    0.304302
Name: 323, Length: 22312, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data11.csv (3/15)
Detected 10 channels (excluding time column).


1       -0.832400
2       -0.832400
3       -0.832400
4       -1.554049
           ...   
22312   -1.554049
22313   -1.554049
22314   -1.554049
22315   -1.554049
22316   -1.554049
Name: 305, Length: 22317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.078749
2        0.066240
3        0.066240
4        0.078749
           ...   
22312    1.117040
22313    1.092021
22314    1.016964
22315    0.954416
22316    0.854340
Name: 317, Length: 22317, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.542718
2       -0.444232
3       -0.378574
4       -0.280087
           ...   
22312   -0.001042
22313   -0.050285
22314   -0.083114
22315   -0.083114
22316   -0.115943
Name: 266, Length: 22317, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data12.csv (4/15)
Detected 10 channels (excluding time column).


1       -2.270786
2       -2.270786
3       -2.270786
4       -1.618301
           ...   
22304    0.339156
22305   -0.965815
22306   -0.965815
22307   -0.313330
22308   -0.313330
Name: 304, Length: 22309, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.041594
2        0.041594
3        0.041594
4        0.030784
           ...   
22304   -0.044889
22305   -0.012457
22306   -0.001647
22307   -0.012457
22308   -0.001647
Name: 312, Length: 22309, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.018306
2        0.010806
3       -0.018306
4       -0.032862
           ...   
22304    0.039918
22305    0.039918
22306    0.025362
22307   -0.018306
22308   -0.018306
Name: 303, Length: 22309, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data13.csv (5/15)
Detected 10 channels (excluding time column).


1       -0.080190
2       -0.080190
3       -0.080190
4       -0.491295
           ...   
22260   -0.902400
22261   -0.491295
22262   -1.313505
22263   -1.313505
22264   -0.902400
Name: 306, Length: 22265, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        1.287102
2        1.428335
3        1.569569
4        1.651956
           ...   
22260    1.192946
22261    0.757475
22262    0.439698
22263    0.086614
22264   -0.266471
Name: 395, Length: 22265, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.173559
2        0.290442
3        0.407325
4        0.465766
           ...   
22260    0.217390
22261    0.217390
22262    0.232000
22263    0.275831
22264    0.275831
Name: 300, Length: 22265, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data14.csv (6/15)
Detected 10 channels (excluding time column).


1       -1.962066
2       -0.660634
3       -0.660634
4        0.640797
           ...   
22301   -0.660634
22302   -0.660634
22303    0.640797
22304   -0.660634
22305   -0.660634
Name: 304, Length: 22306, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.391314
2        0.430486
3        0.469657
4        0.456600
           ...   
22301    0.025714
22302   -0.000400
22303   -0.026515
22304   -0.000400
22305   -0.000400
Name: 329, Length: 22306, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.603585
2       -0.625398
3       -0.647210
4       -0.669023
           ...   
22301   -0.036461
22302   -0.058274
22303   -0.036461
22304    0.007164
22305   -0.014649
Name: 284, Length: 22306, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data15.csv (7/15)
Detected 10 channels (excluding time column).


1        0.709376
2        0.709376
3        0.709376
4       -1.018934
           ...   
22456   -1.018934
22457    0.709376
22458    0.709376
22459   -1.018934
22460    0.709376
Name: 305, Length: 22461, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.429730
2       -0.367977
3       -0.330925
4       -0.306224
           ...   
22456   -0.022162
22457   -0.046863
22458   -0.034513
22459   -0.034513
22460   -0.046863
Name: 269, Length: 22461, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.205028
2        0.205028
3        0.156892
4        0.132824
           ...   
22456    0.012484
22457   -0.011585
22458    0.036552
22459    0.036552
22460    0.012484
Name: 310, Length: 22461, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data2.csv (8/15)
Detected 10 channels (excluding time column).


1        1.176720
2       -0.123785
3        1.176720
4        1.176720
           ...   
22121   -0.123785
22122   -0.123785
22123   -0.123785
22124   -0.123785
22125   -0.123785
Name: 306, Length: 22126, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.103353
2        0.021503
3       -0.019423
4        0.062428
           ...   
22121    0.072659
22122    0.093122
22123    0.082891
22124    0.103353
22125    0.134047
Name: 313, Length: 22126, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.071769
2        0.023573
3        0.023573
4        0.071769
           ...   
22121    0.059720
22122    0.083818
22123    0.095866
22124    0.119964
22125    0.144062
Name: 302, Length: 22126, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data3.csv (9/15)
Detected 10 channels (excluding time column).


1        1.855306
2       -0.073787
3       -0.073787
4       -0.073787
           ...   
22296    1.855306
22297   -0.073787
22298   -0.073787
22299   -2.002879
22300   -0.073787
Name: 304, Length: 22301, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.004799
2        0.004799
3        0.029941
4        0.029941
           ...   
22296   -0.083195
22297   -0.083195
22298   -0.070625
22299   -0.070625
22300   -0.108337
Name: 305, Length: 22301, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.042013
2        0.042013
3        0.053538
4        0.065062
           ...   
22296    0.088111
22297    0.076587
22298    0.099635
22299    0.076587
22300    0.065062
Name: 305.1, Length: 22301, dtype: float64' has dtype incompatible with int64, ple

Processing sensor_data4.csv (10/15)
Detected 10 channels (excluding time column).


1       -1.382926
2       -0.338009
3       -0.338009
4       -0.338009
           ...   
22219   -1.382926
22220   -1.382926
22221   -0.338009
22222   -0.338009
22223   -1.382926
Name: 304, Length: 22224, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.082638
2        0.082638
3        0.106773
4        0.138955
           ...   
22219    1.064162
22220    1.056117
22221    1.048071
22222    1.023935
22223    0.975664
Name: 318, Length: 22224, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.229847
2        0.003291
3       -0.212967
4       -0.408629
           ...   
22219    0.301932
22220    0.271039
22221    0.250443
22222    0.229847
22223    0.198953
Name: 364, Length: 22224, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data5.csv (11/15)
Detected 10 channels (excluding time column).


1        0.489099
2        0.489099
3        0.489099
4        0.489099
           ...   
22107    0.489099
22108   -1.385894
22109   -1.385894
22110   -1.385894
22111    0.489099
Name: 303, Length: 22112, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        5.955064
2        6.160061
3        6.321901
4        6.397427
           ...   
22107    0.344603
22108    0.376971
22109    0.376971
22110    0.366182
22111    0.355392
Name: 804, Length: 22112, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        3.733359
2        3.474104
3        3.153848
4        2.894594
           ...   
22107   -0.612969
22108   -0.536718
22109   -0.490967
22110   -0.445216
22111   -0.353714
Name: 590, Length: 22112, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data6.csv (12/15)
Detected 10 channels (excluding time column).


1       -1.809805
2       -0.800597
3        0.208610
4       -0.800597
           ...   
22181    1.217818
22182    0.208610
22183    1.217818
22184    0.208610
22185    0.208610
Name: 303, Length: 22186, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.066430
2        0.042341
3        0.030296
4        0.066430
           ...   
22181    0.090519
22182    0.078474
22183    0.066430
22184    0.078474
22185    0.066430
Name: 310, Length: 22186, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.003142
2       -0.009509
3       -0.009509
4        0.003142
           ...   
22181    0.116995
22182    0.104345
22183    0.104345
22184    0.129646
22185    0.129646
Name: 300, Length: 22186, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data7.csv (13/15)
Detected 10 channels (excluding time column).


1       -0.212562
2        1.007459
3       -0.212562
4        1.007459
           ...   
22081    1.007459
22082    1.007459
22083    1.007459
22084   -0.212562
22085    1.007459
Name: 305, Length: 22086, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.004996
2       -0.006915
3       -0.006915
4       -0.042649
           ...   
22081   -0.030738
22082   -0.006915
22083   -0.006915
22084   -0.006915
22085   -0.006915
Name: 310, Length: 22086, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.001635
2        0.009678
3        0.020991
4        0.009678
           ...   
22081    0.020991
22082    0.032304
22083    0.020991
22084    0.020991
22085    0.032304
Name: 301, Length: 22086, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data8.csv (14/15)
Detected 10 channels (excluding time column).


1       -0.105940
2       -0.105940
3       -0.105940
4       -0.105940
           ...   
22182    0.969801
22183   -0.105940
22184   -0.105940
22185   -0.105940
22186   -0.105940
Name: 306, Length: 22187, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.085506
2        0.071930
3        0.071930
4        0.099082
           ...   
22182    0.126235
22183    0.112659
22184    0.112659
22185    0.139811
22186    0.139811
Name: 309, Length: 22187, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.221709
2       -0.276146
3       -0.258001
4       -0.221709
           ...   
22182    0.050477
22183    0.014186
22184    0.032331
22185    0.068623
22186    0.086769
Name: 292, Length: 22187, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data9.csv (15/15)
Detected 10 channels (excluding time column).


1       -0.327510
2       -0.327510
3        0.968289
4       -0.327510
           ...   
22377   -0.327510
22378   -0.327510
22379   -0.327510
22380    0.968289
22381    0.968289
Name: 306, Length: 22382, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.091994
2       -0.091994
3       -0.091994
4       -0.079637
           ...   
22377   -0.549196
22378   -0.598623
22379   -0.623337
22380   -0.635694
22381   -0.685121
Name: 300, Length: 22382, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.163314
2        0.146247
3        0.112113
4        0.077980
           ...   
22377   -2.618566
22378   -2.550299
22379   -2.499099
22380   -2.328432
22381   -2.140697
Name: 310, Length: 22382, dtype: float64' has dtype incompatible with int64, pleas

Feature extraction complete! Shape: (390, 49, 15, 10)
Labels saved: (390,)
Feature matrix saved at: data\FZH\processed_data/feature_matrix.npy
Labels saved at: data\FZH\processed_data/labels.npy


In [12]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_batches, num_windows, 15, num_channels)
    - X_test: 测试集特征，形状 (test_batches, num_windows, 15, num_channels)
    - y_train: 训练集标签，形状 (train_batches,)
    - y_test: 测试集标签，形状 (test_batches,)
    """
    # **加载数据**
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    # **数据基本信息**
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **划分训练集和测试集**
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # **打印数据划分信息**
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)



Loaded features from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\feature_matrix.npy, shape: (390, 49, 15, 10)
Loaded labels from E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data\labels.npy, shape: (390,)
Training set: X_train: (312, 49, 15, 10), y_train: (312,)
Testing set: X_test: (78, 49, 15, 10), y_test: (78,)


In [10]:
import numpy as np
import pandas as pd
import os
import scipy.stats as stats
from scipy.signal import welch
import os
import re

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    处理 EMG 数据文件，先按照 shuffle_order.xlsx 划分手势数据，
    然后进行窗口切分，并提取特征。
    """
    fs = 1000  # 采样率 1000Hz，每秒 1000 个数据点
    target_samples = 5000  # 每个手势的固定采样点数
    window_size = 200  # 窗口大小 400 采样点（0.4s）
    step_size = 100  # 窗口滑动步长 200 采样点（0.2s）

    # 创建输出文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[1] < 26:
        raise ValueError("标签文件数据不足 26 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    # csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])


    csv_files = sorted(
        [f for f in os.listdir(input_folder) if f.startswith("sensor_data") and f.endswith(".csv")],
        key=lambda x: int(re.search(r"sensor_data(\d+)\.csv", x).group(1))  # 按数字排序
    )
    
    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx + 1}/{len(csv_files)})")

        # 读取数据
        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1  # 去掉时间列
        print(f"Detected {num_channels} channels (excluding time column).")

        # 归一化数据
        data = raw_data.iloc[:, 1:]
        normalized_data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        # 按 shuffle_order.xlsx 划分 26 组手势
        valid_segments = []
        for i in range(0,26):
            start_idx = shuffle_df.iloc[file_idx+1, i]  # 该手势起始索引
            print(start_idx,file_idx+1)
            if start_idx + target_samples <= len(raw_data):
                segment = raw_data.iloc[start_idx:start_idx + target_samples, 1:].values
            else:
                segment = raw_data.iloc[start_idx:, 1:].values
                pad_size = target_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            valid_segments.append(segment)

        valid_segments = np.array(valid_segments)  # 形状 (26, 5000, num_channels)

        # 进行窗口划分和特征提取
        features_batches = []
        for seg in valid_segments:
            windows = [seg[j:j + window_size, :] for j in range(0, target_samples - window_size + 1, step_size)]
            batch_features = np.array([extract_features(window) for window in windows])  # (num_windows, 15, num_channels)
            features_batches.append(batch_features)

        processed_segments = np.array(features_batches)  # (26, num_windows, 15, num_channels)

        if processed_segments.shape[0] != 26:
            print(f"Warning: {file_name} 提取到 {processed_segments.shape[0]} 个手势，不是 26 个。")

        # 存储数据
        all_features.append(processed_segments)
        all_labels.extend(shuffle_df.iloc[file_idx, :26].values.tolist())

    # 最终转换为 NumPy 数组
    all_features = np.vstack(all_features)  # (总手势数, num_windows, 15, num_channels)
    all_labels = np.array(all_labels)  # (总手势数,)
    print(all_labels)

    # 保存数据
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    """
    计算 EMG 和 IMU 的特征。
    """
    emg_signals = segment[:, :4]  # 4 个 EMG 通道
    imu_signals = segment[:, 4:]  # 6 个 IMU 通道

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ])

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ])

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 10)

# 运行代码
root = r"data\WXR"
input_folder = root
output_folder = os.path.join(root, "processed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).


1       -0.079371
2       -0.087674
3       -0.104279
4       -0.079371
           ...   
22264   -0.411475
22265   -0.428080
22266   -0.428080
22267   -0.461290
22268   -0.469593
Name: 303, Length: 22269, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.097321
2       -0.113057
3       -0.148461
4       -0.124858
           ...   
22264   -0.286144
22265   -0.266475
22266   -0.266475
22267   -0.309747
22268   -0.313680
Name: 306, Length: 22269, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.064089
2       -0.064089
3       -0.064089
4       -0.064089
           ...   
22264    0.308394
22265    0.330305
22266    0.330305
22267    0.352215
22268    0.374126
Name: 303.1, Length: 22269, dtype: float64' has dtype incompatible with int64, ple

Processing sensor_data2.csv (2/15)
Detected 10 channels (excluding time column).


1        0.449669
2        0.459540
3        0.508897
4        0.538511
           ...   
22247   -0.191971
22248   -0.182100
22249   -0.211714
22250   -0.182100
22251   -0.172228
Name: 345, Length: 22252, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.035294
2       -0.037810
3       -0.008569
4        0.049915
           ...   
22247   -0.266870
22248   -0.223007
22249   -0.286364
22250   -0.276617
22251   -0.223007
Name: 333, Length: 22252, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.108070
2        0.129558
3        0.140301
4        0.129558
           ...   
22247   -0.171268
22248   -0.160524
22249   -0.171268
22250   -0.171268
22251   -0.182012
Name: 321, Length: 22252, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data3.csv (3/15)
Detected 10 channels (excluding time column).


1        0.255395
2        0.158106
3       -0.006536
4       -0.253500
           ...   
22138    0.075785
22139    0.075785
22140    0.068301
22141    0.060817
22142    0.060817
Name: 372, Length: 22143, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        1.136253
2        1.143503
3        1.143503
4        1.172499
           ...   
22138   -0.038111
22139   -0.023613
22140   -0.023613
22141   -0.030862
22142   -0.016363
Name: 464, Length: 22143, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.627769
2       -0.968112
3       -1.441634
4       -1.885560
           ...   
22138   -0.169045
22139   -0.169045
22140   -0.169045
22141   -0.169045
22142   -0.139450
Name: 284, Length: 22143, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data4.csv (4/15)
Detected 10 channels (excluding time column).


1       -0.498903
2       -0.507505
3       -0.507505
4       -0.490301
           ...   
22331   -0.172009
22332   -0.172009
22333   -0.146202
22334   -0.128997
22335   -0.111792
Name: 256, Length: 22336, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -1.326679
2       -1.443499
3       -1.560319
4       -1.636252
           ...   
22331    0.040114
22332    0.034273
22333    0.034273
22334    0.034273
22335    0.022591
Name: 125, Length: 22336, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.359198
2       -0.388443
3       -0.388443
4       -0.373820
           ...   
22331    0.445042
22332    0.445042
22333    0.474287
22334    0.488910
22335    0.474287
Name: 280, Length: 22336, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data5.csv (5/15)
Detected 10 channels (excluding time column).


1       -0.002834
2       -0.002834
3        0.012783
4        0.059632
           ...   
22237   -0.112151
22238   -0.119959
22239   -0.135576
22240   -0.135576
22241   -0.135576
Name: 313, Length: 22242, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.016448
2        0.001248
3        0.016448
4        0.031649
           ...   
22237   -0.348364
22238   -0.355964
22239   -0.363565
22240   -0.363565
22241   -0.371165
Name: 313.1, Length: 22242, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.016468
2       -0.046039
3       -0.060824
4       -0.046039
           ...   
22237   -0.208676
22238   -0.223461
22239   -0.253031
22240   -0.253031
22241   -0.253031
Name: 304, Length: 22242, dtype: float64' has dtype incompatible with int64, ple

Processing sensor_data6.csv (6/15)
Detected 10 channels (excluding time column).


1       -0.048303
2       -0.056093
3       -0.048303
4       -0.048303
           ...   
22192   -0.289790
22193   -0.305370
22194   -0.336529
22195   -0.359899
22196   -0.383269
Name: 307, Length: 22197, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.119696
2       -0.112319
3       -0.112319
4       -0.112319
           ...   
22192   -0.274611
22193   -0.252481
22194   -0.237727
22195   -0.215596
22196   -0.178711
Name: 304, Length: 22197, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.048477
2       -0.061902
3       -0.048477
4       -0.035052
           ...   
22192   -0.129028
22193   -0.129028
22194   -0.155878
22195   -0.155878
22196   -0.155878
Name: 302, Length: 22197, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data7.csv (7/15)
Detected 10 channels (excluding time column).


1       -0.177391
2       -0.169907
3       -0.162424
4       -0.177391
           ...   
22218   -0.035210
22219   -0.027727
22220   -0.035210
22221   -0.042693
22222   -0.065143
Name: 299, Length: 22223, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.131056
2       -0.125940
3       -0.131056
4       -0.136172
           ...   
22218   -0.120824
22219   -0.125940
22220   -0.131056
22221   -0.141289
22222   -0.141289
Name: 308, Length: 22223, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.047455
2       -0.047455
3       -0.062051
4       -0.091241
           ...   
22218    0.463375
22219    0.390399
22220    0.317424
22221    0.200662
22222    0.083901
Name: 303, Length: 22223, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data8.csv (8/15)
Detected 10 channels (excluding time column).


1       -0.382097
2       -0.387642
3       -0.382097
4       -0.365461
           ...   
22211   -0.149190
22212   -0.165827
22213   -0.160281
22214   -0.160281
22215   -0.160281
Name: 267, Length: 22216, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.032784
2       -0.047240
3       -0.047240
4       -0.054468
           ...   
22211   -0.343597
22212   -0.358054
22213   -0.365282
22214   -0.372510
22215   -0.379738
Name: 314, Length: 22216, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.003088
2       -0.022161
3       -0.031697
4       -0.022161
           ...   
22211   -0.165205
22212   -0.184277
22213   -0.193814
22214   -0.203350
22215   -0.212886
Name: 315, Length: 22216, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data9.csv (9/15)
Detected 10 channels (excluding time column).


1       -0.219357
2       -0.251248
3       -0.283139
4       -0.315030
           ...   
22258   -0.212979
22259   -0.232114
22260   -0.244870
22261   -0.251248
22262   -0.264004
Name: 298, Length: 22263, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.080154
2       -0.068002
3       -0.068002
4       -0.055850
           ...   
22258   -0.104459
22259   -0.110535
22260   -0.110535
22261   -0.110535
22262   -0.110535
Name: 311, Length: 22263, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.092940
2        0.092940
3        0.069768
4        0.069768
           ...   
22258   -0.080853
22259   -0.069267
22260   -0.080853
22261   -0.080853
22262   -0.069267
Name: 315, Length: 22263, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data10.csv (10/15)
Detected 10 channels (excluding time column).


1        0.064586
2        0.080977
3        0.089172
4        0.113758
           ...   
22222   -0.115711
22223   -0.107516
22224   -0.107516
22225   -0.140297
22226   -0.140297
Name: 316, Length: 22227, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.016873
2       -0.011477
3       -0.038455
4       -0.054641
           ...   
22222    0.037082
22223    0.053268
22224    0.064059
22225    0.069455
22226    0.080246
Name: 317, Length: 22227, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.071136
2       -0.060755
3       -0.060755
4       -0.060755
           ...   
22222   -0.060755
22223   -0.050373
22224   -0.050373
22225   -0.071136
22226   -0.060755
Name: 304, Length: 22227, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data11.csv (11/15)
Detected 10 channels (excluding time column).


1       -0.071966
2       -0.080680
3       -0.098108
4       -0.098108
           ...   
22211   -0.019683
22212   -0.010969
22213   -0.002255
22214   -0.002255
22215   -0.002255
Name: 304, Length: 22216, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.076479
2       -0.076479
3       -0.081733
4       -0.086987
           ...   
22211   -0.076479
22212   -0.071225
22213   -0.071225
22214   -0.071225
22215   -0.065971
Name: 306, Length: 22216, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.072709
2       -0.062304
3       -0.072709
4       -0.072709
           ...   
22211   -0.083113
22212   -0.083113
22213   -0.083113
22214   -0.093517
22215   -0.103921
Name: 306.1, Length: 22216, dtype: float64' has dtype incompatible with int64, ple

Processing sensor_data12.csv (12/15)
Detected 10 channels (excluding time column).


1       -0.405947
2       -0.413127
3       -0.420307
4       -0.413127
           ...   
22168   -0.018205
22169   -0.003844
22170    0.010517
22171    0.017697
22172    0.024877
Name: 258, Length: 22173, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.082217
2       -0.087149
3       -0.082217
4       -0.052624
           ...   
22168   -0.111810
22169   -0.121674
22170   -0.126607
22171   -0.131539
22172   -0.136471
Name: 302, Length: 22173, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.000410
2        0.007619
3        0.015649
4        0.039737
           ...   
22168   -0.257355
22169   -0.273414
22170   -0.265385
22171   -0.249325
22172   -0.249325
Name: 319, Length: 22173, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data13.csv (13/15)
Detected 10 channels (excluding time column).


1        0.010127
2        0.015803
3        0.015803
4        0.027155
           ...   
22210   -0.154469
22211   -0.148793
22212   -0.143118
22213   -0.143118
22214   -0.137442
Name: 327, Length: 22215, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.043921
2        0.029004
3        0.024031
4       -0.000830
           ...   
22210   -0.130112
22211   -0.120167
22212   -0.120167
22213   -0.110223
22214   -0.105250
Name: 332, Length: 22215, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.176467
2       -0.176467
3       -0.162313
4       -0.169390
           ...   
22210   -0.183544
22211   -0.183544
22212   -0.197698
22213   -0.183544
22214   -0.183544
Name: 303, Length: 22215, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data14.csv (14/15)
Detected 10 channels (excluding time column).


1       -0.340880
2       -0.333884
3       -0.319892
4       -0.319892
           ...   
22165   -0.228942
22166   -0.214949
22167   -0.228942
22168   -0.235938
22169   -0.228942
Name: 272, Length: 22170, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1        0.119284
2        0.119284
3        0.127093
4        0.127093
           ...   
22165   -0.286769
22166   -0.278960
22167   -0.278960
22168   -0.271151
22169   -0.263343
Name: 328, Length: 22170, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.534017
2       -0.567411
3       -0.600804
4       -0.656459
           ...   
22165    0.055929
22166    0.067060
22167    0.067060
22168    0.033667
22169    0.033667
Name: 268, Length: 22170, dtype: float64' has dtype incompatible with int64, pleas

Processing sensor_data15.csv (15/15)
Detected 10 channels (excluding time column).


1       -0.403871
2       -0.438546
3       -0.487090
4       -0.507895
           ...   
22056   -0.209694
22057   -0.209694
22058   -0.216629
22059   -0.209694
22060   -0.216629
Name: 268, Length: 22061, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.002722
2       -0.105473
3       -0.253178
4       -0.388039
           ...   
22056    0.022966
22057    0.022966
22058    0.016544
22059    0.022966
22060    0.022966
Name: 329, Length: 22061, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       -0.758191
2       -0.670335
3       -0.606440
4       -0.558519
           ...   
22056   -0.119241
22057   -0.111255
22058   -0.111255
22059   -0.111255
22060   -0.127228
Name: 208, Length: 22061, dtype: float64' has dtype incompatible with int64, pleas

[21 26 24  2 12 18 17 15  8  7 13 20 11  6  9 14 16 23  1 25  4 22 10  5
 19  3  7 22 13  9 18 15 24  4  5 21  1 23 16  2  6 11 12 17 19 26  8 14
 25 20  3 10  7  3 26 11 21 23 13 18 22  1 10  4 17  8  6 14 25 24 15 20
  2 16 12  5  9 19  1 21  7  4 19 16 23 17  2  9  3 11 22  8 24 25 14  5
  6 18 10 26 15 20 13 12 25  9 22 23  4  1 21  3 26 16 15 12 14  6  2 13
 18  5 10 11 24 17  8 19 20  7  7  4  9 12 18  5  8 22 11  2 21 24 19 16
 25 15  6 13  1 10 14  3 26 23 20 17  2 19  4 16 15  9 17  6 10 22 20  1
  7 21 13 14 26 25 24 11 12 18  5 23  3  8 21 17 23 18  8 16 19  7 14 15
 20  6  2 10  5 11  1  3 12 25  9 13 22 26  4 24  1  4 15 10 21 17  9 24
  8 22 26 20 14  3 18 23 13  5 12  6 11 19 16  7 25  2  7  5  9 18 15 25
  2 26  3 20 19 24 12 14  1 11 22 23 16  8 21 13 10  4 17  6 12 14 11 26
 20  1 17  3  4 10  6  7 23  5  9 25 16 19  2 24 13 15 21 22  8 18  3 24
 18 15  5 19  6 14  8 16 26 13  9 17  2 12 11  7 23 21  1 20  4 10 25 22
 16  1 13 20  4 24  9 14 25  5 26 19 12 18  7  8 15

In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """
    加载 `feature_matrix.npy` 和 `labels.npy` 数据，并划分训练集和测试集。

    参数：
    - data_folder: 存放数据的文件夹路径
    - test_size: 测试集比例 (默认 20%)
    - random_state: 随机种子，保证可复现性

    返回：
    - X_train: 训练集特征，形状 (train_batches, num_windows, 15, num_channels)
    - X_test: 测试集特征，形状 (test_batches, num_windows, 15, num_channels)
    - y_train: 训练集标签，形状 (train_batches,)
    - y_test: 测试集标签，形状 (test_batches,)
    """
    # **加载数据**
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    X = np.load(feature_path)  # 形状 (num_batches, num_windows, 15, num_channels)
    y = np.load(label_path)  # 形状 (num_batches,)

    # **数据基本信息**
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # **划分训练集和测试集**
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

    # **打印数据划分信息**
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# **使用示例**
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\WXR\processed_data"
X_train, X_test, y_train, y_test = load_data(data_folder)



In [12]:
import numpy as np
import pandas as pd
import os
import scipy.stats as stats
from scipy.signal import welch

def process_emg_folder(input_folder, output_folder, shuffle_order_file):
    """
    处理 EMG 数据文件，先按照 shuffle_order.xlsx 划分手势数据，
    然后进行窗口切分，并提取特征。
    """
    fs = 1000  # 采样率 1000Hz，每秒 1000 个数据点
    target_samples = 5000  # 每个手势的固定采样点数
    window_size = 200  # 窗口大小 400 采样点（0.4s）
    step_size = 100  # 窗口滑动步长 200 采样点（0.2s）

    # 创建输出文件夹
    os.makedirs(output_folder, exist_ok=True)

    # 读取 shuffle_order.xlsx
    shuffle_df = pd.read_excel(shuffle_order_file, engine="openpyxl")
    if shuffle_df.shape[1] < 5:
        raise ValueError("标签文件数据不足 26 组！请检查 `shuffle_order.xlsx`。")

    all_features = []
    all_labels = []
    csv_files = sorted(
        [f for f in os.listdir(input_folder) if f.startswith("sensor_data") and f.endswith(".csv")],
        key=lambda x: int(re.search(r"sensor_data(\d+)\.csv", x).group(1))  # 按数字排序
    )
    

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx + 1}/{len(csv_files)})")

        # 读取数据
        raw_data = pd.read_csv(file_path)
        num_channels = raw_data.shape[1] - 1  # 去掉时间列
        print(f"Detected {num_channels} channels (excluding time column).")

        # 归一化数据
        data = raw_data.iloc[:, 1:]
        normalized_data = (data - data.mean(axis=0)) / (data.std(axis=0) + 1e-10)
        raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)

        # 按 shuffle_order.xlsx 划分 26 组手势
        valid_segments = []
        for i in range(5):
            start_idx = shuffle_df.iloc[file_idx+1, i]  # 该手势起始索引
            print(start_idx,file_idx)
            if start_idx + target_samples <= len(raw_data):
                segment = raw_data.iloc[start_idx:start_idx + target_samples, 1:].values
            else:
                segment = raw_data.iloc[start_idx:, 1:].values
                pad_size = target_samples - len(segment)
                segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
            valid_segments.append(segment)

        valid_segments = np.array(valid_segments)  # 形状 (26, 5000, num_channels)

        # 进行窗口划分和特征提取
        features_batches = []
        for seg in valid_segments:
            windows = [seg[j:j + window_size, :] for j in range(0, target_samples - window_size + 1, step_size)]
            batch_features = np.array([extract_features(window) for window in windows])  # (num_windows, 15, num_channels)
            features_batches.append(batch_features)

        processed_segments = np.array(features_batches)  # (26, num_windows, 15, num_channels)

        if processed_segments.shape[0] != 5:
            print(f"Warning: {file_name} 提取到 {processed_segments.shape[0]} 个手势，不是 26 个。")

        # 存储数据
        all_features.append(processed_segments)
        all_labels.extend(shuffle_df.iloc[file_idx, :26].values.tolist())

    # 最终转换为 NumPy 数组
    all_features = np.vstack(all_features)  # (总手势数, num_windows, 15, num_channels)
    all_labels = np.array(all_labels)  # (总手势数,)

    # 保存数据
    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

import numpy as np
import scipy.stats as stats
from scipy.signal import welch

def extract_features(segment):
    """
    计算 EMG 和 IMU 特征，确保返回 `float64` 类型
    """
    emg_signals = segment[:, :4].astype(np.float64)  # 4 个 EMG 通道
    imu_signals = segment[:, 4:].astype(np.float64)  # 6 个 IMU 通道

    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])
        ], dtype=np.float64)

    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)
        ], dtype=np.float64)

    emg_features = np.array([compute_emg_features(emg_signals[:, i]) for i in range(4)]).T  # (15, 4)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1).astype(np.float64)  # (15, 10)

# 运行代码
root = r"data\WXR_5"
input_folder = root
output_folder = os.path.join(root, "processed_data")
shuffle_order_file = os.path.join(root, "shuffle_order.xlsx")
process_emg_folder(input_folder, output_folder, shuffle_order_file)

Processing sensor_data1.csv (1/15)
Detected 10 channels (excluding time column).
2 0
5 0
3 0
1 0
4 0


1      -0.027940
2      -0.038536
3      -0.043834
4      -0.038536
          ...   
4221    0.046231
4222    0.030337
4223   -0.006748
4224   -0.049132
4225   -0.065026
Name: 334, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.065561
2      -0.073665
3      -0.065561
4      -0.057456
          ...   
4221   -0.357321
4222   -0.333008
4223   -0.333008
4224   -0.324903
4225   -0.300590
Name: 302, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       0.170588
2       0.179778
3       0.198159
4       0.216539
          ...   
4221    0.179778
4222    0.115448
4223    0.041927
4224   -0.031593
4225   -0.077544
Name: 322, Length: 4226, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data2.csv (2/15)
Detected 10 channels (excluding time column).
2 1
4 1
5 1
1 1
3 1


1       0.024180
2       0.012881
3       0.035479
4       0.058076
          ...   
4179   -0.292184
4180   -0.337379
4181   -0.337379
4182   -0.326080
4183   -0.337379
Name: 307, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       0.030768
2      -0.013996
3      -0.051299
4      -0.081141
          ...   
4179   -0.282577
4180   -0.297498
4181   -0.312419
4182   -0.312419
4183   -0.334801
Name: 335, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.074052
2      -0.074052
3      -0.038905
4       0.019674
          ...   
4179    0.101684
4180    0.089968
4181    0.078253
4182    0.089968
4183    0.078253
Name: 295, Length: 4184, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data3.csv (3/15)
Detected 10 channels (excluding time column).
4 2
5 2
1 2
3 2
2 2


1      -0.621980
2      -0.701923
3      -0.747604
4      -0.770444
          ...   
4287   -0.130908
4288   -0.153748
4289   -0.153748
4290   -0.130908
4291   -0.130908
Name: 259, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       0.610803
2       0.590138
3       0.576361
4       0.610803
          ...   
4287   -0.277817
4288   -0.284706
4289   -0.284706
4290   -0.270929
4291   -0.270929
Name: 414, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       0.604763
2       0.546238
3       0.487712
4       0.419432
          ...   
4287   -0.000002
4288   -0.039019
4289   -0.058528
4290   -0.087791
4291   -0.136562
Name: 372, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data4.csv (4/15)
Detected 10 channels (excluding time column).
2 3
1 3
4 3
5 3
3 3


1       0.008951
2      -0.003461
3      -0.015873
4       0.008951
          ...   
4284   -0.326177
4285   -0.276529
4286   -0.214468
4287   -0.152407
4288   -0.090346
Name: 314, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.243978
2      -0.286797
3      -0.343889
4      -0.365299
          ...   
4284   -0.001336
4285   -0.008473
4286   -0.008473
4287   -0.044155
4288   -0.072701
Name: 299, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.247069
2      -0.198325
3      -0.173952
4      -0.133332
          ...   
4284   -0.003347
4285   -0.035843
4286   -0.068340
4287   -0.108960
4288   -0.149580
Name: 275, Length: 4289, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data5.csv (5/15)
Detected 10 channels (excluding time column).
2 4
3 4
1 4
4 4
5 4


1      -0.521188
2      -0.533369
3      -0.509008
4      -0.521188
          ...   
4278   -0.216680
4279   -0.180140
4280   -0.155779
4281   -0.143599
4282   -0.107058
Name: 274, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.141752
2      -0.171595
3      -0.153689
4      -0.165626
          ...   
4278    0.096993
4279    0.114899
4280    0.061182
4281    0.031338
4282    0.049244
Name: 299, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.071214
2      -0.054458
3      -0.020946
4      -0.012568
          ...   
4278    0.163370
4279    0.163370
4280    0.146614
4281    0.138236
4282    0.138236
Name: 300, Length: 4283, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data6.csv (6/15)
Detected 10 channels (excluding time column).
1 5
2 5
5 5
3 5
4 5


1      -0.523873
2      -0.487754
3      -0.475715
4      -0.463675
          ...   
4292   -0.210845
4293   -0.222884
4294   -0.246963
4295   -0.234924
4296   -0.234924
Name: 274, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.755006
2      -0.706568
3      -0.682350
4      -0.706568
          ...   
4292   -0.113208
4293   -0.137426
4294   -0.191919
4295   -0.191919
4296   -0.191919
Name: 209, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.172553
2      -0.164084
3      -0.164084
4      -0.172553
          ...   
4292    0.123862
4293    0.106924
4294    0.081517
4295    0.064579
4296    0.030703
Name: 295, Length: 4297, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data7.csv (7/15)
Detected 10 channels (excluding time column).
4 6
5 6
3 6
2 6
1 6
Processing sensor_data8.csv (8/15)
Detected 10 channels (excluding time column).
4 7
3 7
2 7
1 7
5 7


1      -0.524891
2      -0.470247
3      -0.433818
4      -0.452033
          ...   
4240   -0.069528
4241   -0.051314
4242   -0.033099
4243   -0.051314
4244   -0.069528
Name: 281, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.432289
2      -0.408666
3      -0.379136
4      -0.426384
          ...   
4240   -0.018877
4241   -0.001159
4242    0.034276
4243   -0.001159
4244   -0.007065
Name: 247, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.306949
2      -0.280254
3      -0.253560
4      -0.253560
          ...   
4240   -0.048900
4241   -0.031103
4242   -0.031103
4243   -0.040001
4244   -0.040001
Name: 280, Length: 4245, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible

Processing sensor_data9.csv (9/15)
Detected 10 channels (excluding time column).
4 8
3 8
5 8
2 8
1 8


1      -0.148602
2      -0.148602
3      -0.134395
4      -0.134395
          ...   
4287    0.007673
4288   -0.006534
4289   -0.006534
4290    0.036086
4291    0.021880
Name: 304, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1       0.058028
2       0.063186
3       0.109606
4       0.094132
          ...   
4287    0.068343
4288    0.027081
4289    0.047712
4290    0.078659
4291    0.032239
Name: 306, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  raw_data.iloc[:, 1:] = normalized_data.astype(np.float64)
1      -0.064258
2      -0.064258
3      -0.054597
4      -0.054597
          ...   
4287   -0.054597
4288   -0.073920
4289   -0.064258
4290   -0.054597
4291   -0.064258
Name: 304.1, Length: 4292, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatib

KeyboardInterrupt: 