# In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def _padded_slice(values, start, length):
    """Return rows ``start:start + length`` of ``values``, zero-padded to ``length`` rows."""
    chunk = values[start:start + length]
    missing = length - len(chunk)
    if missing > 0:
        # The recording ended early: append all-zero rows so every segment
        # has the same length and the window grid stays identical.
        chunk = np.pad(chunk, ((0, missing), (0, 0)), mode='constant', constant_values=0)
    return chunk


def _sliding_windows(segment, window_size, step_size):
    """Cut ``segment`` into overlapping windows, dropping column 0 (the time column)."""
    last_start = len(segment) - window_size
    return np.array([
        segment[start:start + window_size, 1:]
        for start in range(0, last_start + 1, step_size)
    ])


def process_emg_folder(input_folder, output_folder):
    """
    Extract windowed features from every CSV file in ``input_folder`` and save them.

    Each file is read with ``pd.read_csv``; its first column is treated as a
    time column and ignored for feature extraction. A file is split into 3
    consecutive cycles of 10 s (at an assumed 1000 Hz sampling rate). For each
    cycle the first 4 s are skipped and the following 6 s are kept; if the file
    is too short, the missing rows are zero-padded. The kept part is cut into
    200-sample windows with a 100-sample step, and each window is converted to
    a feature matrix by ``extract_features`` (defined elsewhere in this file).
    The label of a cycle is its 1-based position in the file (1, 2 or 3).

    Two files are written into ``output_folder`` (created if necessary):
    ``feature_matrix.npy`` with one entry per cycle along axis 0, and
    ``labels.npy`` with the matching labels.

    Args:
        input_folder: Directory containing the ``.csv`` recordings.
        output_folder: Directory that receives the ``.npy`` outputs.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        ValueError: If ``input_folder`` contains no ``.csv`` files.
    """
    # Sampling and cycle layout.
    fs = 1000  # sampling rate in Hz
    cycle_duration = 10  # seconds per cycle
    skip_seconds = 4  # seconds discarded at the start of each cycle
    use_seconds = 6  # seconds kept after the skipped part
    num_cycles = 3  # every file is split into exactly 3 cycles

    cycle_samples = fs * cycle_duration  # 10000 samples per cycle
    skip_samples = fs * skip_seconds  # 4000 samples skipped
    use_samples = fs * use_seconds  # 6000 samples kept

    # Sliding-window parameters (in samples; 1 sample = 1 ms at 1000 Hz).
    window_size = 200
    step_size = 100

    # Process files in sorted order so the output is reproducible.
    csv_files = sorted(f for f in os.listdir(input_folder) if f.endswith(".csv"))
    if not csv_files:
        # Without this check np.vstack([]) below fails with an opaque message.
        raise ValueError(f"No CSV files found in {input_folder!r}")

    os.makedirs(output_folder, exist_ok=True)

    all_features = []
    all_labels = []

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        raw_values = pd.read_csv(file_path).values

        # Channel count excludes the leading time column.
        num_channels = raw_values.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        file_features = []
        for cycle_idx in range(num_cycles):
            start_idx = cycle_idx * cycle_samples + skip_samples

            available = max(0, len(raw_values) - start_idx)
            if available < use_samples:
                # Make the zero-padding visible instead of silently feeding
                # artificial all-zero samples into the feature extractor.
                print(
                    f"Warning: {file_name} cycle {cycle_idx + 1} has only "
                    f"{available}/{use_samples} samples; zero-padding the remainder."
                )

            segment = _padded_slice(raw_values, start_idx, use_samples)
            windows = _sliding_windows(segment, window_size, step_size)

            # One feature matrix per window -> (num_windows, n_features, n_channels).
            file_features.append(np.array([extract_features(window) for window in windows]))

            # Label = 1-based cycle index within the file.
            all_labels.append(cycle_idx + 1)

        # (num_cycles, num_windows, n_features, n_channels) for this file.
        all_features.append(np.array(file_features))

    # Stack the per-file arrays along the cycle axis.
    all_features = np.vstack(all_features)
    all_labels = np.array(all_labels)

    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    """
    分别计算 EMG 和 IMU 的 15 个特征，并拼接成 (15, 7)
    """
    # **分离 EMG (第一列) 和 IMU (后六列)**
    emg_signal = segment[:, 0]  # EMG 数据 (200,)
    imu_signals = segment[:, 1:]  # IMU 数据 (200, 6)

    # **定义 EMG 特征**
    def compute_emg_features(signal):
        return np.array([
            np.var(signal), np.mean(np.abs(signal)), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.mean(np.abs(np.diff(signal))), np.max(signal),
            np.min(signal), np.sum(np.diff(signal) > 0), np.sum(np.diff(np.sign(signal)) != 0),
            stats.kurtosis(signal), stats.skew(signal), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1])  # 频谱均值
        ])

    # **定义 IMU 特征**
    def compute_imu_features(signal):
        return np.array([
            np.var(signal), np.mean(signal), np.sqrt(np.mean(signal**2)),
            np.std(signal), np.max(signal), np.min(signal),
            stats.kurtosis(signal), stats.skew(signal),
            np.mean(np.abs(np.diff(signal))), np.sum(np.abs(np.diff(signal))),
            np.sum(signal ** 2), np.log10(np.mean(signal**2) + 1e-10),
            np.mean(welch(signal, fs=1000, nperseg=200)[1]), np.median(signal), np.ptp(signal)  # 频谱均值、中值、峰峰值
        ])

    emg_features = compute_emg_features(emg_signal).reshape(-1, 1)  # (15, 1)
    imu_features = np.array([compute_imu_features(imu_signals[:, i]) for i in range(6)]).T  # (15, 6)

    return np.concatenate((emg_features, imu_features), axis=1)  # (15, 7)

# **Run the pipeline**: process every CSV in the raw-data folder and write the
# resulting feature matrix and labels into the output folder.
output_folder = "windowed_data"
input_folder = r"E:\MSC\Spring\AML\GestureLink\Data_grove"
process_emg_folder(input_folder=input_folder, output_folder=output_folder)
