In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

# Acquisition timing: every gesture cycle lasts CYCLE_DURATION seconds; the
# first SKIP_SECONDS are discarded and the following USE_SECONDS are kept.
FS = 1000  # sampling rate in Hz
CYCLE_DURATION = 10
SKIP_SECONDS = 4
USE_SECONDS = 6

# The same durations expressed as sample counts (10000 / 4000 / 6000).
CYCLE_SAMPLES, SKIP_SAMPLES, USE_SAMPLES = (
    FS * seconds for seconds in (CYCLE_DURATION, SKIP_SECONDS, USE_SECONDS)
)

# Sliding-window layout, in samples (200 ms window, 100 ms step at 1 kHz).
WINDOW_SIZE = 200
STEP_SIZE = 100
NUM_WINDOWS = 1 + (USE_SAMPLES - WINDOW_SIZE) // STEP_SIZE  # windows per kept span
MAX_CYCLES = 10  # at most 10 gesture cycles are kept per file

# Names of the 15 features, in the row order produced by extract_features.
FEATURE_NAMES = "VAR MAV RMS SDV AAC MAX SSC ZCR KUR SKW WWL SSI LGD ARC MFR".split()

def extract_features(window, fs=None):
    """Compute 15 time- and frequency-domain features for each channel of a window.

    Args:
        window: 2-D array of shape (num_samples, num_channels).
        fs: Sampling rate in Hz used for the Welch power spectral density.
            When omitted, the module-level ``FS`` constant is used.

    Returns:
        Float array of shape (15, num_channels). Rows are, in order:
        VAR, MAV, RMS, SDV, AAC, MAX, SSC, ZCR, KUR, SKW, WWL, SSI, LGD,
        ARC, MFR.
    """
    if fs is None:
        fs = FS

    num_channels = window.shape[1]
    features = np.zeros((15, num_channels))

    for ch in range(num_channels):
        signal = window[:, ch]
        deltas = np.diff(signal)
        abs_deltas = np.abs(deltas)
        mean_square = np.mean(signal ** 2)

        # Time-domain features
        features[0, ch] = np.var(signal)  # variance (VAR)
        features[1, ch] = np.mean(np.abs(signal))  # mean absolute value (MAV)
        features[2, ch] = np.sqrt(mean_square)  # root mean square (RMS)
        features[3, ch] = np.std(signal)  # standard deviation (SDV)
        features[4, ch] = np.mean(abs_deltas)  # average amplitude change (AAC)
        features[5, ch] = np.max(signal)  # maximum (MAX)
        # Slope sign changes (SSC): two consecutive first differences with
        # opposite signs mark a local extremum. The previous implementation
        # counted positive differences, i.e. rising steps, instead.
        features[6, ch] = np.sum(deltas[:-1] * deltas[1:] < 0)
        features[7, ch] = np.sum(np.diff(np.sign(signal)) != 0)  # zero crossings (ZCR)
        features[8, ch] = stats.kurtosis(signal)  # kurtosis (KUR)
        features[9, ch] = stats.skew(signal)  # skewness (SKW)
        features[10, ch] = np.sum(abs_deltas)  # waveform length (WWL)
        features[11, ch] = np.sum(signal ** 2)  # simple square integral (SSI)
        # The small epsilon keeps log10 finite for an all-zero window.
        features[12, ch] = np.log10(mean_square + 1e-10)  # log energy (LGD)

        # Frequency-domain features. nperseg is clamped to the window length
        # so shorter windows do not trigger scipy's "nperseg > input" warning.
        freqs, psd = welch(signal, fs=fs, nperseg=min(200, signal.shape[0]))
        total_power = np.sum(psd)
        features[13, ch] = total_power  # summed PSD (ARC)
        # Power-weighted mean frequency; epsilon avoids division by zero.
        features[14, ch] = np.sum(freqs * psd) / (total_power + 1e-10)  # MFR

    return features

def process_emg_file(file_path, label):
    """Turn one EMG CSV recording into per-cycle window features and labels.

    The file is cut into consecutive cycles of ``CYCLE_SAMPLES`` rows. In each
    cycle the first ``SKIP_SAMPLES`` rows are dropped and the next
    ``USE_SAMPLES`` rows are split into overlapping windows of ``WINDOW_SIZE``
    rows with a step of ``STEP_SIZE``. The output always holds exactly
    ``MAX_CYCLES`` cycles: extra cycles are dropped and missing ones are
    filled with all-zero windows.

    Args:
        file_path: Path of the CSV file; its first column is treated as the
            time column and removed from every window.
        label: Gesture class assigned to every cycle of this file.

    Returns:
        features_batches: Array whose first two axes are
            (MAX_CYCLES, num_windows); the remaining axes are whatever
            ``extract_features`` returns for one window.
        labels: Array of shape (MAX_CYCLES,) filled with ``label``.
    """
    raw_data = pd.read_csv(file_path)
    num_channels = raw_data.shape[1] - 1  # drop the time column
    print(f"Processing {file_path}, Detected {num_channels} channels")

    # Only complete cycles are used, capped at MAX_CYCLES.
    num_cycles = min(len(raw_data) // CYCLE_SAMPLES, MAX_CYCLES)
    window_starts = range(0, USE_SAMPLES - WINDOW_SIZE + 1, STEP_SIZE)

    segments = []
    for i in range(num_cycles):
        start_idx = i * CYCLE_SAMPLES + SKIP_SAMPLES
        end_idx = start_idx + USE_SAMPLES

        # Slicing past the end simply returns the rows that exist; any
        # shortfall is then zero-padded up to USE_SAMPLES rows.
        segment = raw_data.iloc[start_idx:end_idx].values
        pad_size = USE_SAMPLES - len(segment)
        if pad_size > 0:
            segment = np.pad(segment, ((0, pad_size), (0, 0)),
                             mode='constant', constant_values=0)

        # Sliding windows over the kept span, without the time column.
        windows = np.array([segment[j:j + WINDOW_SIZE, 1:] for j in window_starts])
        segments.append(windows)

    if not segments:
        # Previously this case crashed with IndexError on segments[0].
        print(f"Warning: {file_path} has fewer than {CYCLE_SAMPLES} rows; "
              "all cycles are zero-filled")

    # Fill up to MAX_CYCLES with all-zero cycles. When no real cycle exists,
    # the template shape is derived from the constants instead of segments[0].
    # NOTE(review): zero-filled cycles still go through extract_features and
    # receive the real label below; confirm downstream code expects that.
    if segments:
        template = segments[0]
    else:
        template = np.zeros((len(window_starts), WINDOW_SIZE, num_channels))
    while len(segments) < MAX_CYCLES:
        segments.append(np.zeros_like(template))

    segments_array = np.array(segments)[:MAX_CYCLES]
    print(f"Segments shape: {segments_array.shape}")

    # One feature matrix per window, grouped by cycle.
    features_batches = np.array([
        [extract_features(window) for window in cycle]
        for cycle in segments_array
    ])

    # Every cycle of this file carries the same gesture label.
    labels = np.full(features_batches.shape[0], label)

    return features_batches, labels

def process_emg_folder(input_folder, output_folder):
    """Extract features from every CSV file in a folder and save them as .npy.

    Files are processed in natural (numeric-aware) name order and the n-th
    file is given gesture label n, so ``1.csv`` ... ``10.csv`` map to labels
    1..10. Results are written to ``feature_matrix.npy`` and ``labels.npy``
    inside ``output_folder``.

    Args:
        input_folder: Folder containing the ``.csv`` recordings.
        output_folder: Folder for the output arrays; created if missing.

    Raises:
        ValueError: If ``input_folder`` contains no ``.csv`` file.
    """
    import re  # local import: only needed for the natural-sort key

    def natural_key(name):
        # re.split with a capturing group alternates text / digit tokens, so
        # odd positions are always digit runs. Comparing those as integers
        # puts "2.csv" before "10.csv", which plain string sorting does not.
        tokens = re.split(r"(\d+)", name)
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    os.makedirs(output_folder, exist_ok=True)

    csv_files = sorted(
        (f for f in os.listdir(input_folder) if f.endswith(".csv")),
        key=natural_key,
    )
    if not csv_files:
        # np.vstack on an empty list would raise a far less helpful ValueError.
        raise ValueError(f"No .csv files found in {input_folder!r}")

    all_features, all_labels = [], []
    # The position of a file in the sorted list (1-based) is its gesture label.
    for label, file_name in enumerate(csv_files, start=1):
        file_path = os.path.join(input_folder, file_name)
        features, labels = process_emg_file(file_path, label)
        all_features.append(features)
        all_labels.append(labels)

    # Stack the per-file results along the first (sample) axis and save.
    all_features = np.vstack(all_features)
    all_labels = np.hstack(all_labels)

    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")

# Run the pipeline: read the raw recordings and write the feature arrays.
input_folder = "data/data_1_sensor_3_classes/"  # folder with the input CSV files
output_folder = "processed_data"  # folder that receives the .npy outputs
process_emg_folder(input_folder=input_folder, output_folder=output_folder)


In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder, test_size=0.2, random_state=42):
    """Load the saved feature/label arrays and split them into train and test sets.

    Args:
        data_folder: Folder holding ``feature_matrix.npy`` and ``labels.npy``.
        test_size: Fraction of samples put into the test set (default 0.2).
        random_state: Seed passed to ``train_test_split`` for reproducibility.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The split is stratified
        on the labels.

    Raises:
        FileNotFoundError: If either of the two ``.npy`` files is missing.
    """
    feature_path, label_path = (
        os.path.join(data_folder, file_name)
        for file_name in ("feature_matrix.npy", "labels.npy")
    )

    # Both files must exist before anything is loaded.
    if not all(map(os.path.exists, (feature_path, label_path))):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    features = np.load(feature_path)
    labels = np.load(label_path)

    # Report what was loaded.
    print(f"Loaded features from {feature_path}, shape: {features.shape}")
    print(f"Loaded labels from {label_path}, shape: {labels.shape}")

    # Stratified split so that both sets keep the class proportions.
    split = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )
    X_train, X_test, y_train, y_test = split

    # Report the resulting split sizes.
    print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# Example usage: load the saved arrays and split them into train/test sets.
data_folder = "windowed_data"
X_train, X_test, y_train, y_test = load_data(data_folder=data_folder)


---------------------------------------------

In [None]:
import pandas as pd
import numpy as np
import os

# Load the raw recording.
file_path = r'data/emg_data_20250219_160546_3_10.csv'  # replace with your own file path
raw_data = pd.read_csv(file_path)

# Sampling rate (Hz) and the timing of one gesture cycle (seconds).
fs = 1000
cycle_duration = 10  # length of one cycle
skip_seconds = 4  # leading part of each cycle that is discarded
use_seconds = 6  # part of each cycle that is kept

# The same durations as sample counts: 10000, 4000 and 6000.
cycle_samples = cycle_duration * fs
skip_samples = skip_seconds * fs
use_samples = use_seconds * fs

# Sliding-window layout in samples (200 ms window, 100 ms step).
window_size = 200
step_size = 100
num_windows = 1 + (use_samples - window_size) // step_size  # windows per kept span

# Make sure the output folder exists.
output_folder = 'segmented_data'
os.makedirs(output_folder, exist_ok=True)

segments = []  # one array of windows per cycle
num_cycles = len(raw_data) // cycle_samples  # only complete 10 s cycles are used

for i in range(num_cycles):
    # Kept span of cycle i: starts after the skipped 4 s and lasts 6 s.
    start_idx = skip_samples + i * cycle_samples
    end_idx = start_idx + use_samples

    if end_idx <= len(raw_data):
        # Enough rows available: take the span as is.
        segment = raw_data.iloc[start_idx:end_idx].values
    else:
        # Span runs past the end: take what is left and zero-pad the rest.
        segment = raw_data.iloc[start_idx:].values
        pad_size = use_samples - len(segment)
        segment = np.pad(segment, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)

    # Cut the span into overlapping windows (all columns are kept).
    windows = [
        segment[offset : offset + window_size]
        for offset in range(0, use_samples - window_size + 1, step_size)
    ]
    segments.append(np.array(windows))

    print(f"Segment {i+1} processed: Rows {start_idx} to {end_idx} (Padded: {pad_size if end_idx > len(raw_data) else 0} rows)")

# Stack into one array of shape (num_segments, num_windows, 200, num_columns).
segments_array = np.array(segments)

# Save the windowed data.
np.save(f"{output_folder}/windowed_segments.npy", segments_array)

# Report the final shape.
print(f"Final shape: {segments_array.shape}")


In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.signal import welch
import os

def process_emg_folder(input_folder, output_folder):
    """Extract windowed features from every CSV file in a folder and save them.

    Each file is cut into three consecutive 10-second cycles. The first 4 s
    of a cycle are skipped and the remaining 6 s are split into 200-sample
    windows with a 100-sample step. ``extract_features`` is applied to every
    window with the first (time) column removed. The 1-based cycle index
    (1, 2, 3) is used as the label of that cycle.

    The results are written to ``feature_matrix.npy`` and ``labels.npy`` in
    ``output_folder``.

    Args:
        input_folder: Folder containing the ``.csv`` recordings.
        output_folder: Folder for the output arrays; created if missing.

    Raises:
        ValueError: If ``input_folder`` contains no ``.csv`` file.
    """
    # Sampling rate (Hz) and cycle timing (seconds).
    fs = 1000
    cycle_duration = 10
    skip_seconds = 4
    use_seconds = 6

    cycle_samples = fs * cycle_duration  # 10000 rows per cycle
    skip_samples = fs * skip_seconds  # 4000 rows skipped
    use_samples = fs * use_seconds  # 6000 rows kept

    # Sliding-window layout in samples.
    window_size = 200
    step_size = 100
    window_starts = range(0, use_samples - window_size + 1, step_size)

    num_cycles = 3  # every file is assumed to hold exactly three cycles

    os.makedirs(output_folder, exist_ok=True)

    csv_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".csv")])
    if not csv_files:
        # np.vstack on an empty list would raise a far less helpful ValueError.
        raise ValueError(f"No .csv files found in {input_folder!r}")

    all_features = []
    all_labels = []

    for file_idx, file_name in enumerate(csv_files):
        file_path = os.path.join(input_folder, file_name)
        print(f"Processing {file_name} ({file_idx+1}/{len(csv_files)})")

        raw_data = pd.read_csv(file_path)

        # Channel count, not counting the time column.
        num_channels = raw_data.shape[1] - 1
        print(f"Detected {num_channels} channels (excluding time column).")

        segments = []
        for i in range(num_cycles):
            start_idx = i * cycle_samples + skip_samples
            end_idx = start_idx + use_samples

            # Slicing past the end returns only the rows that exist; the
            # shortfall is zero-padded so every cycle has use_samples rows.
            segment = raw_data.iloc[start_idx:end_idx].values
            pad_size = use_samples - len(segment)
            if pad_size > 0:
                # Make the padding visible: zero rows still get a real label.
                print(f"Warning: {file_name} cycle {i+1} is short; "
                      f"padded {pad_size} rows with zeros")
                segment = np.pad(segment, ((0, pad_size), (0, 0)),
                                 mode='constant', constant_values=0)

            # Sliding windows without the time column.
            segments.append(
                np.array([segment[j:j + window_size, 1:] for j in window_starts])
            )

        # One feature matrix per window, grouped by cycle.
        features_array = np.array([
            [extract_features(window) for window in cycle_windows]
            for cycle_windows in segments
        ])

        all_features.append(features_array)
        # Cycle 1 -> label 1, cycle 2 -> label 2, cycle 3 -> label 3.
        all_labels.extend(range(1, num_cycles + 1))

    # Stack all files along the first (cycle) axis.
    all_features = np.vstack(all_features)
    all_labels = np.array(all_labels)

    np.save(os.path.join(output_folder, "feature_matrix.npy"), all_features)
    np.save(os.path.join(output_folder, "labels.npy"), all_labels)

    print(f"Feature extraction complete! Shape: {all_features.shape}")
    print(f"Labels saved: {all_labels.shape}")
    print(f"Feature matrix saved at: {output_folder}/feature_matrix.npy")
    print(f"Labels saved at: {output_folder}/labels.npy")

def extract_features(segment):
    """Compute 15 features for the EMG channel and for every IMU channel.

    Column 0 of ``segment`` is treated as the EMG signal and all remaining
    columns as IMU signals. The two groups use different feature sets, so a
    given row of the result does not mean the same thing in column 0 as in
    the other columns.

    Args:
        segment: 2-D array of shape (num_samples, num_channels) with at
            least one column.

    Returns:
        Array of shape (15, num_channels): column 0 holds the EMG features,
        columns 1.. hold the IMU features. With the usual 1 EMG + 6 IMU
        layout this is (15, 7).
    """
    emg_signal = segment[:, 0]
    imu_signals = segment[:, 1:]

    def mean_psd(signal):
        # Mean of the Welch PSD. nperseg is clamped to the signal length so
        # short windows do not trigger scipy's "nperseg > input" warning.
        return np.mean(welch(signal, fs=1000, nperseg=min(200, len(signal)))[1])

    def compute_emg_features(signal):
        deltas = np.diff(signal)
        mean_square = np.mean(signal ** 2)
        return np.array([
            np.var(signal),                           # variance
            np.mean(np.abs(signal)),                  # mean absolute value
            np.sqrt(mean_square),                     # root mean square
            np.std(signal),                           # standard deviation
            np.mean(np.abs(deltas)),                  # average amplitude change
            np.max(signal),                           # maximum
            np.min(signal),                           # minimum
            np.sum(deltas > 0),                       # number of rising steps
            np.sum(np.diff(np.sign(signal)) != 0),    # sign changes of the signal
            stats.kurtosis(signal),                   # kurtosis
            stats.skew(signal),                       # skewness
            np.sum(np.abs(deltas)),                   # waveform length
            np.sum(signal ** 2),                      # sum of squares
            np.log10(mean_square + 1e-10),            # log energy (epsilon keeps it finite)
            mean_psd(signal),                         # mean spectral power
        ])

    def compute_imu_features(signal):
        abs_deltas = np.abs(np.diff(signal))
        mean_square = np.mean(signal ** 2)
        return np.array([
            np.var(signal),                           # variance
            np.mean(signal),                          # mean
            np.sqrt(mean_square),                     # root mean square
            np.std(signal),                           # standard deviation
            np.max(signal),                           # maximum
            np.min(signal),                           # minimum
            stats.kurtosis(signal),                   # kurtosis
            stats.skew(signal),                       # skewness
            np.mean(abs_deltas),                      # average amplitude change
            np.sum(abs_deltas),                       # waveform length
            np.sum(signal ** 2),                      # sum of squares
            np.log10(mean_square + 1e-10),            # log energy (epsilon keeps it finite)
            mean_psd(signal),                         # mean spectral power
            np.median(signal),                        # median
            np.ptp(signal),                           # peak-to-peak range
        ])

    emg_features = compute_emg_features(emg_signal).reshape(-1, 1)  # (15, 1)

    # Use the actual number of IMU columns instead of assuming six.
    num_imu = imu_signals.shape[1]
    if num_imu == 0:
        return emg_features

    imu_features = np.array(
        [compute_imu_features(imu_signals[:, i]) for i in range(num_imu)]
    ).T  # (15, num_imu)

    return np.concatenate((emg_features, imu_features), axis=1)

# Run the extraction on the recordings folder.
input_folder = "data/data_1_sensor_3_classes/"
output_folder = "windowed_data"
process_emg_folder(input_folder=input_folder, output_folder=output_folder)
