# In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_and_split_data(data_folder, selected_labels=(1, 2, 3), test_size=0.2, random_state=12):
    """
    Load features and labels from disk, keep only the requested classes, and
    split the remaining samples into stratified training and test sets.

    The files ``feature_matrix.npy`` and ``labels.npy`` are read from
    ``data_folder``.

    Parameters:
    - data_folder: path of the folder that holds the two ``.npy`` files
    - selected_labels: labels to keep (anything ``np.isin`` accepts as test
      elements); the default is an immutable tuple so it can never be
      mutated between calls
    - test_size: forwarded to ``train_test_split`` (default 0.2, i.e. 20%)
    - random_state: seed forwarded to ``train_test_split`` for reproducibility

    Returns:
    - X_train, X_test: feature arrays of the training and test sets
    - y_train, y_test: label arrays of the training and test sets

    Raises:
    - FileNotFoundError: if either ``.npy`` file is missing
    - ValueError: if no sample carries one of ``selected_labels``
    """
    # Locate the input files.
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    # Per the original author's note the expected shapes are
    # X: (num_batches, num_windows, 15, num_channels) and y: (num_batches,).
    X = np.load(feature_path)
    y = np.load(label_path)

    # Report what was loaded.
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # Boolean mask that is True for samples whose label is requested.
    mask = np.isin(y, selected_labels)
    if not mask.any():
        # Fail early with a clear message instead of letting the splitter
        # choke on an empty dataset.
        raise ValueError(
            f"No samples in {label_path} match selected_labels={selected_labels}"
        )
    X_filtered = X[mask]
    y_filtered = y[mask]

    # Stratify on the labels so both splits keep the class proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X_filtered, y_filtered, test_size=test_size, random_state=random_state, stratify=y_filtered
    )

    # Report the resulting split sizes.
    print(f"Train set: X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"Test set: X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# Usage example: load and split the data stored under the WXR folder.
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\WXR\processed_data"
X_train, X_test, y_train, y_test = load_and_split_data(
    data_folder,
    selected_labels=[1, 2, 5, 6, 8, 9, 11, 13, 16, 26],
)
# Alternative label set (class 5 left out):
# selected_labels=[1, 2, 6, 8, 9, 11, 13, 16, 26]

import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_and_split_data(data_folder, selected_labels=(1, 2, 3), test_size=0.2, random_state=12):
    """
    Load features and labels from disk, keep only the requested classes, and
    split the remaining samples into stratified training and test sets.

    The files ``feature_matrix.npy`` and ``labels.npy`` are read from
    ``data_folder``.

    Parameters:
    - data_folder: path of the folder that holds the two ``.npy`` files
    - selected_labels: labels to keep (anything ``np.isin`` accepts as test
      elements); the default is an immutable tuple so it can never be
      mutated between calls
    - test_size: forwarded to ``train_test_split`` (default 0.2, i.e. 20%)
    - random_state: seed forwarded to ``train_test_split`` for reproducibility

    Returns:
    - X_train, X_test: feature arrays of the training and test sets
    - y_train, y_test: label arrays of the training and test sets

    Raises:
    - FileNotFoundError: if either ``.npy`` file is missing
    - ValueError: if no sample carries one of ``selected_labels``
    """
    # Locate the input files.
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    # Per the original author's note the expected shapes are
    # X: (num_batches, num_windows, 15, num_channels) and y: (num_batches,).
    X = np.load(feature_path)
    y = np.load(label_path)

    # Report what was loaded.
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # Boolean mask that is True for samples whose label is requested.
    mask = np.isin(y, selected_labels)
    if not mask.any():
        # Fail early with a clear message instead of letting the splitter
        # choke on an empty dataset.
        raise ValueError(
            f"No samples in {label_path} match selected_labels={selected_labels}"
        )
    X_filtered = X[mask]
    y_filtered = y[mask]

    # Stratify on the labels so both splits keep the class proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X_filtered, y_filtered, test_size=test_size, random_state=random_state, stratify=y_filtered
    )

    # Report the resulting split sizes.
    print(f"Train set: X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"Test set: X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# Usage example: load and split the data stored under the FZH folder.
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\FZH\processed_data"
X_train_f, X_test_f, y_train_f, y_test_f = load_and_split_data(
    data_folder,
    selected_labels=[1, 2, 5, 6, 8, 9, 11, 13, 16, 26],
)
# Alternative label set (class 5 left out):
# selected_labels=[1, 2, 6, 8, 9, 11, 13, 16, 26]


import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_and_split_data(data_folder, selected_labels=(1, 2, 3), test_size=0.2, random_state=12):
    """
    Load features and labels from disk, keep only the requested classes, and
    split the remaining samples into stratified training and test sets.

    The files ``feature_matrix.npy`` and ``labels.npy`` are read from
    ``data_folder``.

    Parameters:
    - data_folder: path of the folder that holds the two ``.npy`` files
    - selected_labels: labels to keep (anything ``np.isin`` accepts as test
      elements); the default is an immutable tuple so it can never be
      mutated between calls
    - test_size: forwarded to ``train_test_split`` (default 0.2, i.e. 20%)
    - random_state: seed forwarded to ``train_test_split`` for reproducibility

    Returns:
    - X_train, X_test: feature arrays of the training and test sets
    - y_train, y_test: label arrays of the training and test sets

    Raises:
    - FileNotFoundError: if either ``.npy`` file is missing
    - ValueError: if no sample carries one of ``selected_labels``
    """
    # Locate the input files.
    feature_path = os.path.join(data_folder, "feature_matrix.npy")
    label_path = os.path.join(data_folder, "labels.npy")

    if not os.path.exists(feature_path) or not os.path.exists(label_path):
        raise FileNotFoundError("特征文件或标签文件未找到，请检查路径！")

    # Per the original author's note the expected shapes are
    # X: (num_batches, num_windows, 15, num_channels) and y: (num_batches,).
    X = np.load(feature_path)
    y = np.load(label_path)

    # Report what was loaded.
    print(f"Loaded features from {feature_path}, shape: {X.shape}")
    print(f"Loaded labels from {label_path}, shape: {y.shape}")

    # Boolean mask that is True for samples whose label is requested.
    mask = np.isin(y, selected_labels)
    if not mask.any():
        # Fail early with a clear message instead of letting the splitter
        # choke on an empty dataset.
        raise ValueError(
            f"No samples in {label_path} match selected_labels={selected_labels}"
        )
    X_filtered = X[mask]
    y_filtered = y[mask]

    # Stratify on the labels so both splits keep the class proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X_filtered, y_filtered, test_size=test_size, random_state=random_state, stratify=y_filtered
    )

    # Report the resulting split sizes.
    print(f"Train set: X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"Test set: X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

# Usage example: load and split the data stored under the GZA folder.
data_folder = r"E:\MSC\Spring\AML\GestureLink\data\GZA\processed_data"
X_train_g, X_test_g, y_train_g, y_test_g = load_and_split_data(
    data_folder,
    selected_labels=[1, 2, 5, 6, 8, 9, 11, 13, 16, 26],
)
# Alternative label set (class 5 left out):
# selected_labels=[1, 2, 6, 8, 9, 11, 13, 16, 26]

# Merge the per-folder splits into a single training set and a single test set.
# The test arrays must be assembled from the *test* splits of every folder:
# using X_train_g / y_train_g here would leak training samples into the test
# set and silently drop the GZA test samples.
X_train = np.concatenate([X_train, X_train_f, X_train_g], axis=0)
X_test = np.concatenate([X_test, X_test_f, X_test_g], axis=0)
y_train = np.concatenate([y_train, y_train_f, y_train_g], axis=0)
y_test = np.concatenate([y_test, y_test_f, y_test_g], axis=0)

# Report the sizes of the merged sets.
print(f"Training set: X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Testing set: X_test: {X_test.shape}, y_test: {y_test.shape}")