In [30]:
from config.config_loader import load_config
import os
import re
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, iqr

# Project configuration, loaded once from config/feature_engineering_config.json
# through the project's own load_config helper.
CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))

def read_txt(path):
    """Load a pedal/steering log and append first and second time derivatives.

    Every usable line must hold four ``name= value`` fields separated by two
    spaces.  The four values are read, in order, as ``timestamp``,
    ``steering``, ``throttle`` and ``brake``.  Lines that do not match the
    pattern (for example blank lines) are skipped instead of aborting the
    whole file with an ``IndexError``.

    Args:
        path: Path of the GB2312-encoded text file.

    Returns:
        pandas.DataFrame with float columns ``timestamp``, ``steering``,
        ``throttle`` and ``brake``, followed for each of the three signals by
        ``<signal>_derivative`` and ``<signal>_second_derivative``.

    Raises:
        ValueError: From ``numpy.gradient`` when fewer than two usable lines
            were found, or from the float conversion of a malformed value.
    """
    # Compiled once; the raw string keeps the pattern free of escape surprises.
    pattern = re.compile(r'.*= (.*)  .*= (.*)  .*= (.*)  .*= (.*)')

    with open(path, 'r', encoding='GB2312') as f:
        rows = [found[0] for found in map(pattern.findall, f) if found]

    datas = pd.DataFrame(
        rows, columns=['timestamp', 'steering', 'throttle', 'brake']
    ).astype(float)

    # Differentiate each signal against the recorded timestamps (not the row
    # index), then differentiate the result once more.
    for signal in ('steering', 'throttle', 'brake'):
        first = np.gradient(datas[signal], datas['timestamp'])
        datas[f'{signal}_derivative'] = first
        datas[f'{signal}_second_derivative'] = np.gradient(first, datas['timestamp'])

    return datas

def read_speed_data(path,start_timestamp):
    """Load a speed log and append its first and second time derivatives.

    Each usable line is split at its last single space into a frame counter
    and a speed value.  The frame counter is turned into a timestamp as
    ``frame / 25 + start_timestamp``; the literal ``null`` becomes NaN and
    every other speed value is parsed as an integer.  Lines without a space
    are ignored.

    Args:
        path: Path of the GB2312-encoded text file.
        start_timestamp: Offset added to every computed timestamp.

    Returns:
        pandas.DataFrame with columns ``timestamp``, ``speed``,
        ``speed_derivative`` and ``speed_second_derivative``.  NaN speeds
        propagate into the neighbouring derivative values.
    """
    with open(path, 'r', encoding='GB2312') as f:
        raw_lines = f.readlines()

    records = []
    for raw in raw_lines:
        hits = re.findall('(.*) (.*)', raw)
        if not hits:
            continue
        frame_text, speed_text = hits[0]
        # Missing samples are written as the word 'null' in the log.
        speed = np.nan if speed_text == 'null' else int(speed_text)
        records.append((int(frame_text) / 25 + start_timestamp, speed))

    speed_datas = pd.DataFrame(records, columns=['timestamp', 'speed'])
    time_axis = speed_datas['timestamp']

    # First derivative of speed with respect to time.
    speed_datas['speed_derivative'] = np.gradient(speed_datas['speed'], time_axis)

    # Second derivative: gradient of the first derivative.
    speed_datas['speed_second_derivative'] = np.gradient(
        speed_datas['speed_derivative'], time_axis
    )

    return speed_datas

def csv_reader(name,start_time):
    """Load an eye-tracker CSV export and derive gaze and head kinematics.

    The ``Recording timestamp`` column is divided by 1e6 and shifted by
    ``start_time`` (presumably microseconds to seconds -- confirm against the
    export settings).  All derivatives are taken with ``numpy.gradient``
    against that shifted timestamp.

    Args:
        name: Path of the CSV file; it must contain every column listed in
            ``columns_to_read``.
        start_time: Offset added to the converted timestamps.

    Returns:
        pandas.DataFrame holding the timestamp, gaze point, eye positions,
        first/second derivatives of the gaze point, point-to-point gaze speed
        and its gradient, per-axis and combined head velocity/acceleration,
        both pupil diameters and the eye movement type.

    Side effects:
        Prints every row whose ``eyemovement_speed`` exceeds 100000 in
        absolute value, or a single line saying that there is none.
    """
    # Columns requested from the file; read_csv fails if one is absent.
    columns_to_read = ['Recording timestamp',
                       'Event', 'Recording date', 'Recording start time',
                       'Validity left', 'Validity right',
                       'Gaze point X', 'Gaze point Y',
                       'Gaze point left X', 'Gaze point left Y',
                       'Gaze point right X',
                       'Gaze point right Y',
                       'Eye position left X (DACSmm)',
                       'Eye position left Y (DACSmm)', 'Eye position left Z (DACSmm)',
                       'Eye position right X (DACSmm)', 'Eye position right Y (DACSmm)',
                       'Eye position right Z (DACSmm)',
                       'Eye movement type', 'Gaze event duration',
                       'Pupil diameter left','Pupil diameter right']
    eye_position_cols = ['Eye position left X (DACSmm)', 'Eye position left Y (DACSmm)',
                         'Eye position left Z (DACSmm)', 'Eye position right X (DACSmm)',
                         'Eye position right Y (DACSmm)', 'Eye position right Z (DACSmm)']

    df = pd.read_csv(str(name), usecols=columns_to_read, low_memory=False)

    kept = ['Recording timestamp', 'Gaze point X', 'Gaze point Y'] + eye_position_cols
    df_new = df[kept].copy().rename(columns={'Recording timestamp': 'timestamp'})
    df_new['timestamp'] = df_new['timestamp']/1e6 +start_time
    time_axis = df_new['timestamp']

    # Horizontal and vertical gaze velocity, then acceleration.
    for gaze_col in ('Gaze point X', 'Gaze point Y'):
        df_new[f'{gaze_col}_derivative'] = np.gradient(df_new[gaze_col], time_axis)
        df_new[f'{gaze_col}_second_derivative'] = np.gradient(
            df_new[f'{gaze_col}_derivative'], time_axis
        )

    # Euclidean distance between consecutive gaze points divided by the time
    # step; the first row has no predecessor and is therefore NaN.
    step_x = df_new['Gaze point X'].diff()
    step_y = df_new['Gaze point Y'].diff()
    step_t = time_axis.diff()
    df_new['eyemovement_speed'] = ((step_x ** 2 + step_y ** 2) ** 0.5) / step_t
    df_new['eyemovement_acceleration'] = np.gradient(df_new['eyemovement_speed'], time_axis)

    # Head position per axis is the mean of the available eye positions (NaN
    # when both eyes are missing); velocity and acceleration follow from it.
    for axis in ['X', 'Y', 'Z']:
        both_eyes = df[[f'Eye position left {axis} (DACSmm)',
                        f'Eye position right {axis} (DACSmm)']]
        head_position = np.where(
            both_eyes.isnull().all(axis=1),
            np.nan,
            both_eyes.mean(axis=1, skipna=True)
        )
        head_velocity = np.gradient(head_position, time_axis)
        df_new[f'Head_Velocity_{axis}'] = head_velocity
        df_new[f'Head_Acceleration_{axis}'] = np.gradient(head_velocity, time_axis)

    # Magnitude of the three-axis velocity and acceleration vectors.
    for quantity in ('Velocity', 'Acceleration'):
        df_new[f'Head_{quantity}_combined'] = np.sqrt(
            df_new[f'Head_{quantity}_X']**2 +
            df_new[f'Head_{quantity}_Y']**2 +
            df_new[f'Head_{quantity}_Z']**2
        )

    df_new['Pupil_diameter_left'] = df['Pupil diameter left']
    df_new['Pupil_diameter_right'] = df['Pupil diameter right']
    df_new['Eye movement type'] = df['Eye movement type']

    # Report implausibly fast gaze jumps so they can be inspected by hand.
    large_values = df_new[df_new['eyemovement_speed'].abs() > 100000]
    if large_values.empty:
        print("没有绝对值大于100000的数值")
    else:
        print("以下行的 'eyemovement_speed' 绝对值大于 100000:")
        for index, row in large_values.iterrows():
            print(f"\n行 {index}:")
            print(f"eyemovement_speed = {row['eyemovement_speed']}")
            print(f"Gaze point X = {row['Gaze point X']}")
            print(f"Gaze point Y = {row['Gaze point Y']}")
            print(f"timestamp = {row['timestamp']}")

    return df_new

## 下面是速度滤波 从P1_study2.txt处理成P1_study2_cleaned.txt 对应config文件的路径替换一下即可

In [31]:
import json
from pathlib import Path
import pandas as pd
import numpy as np

# === Load CONFIG ===
# Plain json.load of config/feature_engineering_config.json; the resulting
# dict `config` is what the batch_* functions in this notebook read from.
CONFIG_PATH = Path('config') / 'feature_engineering_config.json'
with CONFIG_PATH.open(encoding='utf-8') as f:
    config = json.load(f)

# Absolute path of the current working directory.
# NOTE(review): not referenced by the code visible here -- confirm it is still needed.
BASE_DIR = Path('.').resolve()

# === 清洗核心函数 ===
def clean_speed_file(input_path, output_path):
    """Remove abnormal speed segments from a speed log and write the result.

    The second whitespace-separated field of every line is read as the speed
    (``null`` or anything unparsable becomes NaN); lines with fewer than two
    fields are skipped.  A two-state scan then looks for segments that start
    with a jump larger than ``BIG_DIFF_THRESHOLD`` and end right before
    ``SMALL_DIFF_COUNT_NEEDED`` consecutive steps smaller than
    ``SMALL_DIFF_THRESHOLD``.  Such a segment is dropped when it is not
    monotonically non-increasing, or when its opening jump exceeds
    ``MAX_ENTRY_JUMP``.  Segments made of a single row are never dropped.

    The output has two space-separated columns: the zero-based line index the
    sample had in the input file, and the speed rounded to an integer (or the
    word ``null``).

    Args:
        input_path: Path of the UTF-8 speed log to clean.
        output_path: Destination path; parent directories are created.

    Returns:
        None.  A missing input file only produces a warning on stdout.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    if not input_path.exists():
        print(f"[WARNING] 输入文件不存在，跳过: {input_path}")
        return

    data_rows = []
    for line_no, line in enumerate(input_path.read_text(encoding='utf-8').splitlines()):
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            # float('null') raises as well, so 'null' needs no special case.
            value = float(parts[1])
        except ValueError:
            value = np.nan
        data_rows.append((line_no, value))

    df = pd.DataFrame(data_rows, columns=['original_line', 'speed'])
    speeds = df['speed'].to_numpy(dtype=float)
    # Absolute step to the previous sample; NaN for the first sample and next
    # to missing values, which makes every threshold test below False.
    diffs = np.abs(np.diff(speeds, prepend=np.nan))

    BIG_DIFF_THRESHOLD = 3
    SMALL_DIFF_THRESHOLD = 3
    SMALL_DIFF_COUNT_NEEDED = 17
    MAX_ENTRY_JUMP = 27

    in_abnormal = False
    small_diff_counter = 0
    start_index = None
    # Inclusive (start, end) ROW POSITIONS in df.  Positions, not original
    # file line numbers, are stored because the mask below is positional; the
    # two differ as soon as one input line has been skipped.
    abnormal_segments = []

    for i in range(1, len(speeds)):
        diff = diffs[i]

        if not in_abnormal:
            if diff > BIG_DIFF_THRESHOLD:
                start_index = i
                in_abnormal = True
                small_diff_counter = 0
            continue

        if not diff < SMALL_DIFF_THRESHOLD:
            # Still jumping (or NaN): the quiet run has to start over.
            small_diff_counter = 0
            continue

        small_diff_counter += 1
        if small_diff_counter < SMALL_DIFF_COUNT_NEEDED:
            continue

        # The quiet run covers i-COUNT+1 .. i and the sample before it is the
        # jump back to normal, so the segment ends one row earlier still.
        end_index = max(start_index, i - SMALL_DIFF_COUNT_NEEDED - 1)
        segment = speeds[start_index:end_index + 1]
        is_decreasing = bool(np.all(segment[1:] <= segment[:-1]))
        # diffs[start_index] is |first abnormal value - previous value|.
        is_abnormal = (not is_decreasing) or diffs[start_index] > MAX_ENTRY_JUMP
        if is_abnormal and end_index > start_index:
            abnormal_segments.append((start_index, end_index))
        in_abnormal = False

    keep = np.ones(len(df), dtype=bool)
    for seg_start, seg_end in abnormal_segments:
        keep[seg_start:seg_end + 1] = False

    # Explicit copy so that rewriting the speed column does not operate on a
    # view of df.
    output_df = df.loc[keep, ['original_line', 'speed']].copy()
    output_df['speed'] = output_df['speed'].apply(
        lambda x: 'null' if pd.isnull(x) else str(int(round(x)))
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_df.to_csv(output_path, sep=' ', index=False, header=False, encoding='utf-8')

# === 批量处理主函数 ===
def batch_process_speed_cleaning():
    """Run ``clean_speed_file`` over every speed log listed in ``config``.

    For both groups (``NC_data_path``, ``PD_data_path``) and both experiments
    (``ex1``, ``ex2``) the lists ``speed_path_<ex>`` and
    ``speed_path_<ex>_cleaned`` are paired element by element.  Empty path
    entries are skipped, in the same way ``batch_process_csv_interpolation``
    skips them; without that guard an empty string resolves to the current
    directory, passes the existence check and fails when it is read.
    """
    for group_key in ['NC_data_path', 'PD_data_path']:
        group_data = config.get(group_key, {})
        print(f"\n[INFO] 正在处理组别: {group_key}")

        for ex in ['ex1', 'ex2']:
            input_list = group_data.get(f"speed_path_{ex}", [])
            output_list = group_data.get(f"speed_path_{ex}_cleaned", [])

            print(f"[INFO]  实验: {ex}  文件数: {len(input_list)}")

            # zip() silently drops the surplus entries of the longer list, so
            # make a mismatch visible.
            if len(input_list) != len(output_list):
                print(f"[WARNING] 输入/输出路径数量不一致 "
                      f"({len(input_list)} vs {len(output_list)})，多余的条目将被忽略")

            for in_path, out_path in zip(input_list, output_list):
                if not in_path or not out_path:
                    continue
                clean_speed_file(in_path, out_path)

# === 主程序入口 ===
# if __name__ == "__main__":
#     batch_process_speed_cleaning()

## 眼动计算丢失率，插值，删除invalid所有行，加滤波

In [32]:
import numpy as np

def hampel_filter_forloop_numba(input_series, window_size, n_sigmas=3):
    """Replace local outliers with the local median (Hampel identifier).

    For every position that has ``window_size`` samples on both sides, the
    NaN-ignoring median and the scaled median absolute deviation of the
    surrounding window are computed.  A sample further than ``n_sigmas``
    scaled deviations from the median is replaced by that median.

    Args:
        input_series: Positionally indexable numeric sequence offering
            ``.copy()``; the caller in this file passes a NumPy array.
        window_size: Number of samples taken on the left of the centre.
        n_sigmas: Rejection threshold in scaled-MAD units.

    Returns:
        Tuple ``(filtered_copy, outlier_positions)``; the input itself is not
        modified.
    """
    # NOTE(review): the slice below ends at centre + window_size (exclusive),
    # so the window holds 2 * window_size samples and is one sample shorter on
    # the right than on the left.  Kept as is; confirm this is intended.
    mad_to_sigma = 1.4826  # turns a MAD into a standard-deviation estimate for Gaussian data
    filtered = input_series.copy()
    outlier_positions = []

    for centre in range(window_size, len(input_series) - window_size):
        window = input_series[centre - window_size:centre + window_size]
        local_median = np.nanmedian(window)
        robust_sigma = mad_to_sigma * np.nanmedian(np.abs(window - local_median))
        deviation = np.abs(input_series[centre] - local_median)
        if deviation > n_sigmas * robust_sigma:
            filtered[centre] = local_median
            outlier_positions.append(centre)

    return filtered, outlier_positions

In [None]:
from pathlib import Path
import pandas as pd
# Folder holding the raw recordings.
RAW_DATA_DIR = Path('raw_data')

# Starts empty; the loop that used to fill it is commented out below.
all_input_files = []

# groups = [
#     {"prefix": "", "range": range(1, 14)},    # P1 - P13
#     {"prefix": "PD/", "range": range(1, 9)}   # PD1 - PD8
# ]

# for group in groups:
#     prefix = group["prefix"]
#     for i in group["range"]:
#         subfolder = f'{prefix}P{i}'
#         filename = f'P{i}_study2.csv'
#         path = RAW_DATA_DIR / subfolder / filename
#         if path.exists():
#             all_input_files.append(path)
#         else:
#             print(f"[WARNING] File not found: {path}")


# 需要插值的列名清单
# VALUE_COLS_TO_INTERPOLATE = [
#     'Gaze point X', 'Gaze point Y',
#     'Gaze point left X', 'Gaze point left Y',
#     'Gaze point right X', 'Gaze point right Y',
#     'Gaze direction left X', 'Gaze direction left Y', 'Gaze direction left Z',
#     'Gaze direction right X', 'Gaze direction right Y', 'Gaze direction right Z',
#     'Pupil diameter left', 'Pupil diameter right',
#     'Eye position left X (DACSmm)', 'Eye position left Y (DACSmm)', 'Eye position left Z (DACSmm)',
#     'Eye position right X (DACSmm)', 'Eye position right Y (DACSmm)', 'Eye position right Z (DACSmm)',
#     'Gaze point left X (DACSmm)', 'Gaze point left Y (DACSmm)',
#     'Gaze point right X (DACSmm)', 'Gaze point right Y (DACSmm)',
#     'Gaze point X (MCSnorm)', 'Gaze point Y (MCSnorm)',
#     'Gaze point left X (MCSnorm)', 'Gaze point left Y (MCSnorm)',
#     'Gaze point right X (MCSnorm)', 'Gaze point right Y (MCSnorm)',
#     'Fixation point X', 'Fixation point Y',
#     'Fixation point X (MCSnorm)', 'Fixation point Y (MCSnorm)'
# ]
# VALUE_COLS_TO_INTERPOLATE = [
#     'Gaze point X', 'Gaze point Y',
#     'Gaze point left X', 'Gaze point left Y',
#     'Gaze point right X', 'Gaze point right Y',
#     'Eye position left X (DACSmm)', 'Eye position left Y (DACSmm)', 'Eye position left Z (DACSmm)',
#     'Eye position right X (DACSmm)', 'Eye position right Y (DACSmm)', 'Eye position right Z (DACSmm)'
# ]
# Numeric feature columns that preprocess_and_interpolate_file blanks on
# event/invalid rows, converts to numbers, gap-fills and Hampel-filters.
VALUE_COLS_TO_INTERPOLATE = [
    'Gaze point left X', 'Gaze point left Y',
    'Gaze point right X', 'Gaze point right Y',
    'Eye position left X (DACSmm)', 'Eye position left Y (DACSmm)', 'Eye position left Z (DACSmm)',
    'Eye position right X (DACSmm)', 'Eye position right Y (DACSmm)', 'Eye position right Z (DACSmm)',
    'Pupil diameter left','Pupil diameter right']

def _interpolate_short_gaps(series, max_gap):
    """Linearly fill NaN runs of at most ``max_gap`` samples; leave longer runs NaN."""
    is_na = series.isna()
    # A new run starts wherever the NaN flag differs from the previous row.
    run_id = (is_na != is_na.shift()).cumsum()
    run_length = is_na.groupby(run_id).transform('size')
    fillable = is_na & (run_length <= max_gap)
    # One interpolation pass per column; only the short gaps take its values.
    interpolated = series.interpolate(method='linear', limit_direction='both')
    return series.where(~fillable, interpolated)


def preprocess_and_interpolate_file(path: Path, save_processed=False) -> tuple:
    """Clean one raw eye-tracker export and report how much of it was lost.

    Steps:
    1. Drop every row up to and including the first ``ScreenRecordingStart``
       event, then keep only rows whose ``Sensor`` is ``Eye Tracker``.
    2. Blank the columns in ``VALUE_COLS_TO_INTERPOLATE`` on rows that carry
       an event and on rows where BOTH eyes are marked ``Invalid``.
    3. Convert those columns to numbers and linearly interpolate gaps of at
       most five consecutive samples.
    4. Forward-fill, then back-fill whatever is still missing.
    5. Apply the Hampel filter to every column.
    6. Recompute ``Gaze point X`` / ``Gaze point Y`` as the mean of the left
       and right eye values.

    Args:
        path: CSV file to process.
        save_processed: Accepted but not used by this function.

    Returns:
        Tuple ``(df, loss_rate, total_outlier_count,
        outlier_indices_by_column)``: the cleaned frame, the fraction of
        eye-tracker rows blanked in step 2 (NaN when there are no such rows),
        the number of samples replaced by the Hampel filter, and the replaced
        positions per column.

    Raises:
        IndexError: If the file contains no ``ScreenRecordingStart`` event.
    """
    # Positions replaced by the Hampel filter, per column, kept for inspection.
    outlier_indices_by_column = {}

    df = pd.read_csv(path, dtype=str, low_memory=False)

    # Cut everything up to and including the ScreenRecordingStart row.
    start_rows = df.index[df['Event'] == 'ScreenRecordingStart']
    if start_rows.empty:
        raise IndexError(f"no 'ScreenRecordingStart' event found in {path}")
    calibration_start_idx = start_rows[0]
    df = df.iloc[calibration_start_idx+1:].copy()
    df.reset_index(drop=True, inplace=True)
    df = df.drop(df[df['Sensor'] != 'Eye Tracker'].index)
    total_count = len(df)

    # Rows carrying an event lose all their numeric feature values.
    event_nonempty_mask = df['Event'].notna() & (df['Event'].str.strip() != '')
    df.loc[event_nonempty_mask, VALUE_COLS_TO_INTERPOLATE] = None

    # So do rows where both the left and the right eye are invalid.
    invalid_mask = (df['Validity left'] == 'Invalid') & (df['Validity right'] == 'Invalid')
    df.loc[invalid_mask, VALUE_COLS_TO_INTERPOLATE] = None

    # Loss rate: a row that is both invalid and an event row counts once, and
    # an empty frame yields NaN instead of a division by zero.
    invalid_count = (invalid_mask | event_nonempty_mask).sum()
    loss_rate = invalid_count / total_count if total_count else float('nan')

    # Anything that is not a number becomes NaN.
    for col in VALUE_COLS_TO_INTERPOLATE:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Only gaps of up to max_gap consecutive samples are interpolated.
    max_gap = 5
    for col in VALUE_COLS_TO_INTERPOLATE:
        df[col] = _interpolate_short_gaps(df[col], max_gap)

    # Edge fill for values the interpolation left missing.
    # NOTE(review): this also fills the gaps longer than max_gap that were
    # deliberately left NaN just above -- confirm that is intended.
    df[VALUE_COLS_TO_INTERPOLATE] = (
        df[VALUE_COLS_TO_INTERPOLATE]
        .ffill()
        .bfill()
    )

    # Hampel filter on every feature column.
    total_outlier_count = 0
    for col in VALUE_COLS_TO_INTERPOLATE:
        series = df[col].to_numpy()
        filtered, indices = hampel_filter_forloop_numba(series, window_size=10, n_sigmas=3)
        df[col] = filtered
        total_outlier_count += len(indices)
        outlier_indices_by_column[col] = indices

    # Combined gaze point = mean of the left-eye and right-eye coordinates.
    df['Gaze point X'] = df[['Gaze point left X', 'Gaze point right X']].mean(axis=1)
    df['Gaze point Y'] = df[['Gaze point left Y', 'Gaze point right Y']].mean(axis=1)

    return df, loss_rate, total_outlier_count, outlier_indices_by_column

In [None]:
def batch_process_csv_interpolation():
    """Clean every eye-tracker CSV listed in ``config`` and save the result.

    For both groups (``NC_data_path``, ``PD_data_path``) and both experiments
    (``ex1``, ``ex2``) the lists ``csv_path_<ex>`` and
    ``csv_path_<ex>_cleaned`` are paired element by element.  Entries whose
    input path is an empty string are skipped.  Each remaining file goes
    through ``preprocess_and_interpolate_file`` and the returned frame is
    written to the paired output path, creating parent folders on demand.
    """
    for group_key in ['NC_data_path', 'PD_data_path']:
        group_data = config.get(group_key, {})
        print(f"\n[INFO] 处理组别: {group_key}")

        for ex in ['ex1', 'ex2']:
            input_list = group_data.get(f"csv_path_{ex}", [])
            output_list = group_data.get(f"csv_path_{ex}_cleaned", [])
            print(f"[INFO] 实验 {ex}：准备处理 {len(input_list)} 个文件")

            for in_path_str, out_path_str in zip(input_list, output_list):
                if in_path_str == "":
                    continue

                source_file = Path(in_path_str)
                target_file = Path(out_path_str)

                # Only the frame and the loss rate are used; the outlier
                # statistics are returned as well but ignored here.
                df, loss_rate, _n_outliers, _outlier_positions = (
                    preprocess_and_interpolate_file(source_file)
                )
                print("Hampel Filter 滤波")

                # Make sure the destination folder exists before writing.
                target_file.parent.mkdir(parents=True, exist_ok=True)
                df.to_csv(target_file, index=False)
                print(f"[INFO] 已保存清洗后文件: {target_file} (丢失率: {loss_rate:.2%})")


if __name__ == "__main__":
    batch_process_csv_interpolation()


[INFO] 处理组别: NC_data_path
[INFO] 实验 ex1：准备处理 31 个文件
Hampel Filter 滤波
[INFO] 已保存清洗后文件: raw_data\NC\P26\P26_study1_cleaned.csv (丢失率: 54.58%)
[INFO] 实验 ex2：准备处理 31 个文件
Hampel Filter 滤波
[INFO] 已保存清洗后文件: raw_data\NC\P26\P26_study2_cleaned.csv (丢失率: 68.18%)

[INFO] 处理组别: PD_data_path
[INFO] 实验 ex1：准备处理 27 个文件
[INFO] 实验 ex2：准备处理 27 个文件


In [36]:
import pandas as pd

# Root folder for the merged per-sensor tables.
save_root = "data_pro1"
os.makedirs(save_root, exist_ok=True)

# Cohort name -> number of participants, both taken from the configuration.
groups = {"NC": CONFIG_DATA["NC_number"], "PD": CONFIG_DATA["PD_number"]}

# Experiment rounds.
ex_num_list = ["ex1", "ex2"]

# The three data sources that are merged separately.
sensor_types = ["pedal", "speed", "eyemovement"]

for group, person_count in groups.items():
    data_path_key = f"{group}_data_path"

    for ex_num in ex_num_list:
        for sensor in sensor_types:
            print(f"Processing: {group} - {ex_num} - {sensor}")
            all_dfs = []

            for person_num in range(person_count):
                # Index 8 of the PD cohort is left out of ex1.
                if (group, ex_num, person_num) == ("PD", "ex1", 8):
                    continue

                # All paths are looked up for every sensor, whether or not
                # the current sensor needs them.
                pedal_data_path = CONFIG_DATA[data_path_key][f"pedal_path_{ex_num}"][person_num]
                speed_data_path = CONFIG_DATA[data_path_key][f"speed_path_{ex_num}"][person_num]
                start_timestamp = CONFIG_DATA[data_path_key][f"video_start_timestamp_{ex_num}"][person_num]
                print(data_path_key)
                print(ex_num)
                print(person_num)
                csv_path = CONFIG_DATA[data_path_key][f"csv_path_{ex_num}_cleaned"][person_num]

                # Load the table that belongs to the current sensor.
                if sensor == "pedal":
                    df = read_txt(pedal_data_path)
                elif sensor == "speed":
                    df = read_speed_data(speed_data_path, start_timestamp)
                elif sensor == "eyemovement":
                    df = csv_reader(csv_path, start_timestamp)

                # Tag the rows with a one-based participant id.
                df['person_id'] = person_num + 1
                all_dfs.append(df)

            # Stack all participants and write one CSV per group/sensor/round.
            result_df = pd.concat(all_dfs, ignore_index=True)
            save_filename = f"{group}_{sensor}_data_{ex_num}.csv"
            save_path = os.path.join(save_root, save_filename)
            result_df.to_csv(save_path, index=False)

            print(f"Saved: {save_path}")

Processing: NC - ex1 - pedal
NC_data_path
ex1
0
NC_data_path
ex1
1
NC_data_path
ex1
2
NC_data_path
ex1
3
NC_data_path
ex1
4
NC_data_path
ex1
5
NC_data_path
ex1
6
NC_data_path
ex1
7
NC_data_path
ex1
8
NC_data_path
ex1
9
NC_data_path
ex1
10
NC_data_path
ex1
11
NC_data_path
ex1
12
NC_data_path
ex1
13
NC_data_path
ex1
14
NC_data_path
ex1
15
NC_data_path
ex1
16
NC_data_path
ex1
17
NC_data_path
ex1
18
NC_data_path
ex1
19
NC_data_path
ex1
20
NC_data_path
ex1
21
NC_data_path
ex1
22
NC_data_path
ex1
23
NC_data_path
ex1
24
NC_data_path
ex1
25
NC_data_path
ex1
26
NC_data_path
ex1
27
NC_data_path
ex1
28
NC_data_path
ex1
29
NC_data_path
ex1
30
Saved: data_pro1\NC_pedal_data_ex1.csv
Processing: NC - ex1 - speed
NC_data_path
ex1
0
NC_data_path
ex1
1
NC_data_path
ex1
2
NC_data_path
ex1
3
NC_data_path
ex1
4
NC_data_path
ex1
5
NC_data_path
ex1
6
NC_data_path
ex1
7
NC_data_path
ex1
8
NC_data_path
ex1
9
NC_data_path
ex1
10
NC_data_path
ex1
11
NC_data_path
ex1
12
NC_data_path
ex1
13
NC_data_path
ex1
14
NC_