## 特征计算函数

In [None]:
# Sliding-window configuration, expressed in rows of the pedal log.
# The author's note derives the window size as int(60 * 21.25) = 1275 rows,
# presumably a 60 s window at ~21.25 samples/s -- TODO confirm the rate.
# Step sizes noted by the author (seconds -> rows):
#   0.5 s -> 11
#   1 s   -> 21
#   2 s   -> 43
#   5 s   -> 108
#   150 s -> (row count left blank in the original note)
WINDOW_SIZE = 1275
WINDOW_STEP = 21
# NOTE(review): the file name says "120" while WINDOW_SIZE is 1275 rows;
# verify the name matches the window actually used before writing output.
output_name= "feature_120_1.csv"

In [31]:
import multiprocessing
from joblib import Parallel, delayed
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, iqr
from scipy.fft import fft
from scipy.stats import entropy
import pywt

#频域特征计算
def get_fft_stats(df, columns=['steering', 'throttle', 'brake']):
    """Summarise the one-sided FFT magnitude spectrum of selected columns.

    Columns that are missing from ``df`` or that hold fewer than two
    non-NaN samples are skipped silently.

    Args:
        df: DataFrame holding the signals.
        columns: Names of the columns to analyse.

    Returns:
        dict with, for every analysed column ``c``, the keys
        ``FFT_c_mean``, ``FFT_c_std``, ``FFT_c_max``, ``FFT_c_min``,
        ``FFT_c_energy`` and ``FFT_c_entropy``.
    """
    spectral_features = {}

    for name in columns:
        if name not in df.columns:
            continue

        samples = df[name].dropna().values
        if len(samples) < 2:
            continue  # too short to analyse

        # Magnitude spectrum, keeping the first half of the bins only.
        magnitude = np.abs(fft(samples))
        magnitude = magnitude[:len(magnitude) // 2]

        power = magnitude ** 2
        total_power = np.sum(power)

        spectral_features.update({
            f'FFT_{name}_mean': np.mean(magnitude),
            f'FFT_{name}_std': np.std(magnitude),
            f'FFT_{name}_max': np.max(magnitude),
            f'FFT_{name}_min': np.min(magnitude),
            f'FFT_{name}_energy': total_power,
        })

        # Spectral entropy of the power spectrum, normalised to sum to 1
        # whenever the total power is non-zero.
        if total_power != 0:
            distribution = power / total_power
        else:
            distribution = power
        spectral_features[f'FFT_{name}_entropy'] = entropy(distribution)

    return spectral_features

#小波变换参数特征计算
def extract_wavelet_features(df, columns=['steering', 'throttle', 'brake'], wavelet='db4', level=3):
    """Compute per-band statistics of a discrete wavelet decomposition.

    Each selected column is decomposed with ``pywt.wavedec``; every
    coefficient array it returns is summarised under the prefix
    ``WVL_<col>_L<i>``, where ``i`` is the position of the array in the
    returned list. Columns that are missing or hold fewer than two
    non-NaN samples are skipped.

    Args:
        df: DataFrame holding the signals.
        columns: Names of the columns to analyse.
        wavelet: Wavelet name passed to ``pywt.wavedec``.
        level: Decomposition level passed to ``pywt.wavedec``.

    Returns:
        dict with ``_mean``, ``_std``, ``_max``, ``_energy`` and
        ``_entropy`` entries for every band of every analysed column.
    """
    wavelet_features = {}

    for name in columns:
        if name not in df.columns:
            continue

        samples = df[name].dropna().values
        if len(samples) < 2:
            continue

        bands = pywt.wavedec(samples, wavelet, level=level)

        for band_idx, band in enumerate(bands):
            tag = f'WVL_{name}_L{band_idx}'
            band_power = np.square(band)
            band_energy = np.sum(band_power)

            wavelet_features[f'{tag}_mean'] = np.mean(band)
            wavelet_features[f'{tag}_std'] = np.std(band)
            wavelet_features[f'{tag}_max'] = np.max(band)
            wavelet_features[f'{tag}_energy'] = band_energy

            # Entropy of the energy distribution across coefficients;
            # the raw coefficients are used when the band has no energy.
            if band_energy > 0:
                weights = band_power / band_energy
            else:
                weights = band
            wavelet_features[f'{tag}_entropy'] = entropy(weights)

    return wavelet_features

#上一篇文章统计特征计算
def extract_pedal(pedal_df:pd.DataFrame):
    """Compute the legacy ("LAST_*") steering and pedal features of a window.

    Args:
        pedal_df: Window of pedal samples. The columns ``steering``,
            ``steering_derivative``, ``timestamp``, ``throttle`` and
            ``brake`` are read. The frame is not modified.

    Returns:
        dict keyed by the ``LAST_*`` feature names. Every value is NaN when
        ``pedal_df`` has no rows.
    """
    res = {
        "LAST_steer_sum" : np.nan,
        "LAST_steer_abs_sum" : np.nan,
        "LAST_steer_flucuate_times" : np.nan,
        "LAST_steer_sum_per_fulct" : np.nan,
        "LAST_steer_max_fluct" : np.nan,
        "LAST_steer_mean_fluct_speed" : np.nan,
        "LAST_steer_max_fluct_speed" : np.nan,
        "LAST_throttle_duration" : np.nan,
        "LAST_brake_duration" : np.nan,
        "LAST_throttle_brake_ratio" : np.nan,
        "LAST_brake_times" : np.nan,
        "LAST_throttle_auc" : np.nan,
    }
    if pedal_df.shape[0]==0:
        return res

    # Work on a copy so the caller's frame is left untouched.
    pedal_df = pedal_df.copy()
    # Flip the pedal axes so that a raw reading of 32767 becomes 0.
    # presumably 32767 is the released position -- TODO confirm with the logger.
    pedal_df["throttle"] = -pedal_df["throttle"] + 32767.0
    pedal_df["brake"] = -pedal_df["brake"] + 32767.0

    steering = pedal_df["steering"]
    steering_magnitude = np.abs(steering)
    res["LAST_steer_sum"] = steering.sum()
    res["LAST_steer_abs_sum"] = steering_magnitude.sum()

    # A "fluctuation" is a sample whose product with its predecessor is <= 0
    # (a sign change or a touch of zero). The first sample has no predecessor;
    # its NaN product compares False and is therefore not counted.
    crossings = int(((steering * steering.shift(1)) <= 0).sum())
    res["LAST_steer_flucuate_times"] = crossings
    # The previous code divided by a misspelled key, and the resulting
    # KeyError was swallowed by a bare ``except`` that forced this value to 0.
    if crossings == 0:
        res["LAST_steer_sum_per_fulct"] = 0
    else:
        res["LAST_steer_sum_per_fulct"] = res["LAST_steer_abs_sum"] / crossings

    res["LAST_steer_max_fluct"] = steering_magnitude.max()
    steering_speed = np.abs(pedal_df["steering_derivative"])
    res["LAST_steer_mean_fluct_speed"] = np.nanmean(steering_speed)
    res["LAST_steer_max_fluct_speed"] = steering_speed.max()

    # Time until the next sample; the last row has no successor (NaN) and is
    # ignored by ``sum``.
    sample_duration = pedal_df["timestamp"].shift(-1) - pedal_df["timestamp"]
    res["LAST_throttle_duration"] = sample_duration[pedal_df["throttle"] != 0].sum()
    res["LAST_brake_duration"] = sample_duration[pedal_df["brake"] != 0].sum()
    # +0.01 keeps the ratio finite when the brake duration is zero.
    res["LAST_throttle_brake_ratio"] = res["LAST_throttle_duration"] / (res["LAST_brake_duration"]+0.01)

    # Count brake applications: walk the index labels of the rows whose
    # (inverted) brake value is 0 and count every jump larger than one label,
    # starting from the first label of the window.
    released_idx = pedal_df.index[pedal_df["brake"] == 0].to_numpy()
    if released_idx.size:
        gaps = np.diff(released_idx, prepend=pedal_df.index[0])
        res["LAST_brake_times"] = int(np.sum((gaps != 0) & (gaps != 1)))
    else:
        res["LAST_brake_times"] = 0

    res["LAST_throttle_auc"] = pedal_df["throttle"].sum()
    return res

#新特征
def extract_new_pedal_feature(pedal_df:pd.DataFrame, sampling_rate:float=21.75):
    """Compute four additional steering features for one window.

    Features:
        * ``NewPedal_steering_Change_quantile_mean``: mean of
          ``|delta steering| / delta timestamp`` over consecutive rows
          (rows with a zero time step are excluded).
        * ``NewPedal_steering_fft_Kurtosis``: Pearson kurtosis
          (``fisher=False``) of the full FFT magnitude spectrum.
        * ``NewPedal_steering_unique_value_ratio``: number of distinct
          steering values divided by the number of rows.
        * ``NewPedal_steering_FFT_Aggregated_Centroid``: magnitude-weighted
          mean frequency of the first half of the spectrum; 0.0 when the
          spectrum carries no magnitude.

    Args:
        pedal_df: Window of pedal samples; the columns ``steering`` and
            ``timestamp`` are read. The frame is not modified.
        sampling_rate: Sampling rate used to build the frequency axis of
            the centroid. NOTE(review): the default 21.75 is the value that
            was hard-coded here, while the window-size note in this file
            uses 21.25 -- confirm which one is correct.

    Returns:
        dict with the four features. All values are NaN for an empty frame;
        the two FFT features stay NaN when ``steering`` has no non-NaN
        sample (previously this case raised inside ``fft``).
    """
    res = {
        "NewPedal_steering_Change_quantile_mean" : np.nan,
        "NewPedal_steering_fft_Kurtosis": np.nan,
        "NewPedal_steering_unique_value_ratio": np.nan,
        "NewPedal_steering_FFT_Aggregated_Centroid":np.nan
    }
    if pedal_df.shape[0]==0:
        return res

    steering = pedal_df["steering"]

    # --- Feature 1: mean absolute steering rate --------------------------
    abs_change = steering.diff().abs()
    # A zero time step would divide by zero; turn it into NaN so the sample
    # is skipped by ``mean``.
    time_step = pedal_df["timestamp"].diff().replace(0, np.nan)
    res["NewPedal_steering_Change_quantile_mean"] = (abs_change / time_step).mean()

    # --- Feature 3: unique value ratio -----------------------------------
    res["NewPedal_steering_unique_value_ratio"] = steering.nunique() / len(steering)

    # --- Features 2 and 4: FFT based -------------------------------------
    steering_signal = steering.dropna().values
    n = len(steering_signal)
    if n == 0:
        # The FFT of an empty signal is undefined; leave both features NaN.
        return res

    # One transform serves both features.
    spectrum = np.abs(fft(steering_signal))
    res["NewPedal_steering_fft_Kurtosis"] = kurtosis(spectrum, fisher=False)

    half_spectrum = spectrum[:n//2]
    freqs = np.fft.fftfreq(n, d=1.0/sampling_rate)[:n//2]
    total_magnitude = np.sum(half_spectrum)
    if total_magnitude > 0:
        centroid = np.sum(freqs * half_spectrum) / total_magnitude
    else:
        centroid = 0.0  # avoid dividing by zero
    res["NewPedal_steering_FFT_Aggregated_Centroid"] = centroid

    return res

#新特征
def extract_new_speed_feature(speed_df:pd.DataFrame):
    """Compute three additional speed features for one window.

    Features:
        * ``NewSpeed_90%_quantile``: fraction of rows whose speed is
          strictly above the window's 90 % quantile.
        * ``NewSpeed_C3``: mean of ``x[i] * x[i-1] * x[i-2]`` over the
          non-NaN speeds, minus the cube of their mean. It needs at least
          three non-NaN samples and stays NaN otherwise (two samples used
          to raise ``ZeroDivisionError``; one sample divided by -1).
        * ``NewSpeed_change_quantile_variance``: variance of the absolute
          first differences of the speed.

    Args:
        speed_df: Window of speed samples; the column ``speed`` is read.

    Returns:
        dict with the three features; all NaN for an empty frame.
    """
    res = {
        "NewSpeed_90%_quantile" :np.nan,
        "NewSpeed_C3":np.nan,
        "NewSpeed_change_quantile_variance":np.nan
    }
    if speed_df.shape[0]==0:
        return res

    speed_series = speed_df["speed"]

    # --- Share of samples above the 90 % quantile -------------------------
    q90 = speed_series.quantile(0.9)
    res["NewSpeed_90%_quantile"] = len(speed_series[speed_series > q90]) / len(speed_series)

    # --- C3 non-linearity measure -----------------------------------------
    speed_values = speed_series.dropna().values
    if len(speed_values) >= 3:
        # Vectorised product of every sample with its two predecessors.
        triple_products = speed_values[2:] * speed_values[1:-1] * speed_values[:-2]
        res["NewSpeed_C3"] = np.mean(triple_products) - np.mean(speed_values) ** 3

    # --- Variance of the absolute speed changes ---------------------------
    abs_diff = np.abs(speed_series.diff().dropna())
    res["NewSpeed_change_quantile_variance"] = np.var(abs_diff)

    return res

from shapely.geometry import Point, box
from shapely.ops import unary_union

def compute_gaze_area(data:pd.DataFrame, radius=25, screen_width=1920, screen_height=1080):
    """Return the screen area covered by discs drawn around every gaze point.

    A disc of ``radius`` is buffered around each
    (``Gaze point X``, ``Gaze point Y``) pair, clipped to the screen
    rectangle, and the area of the union of all clipped discs is returned.

    Unlike the previous version, the input frame is not modified (no
    ``circle`` column is added to it), and the screen rectangle is built
    once instead of once per row.

    Args:
        data: Gaze samples with the columns ``Gaze point X`` and
            ``Gaze point Y``.
        radius: Disc radius, in the same units as the gaze coordinates.
        screen_width: Width of the clipping rectangle (default 1920).
        screen_height: Height of the clipping rectangle (default 1080).

    Returns:
        Area of the union of the clipped discs.
    """
    screen_box = box(0, 0, screen_width, screen_height)

    clipped_discs = [
        Point(x, y).buffer(radius).intersection(screen_box)
        for x, y in zip(data['Gaze point X'], data['Gaze point Y'])
    ]

    # Overlapping discs must be counted once, hence the union.
    return unary_union(clipped_discs).area

def extract_eye_movement_type(eye_df):
    """Aggregate fixation and saccade statistics for one window.

    Consecutive rows sharing the same ``Eye movement type`` form a segment.
    For every ``Fixation`` segment the duration (last minus first timestamp)
    is recorded; for every ``Saccade`` segment the duration, the amplitude
    (distance between first and last gaze point) and the peak and mean
    sample velocity are recorded as well. Segments of any other type are
    ignored.

    Args:
        eye_df: Eye-tracking samples with the columns ``timestamp``,
            ``Gaze point X``, ``Gaze point Y`` and ``Eye movement type``.
            A copy is taken; the caller's frame is not modified.

    Returns:
        dict of ``GE_*`` features: segment counts plus mean, std, 5 % and
        95 % quantiles, skewness and kurtosis of each per-segment metric.
        All values stay NaN when no fixation or saccade segment exists.
    """
    eye_df = eye_df.copy()

    stat_suffixes = ("mean", "std", "005quantiles", "095quantiles",
                     "skewness", "kurtosis")
    metric_groups = ("saccade_duration", "fixation_duration",
                     "saccade_amplitude", "saccade_peak_v", "saccade_mean_v")

    # Pre-fill every feature with NaN, in the order the output columns use.
    res = {"GE_saccade_times": np.nan, "GE_fixation_times": np.nan}
    for metric in metric_groups:
        for suffix in stat_suffixes:
            res[f"GE_{metric}_{suffix}"] = np.nan

    def _summarise(metric, values):
        # Write the six summary statistics of one per-segment metric.
        res[f"GE_{metric}_mean"] = values.mean()
        res[f"GE_{metric}_std"] = values.std()
        res[f"GE_{metric}_005quantiles"] = values.quantile(0.05)
        res[f"GE_{metric}_095quantiles"] = values.quantile(0.95)
        res[f"GE_{metric}_skewness"] = skew(values, nan_policy='omit')
        res[f"GE_{metric}_kurtosis"] = kurtosis(values, nan_policy='omit')

    # Sample-to-sample velocity: displacement divided by elapsed time.
    step_dt = eye_df['timestamp'].diff()
    step_dx = eye_df['Gaze point X'].diff()
    step_dy = eye_df['Gaze point Y'].diff()
    eye_df['velocity'] = np.sqrt(step_dx**2 + step_dy**2) / step_dt

    # A new segment id starts whenever the movement type changes.
    movement = eye_df['Eye movement type']
    eye_df['segment_id'] = (movement != movement.shift()).cumsum()

    segments = []
    for _, seg in eye_df.groupby('segment_id'):
        kind = seg['Eye movement type'].iloc[0]
        if kind not in ('Fixation', 'Saccade'):
            continue

        stamps = seg['timestamp']
        record = {
            'type': kind,
            'duration': stamps.iloc[-1] - stamps.iloc[0],
            'count': 1,
        }
        if kind == 'Saccade':
            gaze_x = seg['Gaze point X']
            gaze_y = seg['Gaze point Y']
            record['amplitude'] = np.sqrt((gaze_x.iloc[-1] - gaze_x.iloc[0])**2 +
                                          (gaze_y.iloc[-1] - gaze_y.iloc[0])**2)
            record['peak_velocity'] = seg['velocity'].max()
            record['mean_velocity'] = seg['velocity'].mean()
        segments.append(record)

    res_df = pd.DataFrame(segments)
    if res_df.empty:
        return res

    fix_df = res_df[res_df['type'] == 'Fixation']
    sac_df = res_df[res_df['type'] == 'Saccade']

    res["GE_fixation_times"] = len(fix_df)
    res["GE_saccade_times"] = len(sac_df)

    if not fix_df.empty:
        _summarise("fixation_duration", fix_df['duration'])

    if not sac_df.empty:
        _summarise("saccade_duration", sac_df['duration'])
        _summarise("saccade_amplitude", sac_df['amplitude'])
        _summarise("saccade_peak_v", sac_df['peak_velocity'])
        _summarise("saccade_mean_v", sac_df['mean_velocity'])

    return res

def extract_eye(eye_df):
    """Compute the legacy ("LASTE_*") eye-speed features of a window.

    Args:
        eye_df: Eye-tracking samples with the columns
            ``Gaze point X_derivative``, ``Gaze point Y_derivative`` and
            ``eyemovement_speed``.

    Returns:
        dict with the NaN-ignoring mean and the maximum of the absolute
        value of each of the three columns; all NaN for an empty frame.
    """
    feature_names = (
        "LASTE_eye_speed_x_mean",
        "LASTE_eye_speed_y_mean",
        "LASTE_eye_speed_eye_mean",
        "LASTE_eye_speed_x_max",
        "LASTE_eye_speed_y_max",
        "LASTE_eye_speed_eye_max",
    )
    res = dict.fromkeys(feature_names, np.nan)
    if eye_df.shape[0]==0:
        return res

    # The frame is non-empty past this point, so every column has samples.
    abs_x = np.abs(eye_df["Gaze point X_derivative"])
    abs_y = np.abs(eye_df["Gaze point Y_derivative"])
    abs_speed = np.abs(eye_df["eyemovement_speed"])

    res["LASTE_eye_speed_x_mean"] = np.nanmean(abs_x)
    res["LASTE_eye_speed_y_mean"] = np.nanmean(abs_y)
    res["LASTE_eye_speed_eye_mean"] = np.nanmean(abs_speed)
    res["LASTE_eye_speed_x_max"] = np.max(abs_x)
    res["LASTE_eye_speed_y_max"] = np.max(abs_y)
    res["LASTE_eye_speed_eye_max"] = np.max(abs_speed)
    return res

#普通统计特征计算
def get_stats(data, key_suffix: str = None, feature_type: str = None):
    """Compute basic distribution statistics of one signal.

    Args:
        data: Sequence of samples (the callers in this file pass a
            DataFrame column).
        key_suffix: Optional name inserted between the family prefix and
            the statistic name, as ``<prefix><key_suffix>_<stat>``.
        feature_type: One of ``'pedal'``, ``'speed'``, ``'eyemovement'`` or
            ``'head'``; selects the ``STATSP_`` / ``STATSS_`` / ``STATSE_``
            / ``STATSH_`` prefix. Any other value yields no prefix.

    Returns:
        dict with the statistics ``mean``, ``std``, ``q5``, ``q95``,
        ``skewness`` and ``kurtosis`` under their prefixed names. All are
        NaN for empty input; skewness and kurtosis are NaN when the
        standard deviation is below 1e-8.
    """
    stat_names = ('mean', 'std', 'q5', 'q95', 'skewness', 'kurtosis')
    results = dict.fromkeys(stat_names, np.nan)

    if len(data) > 0:
        spread = np.std(data)
        results['mean'] = np.mean(data)
        results['std'] = spread
        results['q5'] = np.quantile(data, 0.05)
        results['q95'] = np.quantile(data, 0.95)
        # Higher moments are skipped for (near-)constant signals.
        if not spread < 1e-8:
            results['skewness'] = skew(data)
            results['kurtosis'] = kurtosis(data)

    family_prefix = {
        'pedal': 'STATSP_',
        'speed': 'STATSS_',
        'eyemovement': 'STATSE_',
        'head': 'STATSH_',
    }.get(feature_type, '')  # unknown families get no prefix

    if key_suffix is None:
        label = family_prefix
    else:
        label = f"{family_prefix}{key_suffix}_"

    return {f"{label}{stat}": value for stat, value in results.items()}

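# Additional summary statistics (peak-to-peak, median, energy, RMS, line integral, sign changes, inter-quantile ranges)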
def get_stats_new(data, key_suffix: str = None, feature_type: str = None):
    """Compute additional summary statistics of one signal.

    Statistics:
        * ``ptp``: peak-to-peak range.
        * ``median``: median.
        * ``energy``: sum of squares.
        * ``rms``: root mean square.
        * ``lineintegral``: sum of absolute differences between neighbours.
        * ``n_sign_changes``: number of positions where the sign differs
          from the previous sample.
        * ``iqr``: 75th minus 25th percentile.
        * ``iqr_5_95``: 95th minus 5th percentile.

    Args:
        data: Sequence of samples supporting ``**`` (the callers in this
            file pass a DataFrame column).
        key_suffix: Optional name inserted between the family prefix and
            the statistic name, as ``<prefix><key_suffix>_<stat>``.
        feature_type: One of ``'pedal'``, ``'speed'``, ``'eyemovement'`` or
            ``'head'``; selects the ``STATSPNEW_`` / ``STATSSNEW_`` /
            ``STATSENEW_`` / ``STATSHNEW_`` prefix. Any other value yields
            no prefix.

    Returns:
        dict of the statistics under their prefixed names; all NaN for
        empty input.
    """
    stat_names = ('ptp', 'median', 'energy', 'rms', 'lineintegral',
                  'n_sign_changes', 'iqr', 'iqr_5_95')
    results = dict.fromkeys(stat_names, np.nan)

    if len(data) > 0:
        squared_total = np.sum(data ** 2)
        upper_quartile, lower_quartile = np.nanpercentile(data, [75, 25])
        upper_tail, lower_tail = np.nanpercentile(data, [95, 5])

        results['ptp'] = np.ptp(data)
        results['median'] = np.median(data)
        results['energy'] = squared_total
        results['rms'] = np.sqrt(squared_total / len(data))
        results['lineintegral'] = np.abs(np.diff(data)).sum()
        results['n_sign_changes'] = np.sum(np.diff(np.sign(data)) != 0)
        results['iqr'] = np.subtract(upper_quartile, lower_quartile)
        results['iqr_5_95'] = np.subtract(upper_tail, lower_tail)

    family_prefix = {
        'pedal': 'STATSPNEW_',
        'speed': 'STATSSNEW_',
        'eyemovement': 'STATSENEW_',
        'head': 'STATSHNEW_',
    }.get(feature_type, '')  # unknown families get no prefix

    if key_suffix is None:
        label = family_prefix
    else:
        label = f"{family_prefix}{key_suffix}_"

    return {f"{label}{stat}": value for stat, value in results.items()}

## 滑动窗口和特征提取

In [None]:
#滑动窗口
def get_sliding_window(data: pd.Series,speed_data, eyemovement_data, start_end_list):
    """Extract every feature for a single time window.

    The three inputs are filtered to the rows whose ``timestamp`` lies
    strictly between the two window bounds, then the per-column statistics
    and the pedal, speed and eye-movement feature extractors are applied.

    Args:
        data: Pedal samples. Although annotated as a Series, it is used as
            a DataFrame with a ``timestamp`` column.
        speed_data: Speed samples with a ``timestamp`` column.
        eyemovement_data: Eye-tracking samples with a ``timestamp`` column.
        start_end_list: Two-element sequence ``[start, end]`` holding the
            window bounds (both exclusive).

    Returns:
        dict mapping feature names to values; the key ``datetime`` holds
        the window start.
    """
    window_start, window_end = start_end_list[0], start_end_list[1]

    def _clip(frame):
        # Keep the rows strictly inside the window.
        inside = (frame['timestamp'] > window_start) & (frame['timestamp'] < window_end)
        return frame[inside]

    results = {'datetime': window_start}

    def _add_column_stats(frame, column, family):
        # Basic statistics first, then the additional ones, for one column.
        results.update(get_stats(frame[column], column, family))
        results.update(get_stats_new(frame[column], column, family))

    pedal_window = _clip(data)
    speed_window = _clip(speed_data)
    eye_window = _clip(eyemovement_data)

    meta_columns = {"timestamp", "person_id"}
    # Eye-tracker columns that are excluded from the per-column statistics.
    eye_skipped_columns = meta_columns | {
        "Eye movement type",
        "Eye position left X (DACSmm)",
        "Eye position left Y (DACSmm)",
        "Eye position left Z (DACSmm)",
        "Eye position right X (DACSmm)",
        "Eye position right Y (DACSmm)",
        "Eye position right Z (DACSmm)",
    }

    # ----------- pedal -----------
    for column in pedal_window.columns:
        if column not in meta_columns:
            _add_column_stats(pedal_window, column, 'pedal')

    # Frequency-domain, wavelet and hand-crafted pedal features.
    results.update(get_fft_stats(pedal_window))
    results.update(extract_wavelet_features(pedal_window))
    results.update(extract_pedal(pedal_window))
    results.update(extract_new_pedal_feature(pedal_window))

    # ----------- speed -----------
    results.update(extract_new_speed_feature(speed_window))
    for column in speed_window.columns:
        if column not in meta_columns:
            _add_column_stats(speed_window, column, 'speed')

    # ----------- eye / head -----------
    for column in eye_window.columns:
        if column in eye_skipped_columns:
            continue
        family = 'head' if column.startswith("Head") else 'eyemovement'
        _add_column_stats(eye_window, column, family)

    results.update(extract_eye_movement_type(eye_window))

    return results

def get_features(data: pd.DataFrame, speed_data, eyemovement_data, num_cores: int = 0,start_row= 0):
    """Slide a window over the pedal log and extract features per window.

    Windows are defined on the rows of ``data``: a window starts every
    ``WINDOW_STEP`` rows and spans from the timestamp of its first row to
    the timestamp of the row ``WINDOW_SIZE`` rows later. The speed and
    eye-tracking frames are cut with the same timestamps.

    Args:
        data: Pedal samples with a ``timestamp`` column.
        speed_data: Speed samples with a ``timestamp`` column.
        eyemovement_data: Eye-tracking samples with a ``timestamp`` column.
        num_cores: Unused; kept for backward compatibility.
        start_row: Unused; kept for backward compatibility.

    Returns:
        DataFrame with one row per window, indexed by the window start
        (``datetime``) and sorted by it. When the log is too short for a
        single window an empty DataFrame is returned (previously an empty
        list), so callers always receive the same type and ``len(...) == 0``
        keeps working.
    """
    window_starts = range(0, len(data) - WINDOW_SIZE, WINDOW_STEP)
    if len(window_starts) == 0:
        return pd.DataFrame()

    # Read the timestamp column once instead of materialising a full row
    # per lookup with ``iloc[row]['timestamp']``.
    timestamps = data['timestamp'].to_numpy()
    windows = [
        [float(timestamps[start]), float(timestamps[start + WINDOW_SIZE])]
        for start in window_starts
    ]

    # The extractors do not modify their inputs, so no defensive copies of
    # the three frames are needed here.
    rows = [
        get_sliding_window(data, speed_data, eyemovement_data, window)
        for window in windows
    ]

    results = pd.DataFrame([row for row in rows if row])  # drop empty results
    results.set_index('datetime', inplace=True)
    results.sort_index(inplace=True)

    return results

## 主函数

In [33]:
# import pandas as pd
# from config.config_loader import load_config
# import os

# road_type = "straight"
# CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))
# NC_number = CONFIG_DATA["NC_number"]
# PD_number = CONFIG_DATA["PD_number"]

# result = pd.DataFrame()
# # WINDOW_SIZE_LIST = [1275,1275*2,1275*3]  #120
# # size_list =['60','120','180']
# # WINDOW_STEP_LIST = [11,21,43,108,212,319,637]
# # step_list = ['0.5','1','2','5','10','15','30']
# # 0.5s 11
# # 1s 21
# # 2s 43
# # 5s 108
# WINDOW_SIZE_LIST = [1275*2]  #120
# size_list =['120']
# WINDOW_STEP_LIST = [21]
# step_list = ['1']

# for i in range(len(WINDOW_SIZE_LIST)):
#     for j in range(len(WINDOW_STEP_LIST)):
#         output_name= f"feature_{size_list[i]}_{step_list[j]}.csv"
#         print("processing"+output_name+"\n")
#         WINDOW_SIZE = WINDOW_SIZE_LIST[i]
#         WINDOW_STEP = WINDOW_STEP_LIST[j]
#         if i == 1 and (j == 1 or j==2 or j ==3 or j ==4):
#             continue
#         for type in ['PD','NC']:
#             if type == 'PD':
#                 person_amount = PD_number
#             else:
#                 person_amount = NC_number

#             for person_num in range(person_amount):
#                 print("在处理第"+str(person_num+1)+"个文件")

#                 data_type = "pedal"        
#                 pedal_data = pd.read_csv(
#                                     f"sliced_data/{type}/{type}{person_num + 1}/{road_type}_{data_type}_ex2.csv"
#                                 )
#                 #print(f"new_seg_data/{type}/{type}{person_num + 1}/{road_type}/{road_type}_{data_type}.csv")
#                 #print(pedal_data)
#                 data_type = "speed"        
#                 speed_data = pd.read_csv(
#                                     f"sliced_data/{type}/{type}{person_num + 1}/{road_type}_{data_type}_ex2.csv"
#                                 )
                
#                 data_type = "eyemovement"        
#                 eyemovement_data = pd.read_csv(
#                                     f"sliced_data/{type}/{type}{person_num + 1}/{road_type}_{data_type}_ex2.csv"
#                                 )

#                 res = get_features(pedal_data,speed_data,eyemovement_data,0,0)

#                 res = pd.concat([
#                     res,
#                     pd.DataFrame({
#                         'participant': [f"{type} P{person_num + 1}"] * len(res),
#                         'experiment': ['ex2'] * len(res),
#                         'label':1 if type =='PD' else 0
#                     }, index=res.index)
#                 ], axis=1)

#                 result = pd.concat([result, res], axis=0)

#         result.to_csv(output_name)


In [34]:
import pandas as pd
from config.config_loader import load_config
import os

# Driver cell: extract features for the "turn" road segments of experiment 2
# for every participant and write one CSV per (window size, window step)
# configuration.
road_type = "turn"
CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))
NC_number = CONFIG_DATA["NC_number"]
PD_number = CONFIG_DATA["PD_number"]

# Window sizes and steps are given in rows; the parallel *_list entries are
# the labels used in the output file name.
# Other configurations tried by the author:
#   WINDOW_SIZE_LIST = [1275, 1275*2, 1275*3]            size_list = ['60', '120', '180']
#   WINDOW_STEP_LIST = [11, 21, 43, 108, 212, 319, 637]  step_list = ['0.5', '1', '2', '5', '10', '15', '30']
WINDOW_SIZE_LIST = [1275*2]  # labelled '120'
size_list = ['120']
WINDOW_STEP_LIST = [21]
step_list = ['1']

result = pd.DataFrame()

for i, window_size in enumerate(WINDOW_SIZE_LIST):
    for j, window_step in enumerate(WINDOW_STEP_LIST):
        output_name = f"turn_feature_{size_list[i]}_{step_list[j]}.csv"
        print("processing"+output_name+"\n")
        # get_features reads these module-level globals.
        WINDOW_SIZE = window_size
        WINDOW_STEP = window_step
        # Combinations deliberately skipped by the author.
        if i == 1 and j in (1, 2, 3, 4):
            continue

        # Collect the frames of this configuration only. Previously the
        # accumulator was created once outside both loops, so every CSV
        # after the first also contained the rows of earlier configurations.
        frames = []
        for group, person_amount in (('PD', PD_number), ('NC', NC_number)):
            for person_num in range(person_amount):
                print("在处理第"+str(person_num+1)+"个文件")

                person_dir = f"sliced_data/{group}/{group}{person_num + 1}"
                pedal_data = pd.read_csv(f"{person_dir}/{road_type}_pedal_ex2.csv")
                speed_data = pd.read_csv(f"{person_dir}/{road_type}_speed_ex2.csv")
                eyemovement_data = pd.read_csv(f"{person_dir}/{road_type}_eyemovement_ex2.csv")

                res = get_features(pedal_data,speed_data,eyemovement_data,0,0)
                if len(res)==0:
                    continue  # recording too short for a single window

                # Attach the participant metadata and the class label
                # (1 for the 'PD' group, 0 for the 'NC' group).
                res = res.assign(
                    participant=f"{group} P{person_num + 1}",
                    experiment='ex2',
                    label=1 if group == 'PD' else 0,
                )
                frames.append(res)

        # Concatenate once per configuration instead of growing a frame
        # inside the loop.
        result = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        result.to_csv(output_name)


processingturn_feature_120_1.csv

在处理第1个文件
在处理第2个文件
在处理第3个文件
在处理第4个文件
在处理第5个文件
在处理第6个文件
在处理第7个文件
在处理第8个文件
在处理第9个文件
在处理第10个文件
在处理第11个文件
在处理第12个文件
在处理第13个文件
在处理第14个文件
在处理第15个文件
在处理第16个文件
在处理第17个文件
在处理第18个文件
在处理第19个文件
在处理第20个文件
在处理第21个文件
在处理第22个文件
在处理第23个文件
在处理第24个文件
在处理第25个文件
在处理第26个文件
在处理第27个文件
在处理第1个文件
在处理第2个文件
在处理第3个文件
在处理第4个文件
在处理第5个文件
在处理第6个文件
在处理第7个文件
在处理第8个文件
在处理第9个文件
在处理第10个文件
在处理第11个文件
在处理第12个文件
在处理第13个文件
在处理第14个文件
在处理第15个文件
在处理第16个文件
在处理第17个文件
在处理第18个文件
在处理第19个文件
在处理第20个文件
在处理第21个文件
在处理第22个文件
在处理第23个文件
在处理第24个文件
在处理第25个文件
在处理第26个文件
在处理第27个文件
在处理第28个文件
在处理第29个文件
在处理第30个文件
在处理第31个文件


## 整段数据不切分 实验123 要修改地址

In [35]:
# import pandas as pd
# from config.config_loader import load_config
# import os

# CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))
# NC_number = CONFIG_DATA["NC_number"]
# PD_number = CONFIG_DATA["PD_number"]
# result = pd.DataFrame()

# WINDOW_SIZE_LIST = [1275*2]  #120
# size_list =['120']
# WINDOW_STEP_LIST = [21]
# step_list = ['1']
# output_name= f"feature_{size_list[0]}_{step_list[0]}.csv"

# for type in ['PD','NC']:
#     for ex in ['ex1','ex2']:

#         if type == 'PD':
#             person_amount = PD_number
#         else:
#             person_amount = NC_number

#         for person_num in range(person_amount):
#             print("在处理第"+str(person_num+1)+"个文件")

#             data_type = "pedal"        
#             pedal_data = pd.read_csv(
#                                 f"sliced_data/{type}/{type}{person_num + 1}/whole_{data_type}_{ex}.csv"
#                             )

#             data_type = "speed"        
#             speed_data = pd.read_csv(
#                                 f"sliced_data/{type}/{type}{person_num + 1}/whole_{data_type}_{ex}.csv"
#                             )
            
#             data_type = "eyemovement"        
#             eyemovement_data = pd.read_csv(
#                                 f"sliced_data/{type}/{type}{person_num + 1}/whole_{data_type}_{ex}.csv"
#                             )

#             res = get_features(pedal_data,speed_data,eyemovement_data,0,0)

#             res = pd.concat([
#                 res,
#                 pd.DataFrame({
#                     'participant': [f"{type} P{person_num + 1}"] * len(res),
#                     'experiment': [ex] * len(res),
#                     'label':1 if type =='PD' else 0
#                 }, index=res.index)
#             ], axis=1)

#             result = pd.concat([result, res], axis=0)

# result.to_csv(output_name)