## 查看資料

In [2]:
# Load test_info.csv (the index of test recordings) and report its dimensions.
import pandas as pd

test_info_csv = '/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_info.csv'
test_info = pd.read_csv(test_info_csv)

# Last expression of the cell: (rows, columns) of the loaded table.
test_info.shape

# Uncomment to preview the first rows instead of the shape:
# test_info.head(5)

Unnamed: 0,unique_id,mode,cut_point
0,1968,9,[ 0 95 190 285 380 475 571 666 761 ...
1,1969,9,[ 0 99 198 297 396 495 594 693 792 ...
2,1970,9,[ 0 80 161 242 323 404 485 566 647 ...
3,1971,10,[ 0 70 141 212 283 353 424 495 566 ...
4,1972,9,[ 0 88 176 264 352 440 528 617 705 ...


## 資料特徵工程處理

In [7]:
import pandas as pd
import numpy as np
import os
import re
from scipy.stats import skew, kurtosis
from scipy.fft import fft

# === Test-data paths ===
# info_path       : CSV index of the test recordings; the main loop reads its
#                   `unique_id` and `cut_point` columns.
# data_dir        : folder holding one "<unique_id>.txt" sensor file per recording.
# output_csv_path : where the generated feature table is written.
# NOTE(review): these are absolute, machine-specific paths — adjust them before
# running the notebook on another machine.
info_path = "/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_info.csv"
data_dir = "/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_data"
output_csv_path = "/Users/yuchingchen/Documents/AI_CUP/feature_engineering/test_features.csv"

# === Feature extraction (40 values per 6-column segment) ===
# NOTE: the function name says "34" for historical reasons; the code below
# actually emits 5 * n_columns + 6 + 4 values, i.e. 40 for six sensor columns.
def extract_34_features(segment):
    """Build the feature vector of one segment of sensor samples.

    Parameters
    ----------
    segment : numpy.ndarray, 2-D (samples x columns)
        Columns 0-2 are treated as one vector group (named "acc" here) and
        columns 3-5 as a second group (named "gyro").
        Presumably accelerometer / gyroscope axes — TODO confirm against the
        data description.

    Returns
    -------
    list
        In order:
        * for every column: mean, variance (numpy default, ddof=0), RMS,
          skewness, kurtosis (scipy defaults)  -> 5 values per column;
        * max / min / mean of the per-sample Euclidean norm of the first
          group, then the same three values for the second group -> 6 values;
        * for each group's norm signal: mean squared FFT magnitude ("power")
          and the base-2 entropy of the normalised FFT magnitudes -> 4 values.

    Notes
    -----
    When a group's norm signal is identically zero its spectrum sums to zero;
    the entropy is then defined as 0.0 instead of the NaN that a 0/0 division
    would produce. All other inputs give exactly the same values as before.

    NOTE(review): skew/kurtosis are left untouched; scipy may warn (and can
    return NaN) for constant or near-constant columns — verify downstream
    handling.
    """
    features = []

    # Per-sample magnitude of each 3-axis group; reused for both the summary
    # statistics and the spectral features below.
    norm_acc = np.linalg.norm(segment[:, 0:3], axis=1)
    norm_gyro = np.linalg.norm(segment[:, 3:6], axis=1)

    # Time-domain statistics, one set per column.
    for i in range(segment.shape[1]):
        axis = segment[:, i]
        features.extend([
            np.mean(axis),
            np.var(axis),
            np.sqrt(np.mean(axis ** 2)),
            skew(axis),
            kurtosis(axis)
        ])

    # Range and average of each group's magnitude.
    features.extend([
        np.max(norm_acc), np.min(norm_acc), np.mean(norm_acc),
        np.max(norm_gyro), np.min(norm_gyro), np.mean(norm_gyro)
    ])

    # Frequency-domain features of each group's magnitude signal.
    for magnitude in (norm_acc, norm_gyro):
        fft_vals = np.abs(fft(magnitude))
        power = np.mean(fft_vals ** 2)

        spectrum_sum = np.sum(fft_vals)
        if spectrum_sum == 0:
            # All-zero spectrum: no distribution to measure, avoid 0/0 -> NaN.
            entropy = 0.0
        else:
            # The 1e-12 offset keeps log2 finite for empty frequency bins.
            entropy = -np.sum(
                (fft_vals / spectrum_sum) * np.log2((fft_vals + 1e-12) / spectrum_sum)
            )

        features.append(power)
        features.append(entropy)

    return features

# === Main flow ===
# For every recording listed in the info CSV: load its sensor file, parse the
# 28 cut points into 27 consecutive segments, extract the features of each
# segment and collect one row (unique_id followed by all features).
# Recordings that cannot be read or fail validation are reported and skipped.
info_df = pd.read_csv(info_path)
all_features = []
all_uids = []  # never filled in this cell; kept so the name stays defined

for idx, row in info_df.iterrows():
    uid = row['unique_id']
    file_path = os.path.join(data_dir, f"{uid}.txt")

    try:
        # ndmin=2 keeps a single-row file two-dimensional, so the row slicing
        # and column indexing further down cannot fail on a 1-D array.
        data = np.loadtxt(file_path, ndmin=2)
    except Exception as e:
        print(f"[跳過] 無法讀取 {file_path}: {e}")
        continue

    # The column header built later assumes exactly six sensor columns; any
    # other width would only surface as a crash when the DataFrame is built.
    if data.shape[1] != 6:
        print(f"[跳過] UID={uid} 的資料欄位數錯誤（{data.shape[1]}）")
        continue

    # cut_point is stored as the text of an array, e.g. "[ 0 95 190 ...]";
    # strip brackets/newlines and parse the whitespace-separated integers.
    cut_str = str(row['cut_point'])
    cut_str = re.sub(r"[\[\]\n\r]", " ", cut_str)
    try:
        cut_points = list(map(int, cut_str.strip().split()))
    except ValueError:
        # Only parsing errors are expected here; anything else should surface.
        print(f"[跳過] UID={uid} 的 cut_point 轉換失敗：{cut_str}")
        continue

    # 28 boundaries delimit the 27 segments expected per recording.
    if len(cut_points) != 28:
        print(f"[跳過] UID={uid} 的 cut_point 長度錯誤（{len(cut_points)}）：{cut_str}")
        continue

    # Row layout: unique_id first, then the features of segments 1..27.
    feature_vec = [uid]
    valid = True
    for i in range(27):
        start, end = cut_points[i], cut_points[i+1]
        # Reject empty, reversed, negative or out-of-file ranges.
        if start < 0 or start >= end or end > data.shape[0]:
            print(f"[跳過] UID={uid} 的第 {i+1} 段資料範圍錯誤：{start} ~ {end}")
            valid = False
            break
        segment = data[start:end]
        feature_vec.extend(extract_34_features(segment))  # 27 segments x 40 features = 1080 values

    if not valid:
        continue

    all_features.append(feature_vec)

# === Build the column names ===
# One `unique_id` column followed, for each of the 27 segments, by
# 6 axes x 5 statistics and then the 10 magnitude / spectral summaries,
# in the same order in which the features are appended to each row.
axis_labels = ('Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz')
stat_labels = ('mean', 'var', 'rms', 'skew', 'kurt')
summary_labels = (
    'acc_max', 'acc_min', 'acc_mean',
    'gyro_max', 'gyro_min', 'gyro_mean',
    'acc_power', 'acc_entropy',
    'gyro_power', 'gyro_entropy',
)

columns = ['unique_id']
for seg_no in range(1, 28):
    seg_prefix = f"seg{seg_no}"
    # Per-axis statistics: axis varies slowest, statistic fastest.
    columns.extend(
        f"{seg_prefix}_{axis_label}_{stat_label}"
        for axis_label in axis_labels
        for stat_label in stat_labels
    )
    # Magnitude range/mean and spectral power/entropy for both sensor groups.
    columns.extend(f"{seg_prefix}_{label}" for label in summary_labels)

# === Save as CSV ===
# Make sure the destination folder exists first, so a missing directory cannot
# throw away the extracted features at the very last step.
os.makedirs(os.path.dirname(output_csv_path) or ".", exist_ok=True)

# One row per successfully processed recording; `columns` supplies the header.
df_test = pd.DataFrame(all_features, columns=columns)
df_test.to_csv(output_csv_path, index=False)
print(f"✅ 已輸出測試特徵至：{output_csv_path}，共 {len(df_test)} 筆")

  skew(axis),
  kurtosis(axis)


✅ 已輸出測試特徵至：/Users/yuchingchen/Documents/AI_CUP/feature_engineering/test_features.csv，共 1430 筆
