## 查看資料

In [2]:
# Load test_info.csv into a DataFrame.
import pandas as pd

test_info = pd.read_csv('/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_info.csv')

# Shape of the loaded table as (rows, columns).
test_info.shape

# Optional preview of the first five rows (left disabled):
# test_info.head(5)

Unnamed: 0,unique_id,mode,cut_point
0,1968,9,[ 0 95 190 285 380 475 571 666 761 ...
1,1969,9,[ 0 99 198 297 396 495 594 693 792 ...
2,1970,9,[ 0 80 161 242 323 404 485 566 647 ...
3,1971,10,[ 0 70 141 212 283 353 424 495 566 ...
4,1972,9,[ 0 88 176 264 352 440 528 617 705 ...


## 資料特徵工程處理

In [3]:
import os
import numpy as np
import pandas as pd
from scipy.fft import fft
from scipy.stats import skew, kurtosis

# === 0. Paths ===
# Absolute, machine-specific locations of: the test metadata CSV, the directory
# holding one "<unique_id>.txt" data file per row, and the feature CSV to write.
TEST_INFO_CSV = "/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_info.csv"
TEST_DATA_DIR = "/Users/yuchingchen/Documents/AI_CUP/data/39_Test_Dataset/test_data"
OUTPUT_PATH   = "/Users/yuchingchen/Documents/AI_CUP/feature_engineering/test_features.csv"

# === 1. Load test_info.csv ===
# unique_id is read as str so it can be used verbatim when building file names.
test_info = pd.read_csv(TEST_INFO_CSV, dtype={"unique_id": str})

# === 2. 34-dimensional feature extractor (kept identical to the one used for the training set) ===
def extract_34_features(segment: np.ndarray) -> list:
    """Compute the 34 summary features of one segment.

    Args:
        segment: 2-D array whose rows are samples. Columns 0-5 are read
            individually; columns 0-2 form the "acc" vector and columns 3
            onward form the "gyro" vector.

    Returns:
        A list of 34 values laid out as:
          [0:18]  mean, variance and RMS of each of the six columns;
          [18:24] max, min and mean of the row-wise Euclidean norm of acc,
                  then of gyro;
          [24:32] kurtosis, skewness, mean of |FFT|^2 and entropy of the
                  normalised power spectrum of the acc norm, then of the
                  gyro norm;
          [32:34] mean kurtosis and mean skewness over the six columns,
                  where a constant column contributes 0 to both means.

    Note:
        Only the per-column statistics in [32:34] are guarded against
        constant input; the norm-based kurtosis/skewness in [24:32] are
        passed to scipy unguarded.
    """
    tiny = 1e-12  # keeps the normalisation and the log away from zero

    def shape_and_spectrum(signal):
        # Power spectrum, normalised to sum to ~1 so it can be read as a distribution.
        spectrum = np.abs(fft(signal)) ** 2
        weights = spectrum / (spectrum.sum() + tiny)
        spectral_entropy = -(weights * np.log(weights + tiny)).sum()
        return [kurtosis(signal), skew(signal), spectrum.mean(), spectral_entropy]

    axis_series = [segment[:, col] for col in range(6)]

    # (1) per-column mean, variance, RMS
    features = []
    for series in axis_series:
        features.extend([series.mean(), series.var(), np.sqrt((series ** 2).mean())])

    # (2) max / min / mean of the acc and gyro vector magnitudes
    magnitudes = (
        np.linalg.norm(segment[:, :3], axis=1),
        np.linalg.norm(segment[:, 3:], axis=1),
    )
    for mag in magnitudes:
        features.extend([mag.max(), mag.min(), mag.mean()])

    # (3) kurtosis, skewness, spectral power and spectral entropy of the magnitudes
    for mag in magnitudes:
        features.extend(shape_and_spectrum(mag))

    # (4) mean kurtosis and mean skewness across columns; a constant column counts as 0
    axis_kurtosis = []
    axis_skewness = []
    for series in axis_series:
        is_flat = np.allclose(series, series[0], atol=1e-8)
        axis_kurtosis.append(0.0 if is_flat else kurtosis(series))
        axis_skewness.append(0.0 if is_flat else skew(series))
    features.extend([np.mean(axis_kurtosis), np.mean(axis_skewness)])

    return features

# === 3. Extract features for every test file and flatten them into one row per file ===
rows = []
uids = []
for record in test_info.itertuples(index=False):
    uid = record.unique_id
    # cut_point is stored as text such as "[ 0 95 190 ...]": drop the brackets, split on whitespace
    cuts = [int(token) for token in str(record.cut_point).strip("[]").split()]
    data = np.loadtxt(os.path.join(TEST_DATA_DIR, f"{uid}.txt"))

    # 27 consecutive segments, each bounded by two neighbouring cut points
    feat_row = []
    for seg_idx in range(27):
        start, stop = cuts[seg_idx], cuts[seg_idx + 1]
        feat_row.extend(extract_34_features(data[start:stop, :]))

    rows.append(feat_row)
    uids.append(uid)

# === 4. Build the DataFrame and write the CSV ===
# Feature columns are auto-named f0 ... f(n-1); unique_id goes first.
n_feats = len(rows[0])
col_names = [f"f{i}" for i in range(n_feats)]

df_feats = pd.DataFrame(rows, columns=col_names)
df_feats.insert(0, "unique_id", uids)

# Make sure the output directory exists before writing.
output_dir = os.path.dirname(OUTPUT_PATH)
os.makedirs(output_dir, exist_ok=True)
df_feats.to_csv(OUTPUT_PATH, index=False)
print(f"✅ 已輸出測試集特徵：{OUTPUT_PATH}，shape = {df_feats.shape}")

✅ 已輸出測試集特徵：/Users/yuchingchen/Documents/AI_CUP/feature_engineering/test_features.csv，shape = (1430, 919)
