# Unit19｜Autoencoder（表示學習）做異常偵測

目標：用「只看正常資料」的方式學習多變數關聯，透過重建誤差做告警，並示範連續 N 次觸發（降低誤報）。

In [None]:
# ===== Shared environment (repo-root + outputs/) =====
from pathlib import Path
import os

# Localized path setup for Part_4 (does not depend on the legacy
# Jupyter_Scripts material). All paths are anchored at the current
# working directory.
NOTEBOOK_DIR = Path.cwd()
OUTPUT_DIR = NOTEBOOK_DIR.joinpath('outputs')
RESULTS_DIR = OUTPUT_DIR.joinpath('P4_Unit19_Results')

# Create the parent before the child; exist_ok makes re-running harmless.
for _folder in (OUTPUT_DIR, RESULTS_DIR):
    _folder.mkdir(exist_ok=True)

# Echo the resolved locations so the reader can see where files will land.
for _caption, _location in (
    ('Notebook 目錄:', NOTEBOOK_DIR),
    ('輸出目錄:', OUTPUT_DIR),
    ('結果目錄:', RESULTS_DIR),
):
    print(_caption, _location)

In [None]:
from pathlib import Path
import os

# Localized path setup for Part_4 (does not depend on the legacy
# Jupyter_Scripts material). The directories are created with
# exist_ok=True, so running this cell again is safe.
NOTEBOOK_DIR = Path.cwd()
OUTPUT_DIR, RESULTS_DIR = (
    NOTEBOOK_DIR / 'outputs',
    NOTEBOOK_DIR / 'outputs' / 'P4_Unit19_Results',
)

OUTPUT_DIR.mkdir(exist_ok=True)   # parent first ...
RESULTS_DIR.mkdir(exist_ok=True)  # ... then the per-unit results folder

print('Notebook 目錄:', NOTEBOOK_DIR)
print('輸出目錄:', OUTPUT_DIR)
print('結果目錄:', RESULTS_DIR)


In [None]:
# Train only on the normal segment (rows with label == 0).
# NOTE(review): `df`, `np`, `StandardScaler`, `PCA` and `MLPRegressor` are not
# defined in the visible cells - presumably an earlier import/data cell
# provides them; confirm before running.
feature_cols = [f'x{i}' for i in range(1, 7)]
is_normal = df['label'].to_numpy() == 0  # boolean mask, reused below
X_train = df.loc[is_normal, feature_cols].to_numpy()
X_all = df[feature_cols].to_numpy()

# Fit the scaler on normal rows only, then apply it to every row.
scaler = StandardScaler().fit(X_train)
Z_train = scaler.transform(X_train)
Z_all = scaler.transform(X_all)

# MSPC-style baseline: PCA reconstruction error (mean squared error per row).
pca = PCA(n_components=2, random_state=42).fit(Z_train)
Z_hat_pca = pca.inverse_transform(pca.transform(Z_all))
err_pca = np.mean((Z_all - Z_hat_pca) ** 2, axis=1)

# "Autoencoder" via MLPRegressor (multi-output reconstruction): the network
# is trained to reproduce its own input through a 2-unit middle layer.
ae = MLPRegressor(hidden_layer_sizes=(10, 2, 10), activation='relu',
                  random_state=42, max_iter=400, early_stopping=True)
ae.fit(Z_train, Z_train)
Z_hat_ae = ae.predict(Z_all)
err_ae = np.mean((Z_all - Z_hat_ae) ** 2, axis=1)

# Alarm thresholds: 0.99 quantile of the error over the normal rows.
thr_pca = np.quantile(err_pca[is_normal], 0.99)
thr_ae = np.quantile(err_ae[is_normal], 0.99)
print('Thresholds (99% normal): PCA=', float(thr_pca), 'AE=', float(thr_ae))

out = df[['t', 'label']].copy()
out['err_pca'] = err_pca
out['err_ae'] = err_ae

# Write into RESULTS_DIR (the folder the setup cell creates) rather than a
# cwd-relative 'P4_Unit19_Results/' folder that nothing in view creates.
recon_csv = RESULTS_DIR / '01_reconstruction_errors.csv'
out.to_csv(recon_csv, index=False)
print(f'Saved: {recon_csv}')

In [None]:
# Alarm design: consecutive-N trigger (reduce false positives).
# NOTE(review): `plt` is not imported in the visible cells - presumably
# matplotlib.pyplot from an earlier cell; confirm.
N = 5
alarm_raw = (out['err_ae'] > thr_ae).astype(int)
# The rolling sum equals N only when the current sample and the N-1 samples
# before it all exceeded the threshold. The first N-1 rows have an incomplete
# window (NaN -> 0) and therefore can never fire.
alarm_consec = alarm_raw.rolling(N).sum().fillna(0).to_numpy() >= N
out['alarm_raw'] = alarm_raw
out['alarm_consecN'] = alarm_consec.astype(int)

# All artifacts go to RESULTS_DIR (created by the setup cell) instead of a
# cwd-relative 'P4_Unit19_Results/' folder that nothing in view creates.
alarm_csv = RESULTS_DIR / '02_alarm_table.csv'
out.to_csv(alarm_csv, index=False)
print(f'Saved: {alarm_csv}')

# Chart 1: AE reconstruction error with the threshold line; rows with
# label == 1 are shaded up to the maximum error value.
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
ax.plot(out['t'], out['err_ae'], lw=1, label='AE reconstruction error')
ax.axhline(thr_ae, color='r', ls='--', label='threshold (99% normal)')
ax.fill_between(out['t'], 0, out['label'] * out['err_ae'].max(),
                color='orange', alpha=0.15, label='abnormal segments')
ax.set_title('Reconstruction error control chart + abnormal segments')
ax.set_xlabel('t')
ax.set_ylabel('error')
ax.legend(loc='upper right')
plt.tight_layout()
error_chart_png = RESULTS_DIR / '03_recon_error_chart.png'
plt.savefig(error_chart_png, dpi=150)
print(f'Saved: {error_chart_png}')

# Chart 2: raw threshold crossings versus the consecutive-N alarm.
fig, ax = plt.subplots(1, 1, figsize=(10, 2.2))
ax.plot(out['t'], out['alarm_raw'], lw=1, label='raw alarm')
ax.plot(out['t'], out['alarm_consecN'], lw=2, label=f'consecutive-{N} alarm')
ax.set_title('Alarm logic: raw vs consecutive-N')
ax.set_ylim(-0.1, 1.1)
ax.legend(loc='upper right')
plt.tight_layout()
alarm_logic_png = RESULTS_DIR / '04_alarm_logic.png'
plt.savefig(alarm_logic_png, dpi=150)
print(f'Saved: {alarm_logic_png}')

## 誤差分解：哪個 tag 在拉高重建誤差？

現場排障時，除了告警曲線，你通常需要 top-K 變數清單（優先查哪支儀表/哪段機制）。

In [None]:
import numpy as np
import pandas as pd

# Per-feature squared reconstruction error (AE): one e_<feature> column per
# input feature, plus the time stamp and label copied from df.
err_cols = [f'e_{c}' for c in feature_cols]
err_mat = (Z_all - Z_hat_ae) ** 2
df_err = pd.DataFrame(err_mat, columns=err_cols)
df_err['t'] = df['t'].to_numpy()
df_err['label'] = df['label'].to_numpy()

# Write into RESULTS_DIR (created by the setup cell) rather than a
# cwd-relative 'P4_Unit19_Results/' folder that nothing in view creates.
per_feature_csv = RESULTS_DIR / '05_per_feature_errors.csv'
df_err.to_csv(per_feature_csv, index=False)
print(f'Saved: {per_feature_csv}')

# Top-K tags during alarm periods: for each alarmed row, list the K features
# with the largest squared reconstruction error.
K = 3
top_tags = []
alarm_idx = np.where(out['alarm_consecN'].to_numpy() == 1)[0]
for idx in alarm_idx[:200]:  # limit to the first 200 alarmed rows
    row = df_err[err_cols].iloc[idx]
    tags = row.sort_values(ascending=False).head(K).index.tolist()
    top_tags.append({'t': int(df_err['t'].iloc[idx]), 'top_tags': ','.join(tags)})

# Explicit columns keep the CSV header present even when no alarm fired.
df_top = pd.DataFrame(top_tags, columns=['t', 'top_tags'])
top_tags_csv = RESULTS_DIR / '06_alarm_top_tags.csv'
df_top.to_csv(top_tags_csv, index=False)
print(f'Saved: {top_tags_csv}')


## 交付物提示（SOP）

1. 告警是集中在「漂移」還是「關聯破壞」？
2. 你的連續 N（抑制策略）怎麼選？（誤報成本 vs 反應速度）
3. 告警後的處置：先排除感測器問題，再判斷是否降載/切換/停機檢查。