<a href="https://colab.research.google.com/github/hamagami/pycaret/blob/main/mfccanom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/pycaret/pycaret.git@master --upgrade

In [None]:
!pip install librosa


In [None]:
# 1. Import libraries


import os
import scipy.io
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import urllib.request
from pycaret.anomaly import *

# --- 2. Data acquisition / generation function ---
def generate_vibration_data():
    """Return a 48000-sample vibration signal and its sampling rate.

    First tries to download the two CWRU bearing ``.mat`` files listed in
    ``urls`` (saved as ``normal.mat`` and ``fault.mat`` in the working
    directory). From each file, the first 24000 samples of the variable whose
    name contains ``"DE_time"`` are taken, and the two segments are
    concatenated (normal first, fault second).

    If any step of that path fails (network error, timeout, unexpected file
    content), a synthetic signal is returned instead: a 30 Hz sine with
    Gaussian noise (sigma 0.1), with additional Gaussian noise (sigma 0.5)
    added to the second half.

    Returns:
        tuple: ``(y, sr)`` where ``y`` is a 1-D numpy array and ``sr`` is the
        sampling rate, fixed at 12000 in this function.
    """
    urls = {
        "normal": "http://csegroups.case.edu/sites/default/files/bearingdata/files/97.mat",
        "fault": "http://csegroups.case.edu/sites/default/files/bearingdata/files/130.mat"
    }
    sr = 12000
    try:
        # Install the browser-like opener once instead of once per file.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib.request.install_opener(opener)

        data_list = []
        for label, url in urls.items():
            filename = f"{label}.mat"
            # A timeout keeps an unreachable host from hanging the notebook.
            with urllib.request.urlopen(url, timeout=15) as response:
                payload = response.read()
            with open(filename, "wb") as fh:
                fh.write(payload)
            mat = scipy.io.loadmat(filename)
            key = [k for k in mat.keys() if "DE_time" in k][0]
            data_list.append(mat[key].flatten()[:24000])
        y = np.concatenate(data_list)
        print("Successfully loaded CWRU data.")
    except Exception as exc:
        # Deliberate best-effort fallback, but let KeyboardInterrupt/SystemExit
        # propagate and report why the real data could not be used.
        print("Using simulated data.")
        print(f"(CWRU data unavailable: {type(exc).__name__}: {exc})")
        t = np.linspace(0, 4, 48000)
        y = np.sin(2 * np.pi * 30 * t) + np.random.normal(0, 0.1, 48000)
        y[24000:] += np.random.normal(0, 0.5, 24000)
    return y, sr

# --- 3. Main processing ---
# Load the vibration signal (downloaded CWRU data, or the simulated fallback).
y, sr = generate_vibration_data()

# Frame analysis settings
n_mfcc = 13          # number of MFCC coefficients computed per frame
frame_length = 2048  # passed to librosa as n_fft
hop_length = 512     # samples between successive frames

# MFCC extraction: the result has one row per coefficient, so transpose it to
# get one DataFrame row per frame and one column per coefficient.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=frame_length, hop_length=hop_length)
df_full = pd.DataFrame(mfccs.T, columns=[f'mfcc_{i}' for i in range(n_mfcc)])

# --- Feature preparation (exclude the 0th coefficient) ---
df_raw = df_full.drop(columns=['mfcc_0'])
# 5-frame centered moving average. The centered window leaves NaNs at both
# ends, which are filled from the nearest valid row. DataFrame.bfill()/ffill()
# are used because fillna(method=...) is deprecated in recent pandas.
df_smooth = df_raw.rolling(window=5, center=True).mean().bfill().ffill()

# Anomaly detection (PyCaret 'mcd' model, used for the Mahalanobis-labelled score)
# Number of MFCC frames per second of signal; the first second is the training window.
frames_in_1s = int(sr / hop_length)


def _fit_and_score(features, n_train):
    """Fit PyCaret's 'mcd' model on the first ``n_train`` rows and score all rows.

    Returns a 4-tuple: the ``setup`` result, the fitted model, the
    ``predict_model`` output for every row of ``features``, and the Euclidean
    distance of every row from the mean of the first ``n_train`` rows.
    """
    experiment = setup(
        data=features.iloc[:n_train, :],
        session_id=123,
        normalize=True,
        verbose=False,
    )
    model = create_model('mcd')
    scored = predict_model(model, data=features)
    # Plain (unnormalized) Euclidean distance from the training-window mean.
    deviation = features - features.iloc[:n_train].mean()
    euclidean = np.sqrt((deviation ** 2).sum(axis=1))
    return experiment, model, scored, euclidean


# A. Analysis of the raw features
s_raw, mcd_raw, res_raw, dist_raw_eucl = _fit_and_score(df_raw, frames_in_1s)

# B. Analysis of the smoothed features
s_smooth, mcd_smooth, res_smooth, dist_smooth_eucl = _fit_and_score(df_smooth, frames_in_1s)

# --- 4. Visualization (raw vs. smoothed distance comparison) ---
# Time stamp (seconds) of each MFCC frame, shared by panels 2-4.
times = librosa.frames_to_time(np.arange(len(res_raw)), sr=sr, hop_length=hop_length)
fig, axes = plt.subplots(4, 1, figsize=(16, 26), sharex=True)

# (1) Waveform, with the first second (the training window) shaded
axes[0].plot(np.linspace(0, len(y)/sr, len(y)), y, color='gray', alpha=0.3)
axes[0].axvspan(0, 1, color='green', alpha=0.05, label='Training')
axes[0].set_title("1. Original Vibration Waveform")
# Without a legend the 'Training' label on the shaded span is never shown.
axes[0].legend()

# (2) Deviation of every smoothed coefficient from its training-window mean
for col in df_smooth.columns:
    axes[1].plot(times, df_smooth[col] - df_smooth[col].iloc[:frames_in_1s].mean(), alpha=0.7)
axes[1].set_title("2. Smoothed MFCC Deviations (Orders 1-12)")

# (3) Euclidean distance comparison (raw vs. smoothed)
axes[2].plot(times, dist_raw_eucl, color='gray', alpha=0.5, label='Euclidean (Raw)')
axes[2].plot(times, dist_smooth_eucl, color='orange', linewidth=2, label='Euclidean (Smoothed)')
axes[2].set_title("3. Euclidean Distance: Raw vs Smoothed (Direct Change)")
axes[2].legend()

# (4) Mahalanobis distance comparison (raw vs. smoothed), from the PyCaret scores
axes[3].plot(times, res_raw['Anomaly_Score'], color='gray', alpha=0.5, label='Mahalanobis (Raw)')
axes[3].plot(times, res_smooth['Anomaly_Score'], color='red', linewidth=2, label='Mahalanobis (Smoothed)')
axes[3].set_title("4. Mahalanobis Distance: Raw vs Smoothed (Statistical Change)")
axes[3].set_xlabel("Time (s)")
axes[3].legend()

plt.tight_layout()
plt.show()