### EDA Playground

- GUI 환경에서 dataset classes 선택시 해당하는 class별로 볼 수 있음
- anomaly - normal pair data를 찾아서 묶어준 다음 idx별로 볼 수 있게 만듦.
- 스펙트로그램을 볼 때 n_fft 사이즈를 조절 가능함. html로 따지면 range와 number input으로 조작 가능
- 소리를 들을 수 있도록 재생 버튼도 있어야 함
- 

In [None]:
import numpy as np
import librosa
import pyfftw

# Configure pyfftw
pyfftw.config.NUM_THREADS = 4  # set this to the number of threads you want to use
pyfftw.interfaces.cache.enable()  # enable the pyfftw cache for better performance

# Make librosa use pyfftw's numpy-compatible FFT interface
# NOTE(review): librosa.set_fftlib is reported as deprecated in recent librosa
# releases — confirm against the installed version.
librosa.set_fftlib(pyfftw.interfaces.numpy_fft)

# Load the audio (no sr argument is given, so librosa's default applies)
y, sr = librosa.load('audio_file.wav')

# Compute the STFT
D = librosa.stft(y, n_fft=2048, hop_length=512)

# Inspect the shape of the result
print(D.shape)


In [None]:
import os
import random
import pandas as pd
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio, display, HTML
import warnings
import ipywidgets as widgets
from ipywidgets import interact, fixed
from skimage.metrics import structural_similarity as ssim

# Silence every warning for the rest of the session (optional; mainly librosa noise).
warnings.filterwarnings('ignore')

# -----------------------------
# Configuration Parameters
# -----------------------------
# Root directory that holds one sub-directory per dataset class.
DATASETS_DIR = "../../../datasets/dev"

# Each immediate sub-directory of DATASETS_DIR is treated as a class name.
CLASS_NAMES = [
    entry
    for entry in os.listdir(DATASETS_DIR)
    if os.path.isdir(os.path.join(DATASETS_DIR, entry))
]

# Initial value of the n_fft slider in the interactive viewer.
N_FFT = 160

# -----------------------------
# Step 1: Load Dataset
# -----------------------------
def load_dataset(attributes_file, datasets_dir, class_name):
    """Read an attributes CSV and return audio file paths with their labels.

    Args:
        attributes_file: Path to a CSV file that has a ``file_name`` column.
        datasets_dir: Directory each ``file_name`` entry is joined onto.
        class_name: Kept for interface compatibility; it is not used when
            building the paths (only ``datasets_dir`` and the CSV entry are
            joined).

    Returns:
        A ``(file_paths, labels)`` tuple of two parallel lists. A label is
        ``'anomaly'`` when the lower-cased ``file_name`` entry contains
        ``'anomaly'`` and ``'normal'`` otherwise.

    Raises:
        FileNotFoundError: If ``attributes_file`` is not an existing file.
    """
    if not os.path.isfile(attributes_file):
        raise FileNotFoundError(f"Attributes file not found: {attributes_file}")

    names = pd.read_csv(attributes_file)['file_name'].tolist()

    # Label first, then build paths, so both lists stay index-aligned with the CSV rows.
    anomaly_flags = ['anomaly' in name.lower() for name in names]
    labels = ['anomaly' if flagged else 'normal' for flagged in anomaly_flags]
    file_paths = [os.path.join(datasets_dir, name) for name in names]

    return file_paths, labels

# -----------------------------
# Step 2: Compute Spectrogram
# -----------------------------
def compute_spectrogram(y, n_fft, hop_length):
    """Return the magnitude of the short-time Fourier transform of ``y``.

    Args:
        y: Audio signal passed straight to ``librosa.stft``.
        n_fft: FFT window size forwarded to ``librosa.stft``.
        hop_length: Hop length forwarded to ``librosa.stft``.

    Returns:
        The element-wise absolute value of the STFT result.
    """
    return np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

# -----------------------------
# Step 3: Find Corresponding Normal File for Each Anomaly Based on Exact Code Name and File Number
# -----------------------------
def _number_and_code(path):
    """Return fields 5 and 7 of the underscore-split basename, or None if too short."""
    fields = os.path.basename(path).split("_")
    if len(fields) < 8:
        return None
    return fields[5], fields[7]


def find_matching_normal_file(anomaly_path, normal_paths):
    """Find the first normal file sharing the anomaly's number and code fields.

    The basename of each path is split on ``"_"``; two files match when both
    their 6th field (index 5) and their 8th field (index 7) are equal.

    Args:
        anomaly_path: Path of the anomaly file to find a partner for.
        normal_paths: Iterable of candidate normal file paths, searched in order.

    Returns:
        The first matching path from ``normal_paths``, or ``None`` when there
        is no match. ``None`` is also returned when the anomaly basename has
        fewer than 8 underscore-separated fields, and normal files with too
        few fields are skipped, so an unexpected file name never raises
        ``IndexError`` and the caller can fall back to another strategy.
    """
    wanted = _number_and_code(anomaly_path)
    if wanted is None:
        return None

    for normal_path in normal_paths:
        # An unparsable normal name yields None, which never equals a tuple.
        if _number_and_code(normal_path) == wanted:
            return normal_path
    return None

# -----------------------------
# Step 4: Find Most Similar Normal for Anomaly Using SSIM
# -----------------------------
def find_most_similar_normal_ssim(anomaly_spectrogram, normal_spectrograms):
    """Pick the normal spectrogram with the highest SSIM score against the anomaly.

    Args:
        anomaly_spectrogram: Spectrogram array of the anomaly; its maximum
            value is used as the SSIM ``data_range``.
        normal_spectrograms: Iterable of ``(spectrogram, path)`` pairs.

    Returns:
        A ``(path, score)`` tuple for the best-scoring candidate. When no
        candidate scores strictly above ``-1`` (including an empty input),
        ``(None, -1)`` is returned.
    """
    best_path, best_score = None, -1

    for candidate, candidate_path in normal_spectrograms:
        # full=True makes ssim return (score, similarity_map); only the score is needed.
        score = ssim(
            anomaly_spectrogram,
            candidate,
            full=True,
            data_range=anomaly_spectrogram.max(),
        )[0]
        if score > best_score:
            best_path, best_score = candidate_path, score

    return best_path, best_score

# -----------------------------
# Step 5: Plot and Play Normal and Matching Anomaly Spectrograms
# -----------------------------
def _show_db_spectrogram(position, magnitude, sr, hop_length, title):
    """Draw one dB-scaled spectrogram in slot ``position`` of a 1x2 subplot grid."""
    plt.subplot(1, 2, position)
    S_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    # Pass the file's own sampling rate: the audio is loaded with sr=None, so
    # leaving specshow on its default rate would mislabel both axes.
    librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='linear')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)


def plot_and_play_normal_anomaly_pair(anom_path, norm_path, n_fft, hop_length):
    """Plot an anomaly/normal spectrogram pair side by side and show audio players.

    Both files are loaded at their native sampling rate (``sr=None``), their
    magnitude spectrograms are converted to dB relative to each one's own
    maximum, and the two plots are drawn in one figure. Two audio players are
    then displayed next to each other through an HTML flex container.

    Args:
        anom_path: Path of the anomaly audio file.
        norm_path: Path of the normal audio file.
        n_fft: FFT window size used for both spectrograms.
        hop_length: Hop length used for both spectrograms and their time axes.

    Returns:
        None. The figure and the audio players are displayed as side effects.
    """
    y_anomaly, sr_anomaly = librosa.load(anom_path, sr=None)
    y_normal, sr_normal = librosa.load(norm_path, sr=None)

    S_anomaly = compute_spectrogram(y_anomaly, n_fft, hop_length)
    S_normal = compute_spectrogram(y_normal, n_fft, hop_length)

    plt.figure(figsize=(30, 6))
    _show_db_spectrogram(1, S_anomaly, sr_anomaly, hop_length, 'Anomaly Spectrogram')
    _show_db_spectrogram(2, S_normal, sr_normal, hop_length, 'Normal Spectrogram')
    plt.tight_layout()
    plt.show()

    # Display audio players horizontally using HTML
    anomaly_audio_html = Audio(y_anomaly, rate=sr_anomaly)._repr_html_()
    normal_audio_html = Audio(y_normal, rate=sr_normal)._repr_html_()
    
    combined_html = f"""
    <div style="display: flex; gap: 20px;">
        <div>
            <p>Anomaly Audio:</p>
            {anomaly_audio_html}
        </div>
        <div>
            <p>Normal Audio:</p>
            {normal_audio_html}
        </div>
    </div>
    """
    display(HTML(combined_html))



# -----------------------------
# Interactive Widgets and Main Execution Flow
# -----------------------------
def interactive_visualization():
    """Build the widget controls and wire them to the anomaly/normal pair viewer.

    Controls: a class dropdown (from ``CLASS_NAMES``), an ``n_fft`` slider, a
    hop-ratio dropdown and a bounded index box selecting which anomaly file of
    the chosen class to show. For the selected anomaly, a normal file is
    looked up by file name first; if none matches, the normal file with the
    highest SSIM score against the anomaly spectrogram is used instead.

    Returns:
        None. Widgets, plots and audio players are displayed as side effects.
    """
    if not CLASS_NAMES:
        # Without this guard CLASS_NAMES[0] below would raise IndexError.
        print(f"No class directories found in: {DATASETS_DIR}")
        return

    def load_paths_by_label(class_name):
        """Return ``(normal_paths, anomaly_paths)`` for a class.

        Raises FileNotFoundError when the class has no attributes_00.csv.
        """
        attributes_file = os.path.join(DATASETS_DIR, class_name, "attributes_00.csv")
        file_paths, labels = load_dataset(attributes_file, DATASETS_DIR, class_name)
        normal_paths = [path for path, label in zip(file_paths, labels) if label == 'normal']
        anomaly_paths = [path for path, label in zip(file_paths, labels) if label == 'anomaly']
        return normal_paths, anomaly_paths

    class_dropdown = widgets.Dropdown(
        options=CLASS_NAMES,
        value=CLASS_NAMES[0],
        description='Class:',
        disabled=False,
    )

    n_fft_slider = widgets.IntSlider(
        value=N_FFT,
        min=32,
        max=512,
        step=2,
        description='n_fft:',
        continuous_update=False
    )

    hop_length_ratio_dropdown = widgets.Dropdown(
        options=[1/2, 1/3, 1/4],
        value=1/2,
        description='Hop Ratio:',
        disabled=False
    )

    # BoundedIntText (unlike IntText) has real min/max traits, so the upper
    # bound assigned in update_index_slider is actually enforced by the widget.
    index_input = widgets.BoundedIntText(
        value=0,
        min=0,
        max=0,
        description='Index:',
        continuous_update=True,
        disabled=False
    )

    def update_index_slider(*args):
        """Limit the index box to the number of anomaly files of the selected class."""
        try:
            _, anomaly_paths = load_paths_by_label(class_dropdown.value)
        except FileNotFoundError:
            index_input.max = 0
        else:
            index_input.max = max(len(anomaly_paths) - 1, 0)

    class_dropdown.observe(update_index_slider, names='value')
    update_index_slider()

    def visualize(class_name, n_fft, hop_ratio, pair_index):
        """Show the spectrogram pair and audio players for one anomaly index."""
        hop_length = int(n_fft * hop_ratio)

        try:
            normal_paths, anomaly_paths = load_paths_by_label(class_name)
        except FileNotFoundError as e:
            print(e)
            return

        # Reject negative values too: they would otherwise wrap around through
        # Python's negative indexing and show an unrelated file.
        if not 0 <= pair_index < len(anomaly_paths):
            print("Index out of range.")
            return

        anomaly_path = anomaly_paths[pair_index]
        print(f"Current anomaly file: {anomaly_path}")
        matching_normal_path = find_matching_normal_file(anomaly_path, normal_paths)

        if matching_normal_path is None:
            print(f"No direct match found for anomaly file: {anomaly_path}")
            # Fallback: compare the anomaly against every normal file by SSIM.
            normal_spectrograms = [
                (compute_spectrogram(librosa.load(path, sr=None)[0], n_fft, hop_length), path)
                for path in normal_paths
            ]
            y_anomaly, _ = librosa.load(anomaly_path, sr=None)
            S_anomaly = compute_spectrogram(y_anomaly, n_fft, hop_length)
            matching_normal_path, max_ssim = find_most_similar_normal_ssim(S_anomaly, normal_spectrograms)
            if matching_normal_path is None:
                # Nothing to pair with (e.g. the class has no normal files);
                # stop here instead of passing None on to librosa.load.
                print(f"No normal file available to compare with: {anomaly_path}")
                return
            print(f"Most similar normal file: {matching_normal_path} (SSIM: {max_ssim})")
        else:
            print(f"Matching normal file: {matching_normal_path}")

        plot_and_play_normal_anomaly_pair(anomaly_path, matching_normal_path, n_fft, hop_length)

    interact(
        visualize,
        class_name=class_dropdown,
        n_fft=n_fft_slider,
        hop_ratio=hop_length_ratio_dropdown,
        pair_index=index_input
    )

# Launch the interactive viewer only when this code runs as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    interactive_visualization()

interactive(children=(Dropdown(description='Class:', options=('ToyCar', 'gearbox', 'valve', 'bearing', 'slider…