In [None]:
# 02_Feature_Engineering.ipynb
import os
import numpy as np
import librosa
from tqdm import tqdm

# --- CONFIGURATION (UPDATED) ---
BASE_DATA_DIR = "../data/raw"     # root folder; one subfolder per class is read from here
OUTPUT_DIR = "../data/processed"  # destination for the saved .npy arrays
SAMPLE_RATE = 16000               # sampling rate requested from librosa.load
DURATION = 10                     # seconds of audio kept per clip
N_MELS = 64                       # number of mel bands in each spectrogram
HOP_LENGTH = 512                  # hop between spectrogram frames, in samples

# Create the output folder if it doesn't exist. exist_ok=True replaces the
# separate exists() check, so there is no window between "check" and "create"
# in which another process could create the directory and make makedirs fail.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def process_folder(subfolder_name: str, label: str) -> np.ndarray:
    """Convert every WAV file in one raw-data subfolder to a log-mel spectrogram.

    Each clip is loaded at ``SAMPLE_RATE``, zero-padded or truncated to exactly
    ``SAMPLE_RATE * DURATION`` samples, turned into an ``N_MELS``-band mel
    spectrogram and converted to decibels using the clip's own maximum as the
    reference (``ref=np.max``).

    Args:
        subfolder_name: Directory name under ``BASE_DATA_DIR`` (e.g. "normal").
        label: Human-readable name used only in the progress message.

    Returns:
        The per-file spectrograms, each transposed to (time, mel), stacked
        along axis 0. An empty 1-D array is returned when the folder is
        missing or no file could be processed.
    """
    # Path becomes: data/raw/normal
    folder_path = os.path.join(BASE_DATA_DIR, subfolder_name)

    # isdir (rather than exists) also rejects a plain file at this path, which
    # would otherwise make os.listdir raise instead of being skipped cleanly.
    if not os.path.isdir(folder_path):
        print(f" Error: {folder_path} does not exist. Skipping.")
        return np.array([])

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible. The extension is matched
    # case-insensitively so files named "*.WAV" are not silently ignored.
    files = sorted(
        name for name in os.listdir(folder_path) if name.lower().endswith('.wav')
    )
    print(f"Processing {label} data from {folder_path} ({len(files)} files)...")

    # Loop-invariant: every clip is forced to this many samples.
    expected_len = SAMPLE_RATE * DURATION

    features = []
    for filename in tqdm(files):
        # Best-effort batch: one unreadable file is reported and skipped
        # rather than aborting the whole folder.
        try:
            path = os.path.join(folder_path, filename)
            y, _ = librosa.load(path, sr=SAMPLE_RATE, duration=DURATION)

            # Pad to ensure exact length (crucial for a rectangular matrix)
            if len(y) < expected_len:
                y = np.pad(y, (0, expected_len - len(y)))
            else:
                y = y[:expected_len]

            # Create spectrogram
            mel = librosa.feature.melspectrogram(
                y=y, sr=SAMPLE_RATE, n_mels=N_MELS, hop_length=HOP_LENGTH
            )
            mel_db = librosa.power_to_db(mel, ref=np.max)

            # Transpose to (Time, Freq) for RNNs: (313, 64)
            features.append(mel_db.T)
        except Exception as e:
            print(f"Skipping corrupt file {filename}: {e}")

    return np.array(features)

# 1. Process
# Note: We pass the subfolder names "normal" and "abnormal"
X_normal = process_folder("normal", "Normal")
X_abnormal = process_folder("abnormal", "Abnormal")

# 2. Save to Disk
# Both arrays are written together, and only when each class produced at least
# one spectrogram. Otherwise nothing is saved and the empty set(s) are named so
# the failing path is obvious.
empty_sets = [
    name
    for name, data in (("normal", X_normal), ("abnormal", X_abnormal))
    if len(data) == 0
]
if not empty_sets:
    print("💾 Saving processed arrays to disk...")
    np.save(os.path.join(OUTPUT_DIR, "X_normal.npy"), X_normal)
    np.save(os.path.join(OUTPUT_DIR, "X_abnormal.npy"), X_abnormal)
    print(f" Success! Saved to {OUTPUT_DIR}/")
else:
    print(f" Error: No data processed for: {', '.join(empty_sets)}. Check your paths again.")

⚙️ Processing Normal data from ../data/raw\normal (1005 files)...


100%|██████████| 1005/1005 [01:20<00:00, 12.51it/s]


⚙️ Processing Abnormal data from ../data/raw\abnormal (146 files)...


100%|██████████| 146/146 [00:11<00:00, 12.97it/s]


💾 Saving processed arrays to disk...
✅ Success! Saved to ../data/processed/
