In [3]:
import os
import glob
import numpy as np
import random
from scipy.signal import butter, sosfilt, resample_poly, stft
import soundfile as sf
from tqdm import tqdm
import librosa
# ————————————
# Your generalized processing function:
def process_audio_general(
    path,
    target_rates=[1000, 2000, 4000, 8000, 16000, 22000],
    high_freq_threshold=11000,
    significance_threshold=1e-5
):
    data, fs = sf.read(path)
    if data.ndim > 1:
        data = data[:, 0]
    chunk_size = fs
    n_chunks = len(data) // chunk_size
    raw_resampled   = {sr: [] for sr in target_rates}
    filtered_resampled = {sr: [] for sr in target_rates}
    power = []
    rat = []
    for i in range(n_chunks):
        chunk = data[i*chunk_size:(i+1)*chunk_size]
        # power spectral density
        psd = np.abs(np.fft.rfft(chunk))**2
        freqs = np.fft.rfftfreq(len(chunk), d=1/fs)
        total_power = psd.sum()
        mean_square_power = psd.sum() / (len(chunk)**2)
        # print("MSP", mean_square_power)
        if mean_square_power <= significance_threshold:
            continue
        high_power  = psd[freqs > high_freq_threshold].sum() / (len(chunk)**2)
        if high_power / mean_square_power < significance_threshold:
            continue
        # print("HTR", high_power / mean_square_power)
        for sr in target_rates:
            # raw resample → “unfiltered”
            raw = resample_poly(chunk, sr, fs)
            raw_resampled[sr].append(raw)

            # filter @ Nyquist(sr), then resample → “filtered”
            cutoff = sr / 2
            sos = butter(6, cutoff, fs=fs, btype='low', output='sos')
            filtered = sosfilt(sos, chunk)
            filt_rs = resample_poly(filtered, sr, fs)
            filtered_resampled[sr].append(filt_rs)

    return raw_resampled, filtered_resampled, fs

# ————————————
# STFT helper (fixed n_fft & 50% overlap):
def generate_stft(chunk, fs, n_fft=512):
    f, t, Zxx = stft(chunk, fs=fs, nperseg=n_fft, noverlap=n_fft//2)
    return np.abs(Zxx)

# ————————————
# 1) Gather all files + class IDs
def collect_file_list():
    files = [[], [], []]
    # UrbanSound8K
    base1 = r"D:\Aliasing3\UrbanSound8K\audio"
    for fold in tqdm(range(1, 11), desc="u8k"):
        for path in glob.glob(os.path.join(base1, f"fold{fold}", "*.wav")):
            cls = os.path.basename(path).split('-')[1]  # e.g. “7061-6-0-0.wav” → class “6”
            files[0].append({'path': path, 'class': cls})
    # ESC‑50
    base2 = r"D:\Aliasing3\ESC-50-master\audio"
    for path in tqdm(glob.glob(os.path.join(base2, "*.wav")), desc="esc"):
        name = os.path.basename(path).rsplit('.', 1)[0].split('-')
        cls = name[-1]  # “1-137-A-32.wav” → class “32”
        files[1].append({'path': path, 'class': cls})
    # Zenodo
    base3 = r"D:\Aliasing3\Zenodo\audio_train"
    for path in tqdm(glob.glob(os.path.join(base3, "*.flac")), desc="zen"):
        files[2].append({'path': path, 'class': '0'})  # no class → “0”
    return [files[0], files[1], files[2]]

# ————————————
# 2) First pass → global min/max of every STFT bin
def find_global_bounds(file_list):
    lo, hi = np.inf, -np.inf
    for entry in tqdm(file_list, desc="Global Bounds"):
        raw, filt, fs = process_audio_general(entry['path'])
        for d in (raw, filt):
            for sr, chunks in d.items():
                for chunk in chunks:
                    mag = generate_stft(chunk, fs)
                    lo = min(lo, mag.min())
                    hi = max(hi, mag.max())
    return lo, hi

# ————————————
# 3) Second pass → normalize & save .npy, then split
def build_and_save_dataset(file_list, lo, hi, out_root,
                           train_frac=0.8, val_frac=0.1, seed=42):
    os.makedirs(out_root, exist_ok=True)
    all_samples = []

    # --- collect with 'sr' in metadata ---
    for entry in tqdm(file_list, desc="Processing Samples"):
        raw, filt, fs = process_audio_general(entry['path'])
        for label, d in [('unfiltered', raw), ('filtered', filt)]:
            for sr, chunks in d.items():
                for chunk in chunks:
                    mag = generate_stft(chunk, sr)   # use sr as fs here!
                    norm = (mag - lo) / (hi - lo)
                    all_samples.append({
                        'array': norm.astype(np.float32),
                        'label': label,
                        'class': entry['class'],
                        'sr': sr
                    })

    # shuffle & split
    random.seed(seed)
    random.shuffle(all_samples)
    N = len(all_samples)
    n_train = int(train_frac * N)
    n_val   = int(val_frac  * N)

    splits = {
        'train':      all_samples[            :n_train],
        'validation': all_samples[n_train    :n_train+n_val],
        'test':       all_samples[n_train+n_val:]
    }

    # save out
    counters = {}
    for sp, samples in splits.items():
        for s in tqdm(samples, desc="Saving Files"):
            sr = s['sr']
            lbl = s['label']
            # initialize counter for this combination
            counters.setdefault((sr, sp, lbl), 0)
            counters[(sr, sp, lbl)] += 1

            idx = counters[(sr, sp, lbl)]
            fname = f"{idx}-{s['class']}.npy"

            # new path: out_root/{sr}/{split}/{label}
            save_dir = os.path.join(out_root, str(sr), sp, lbl)
            os.makedirs(save_dir, exist_ok=True)

            full_path = os.path.join(save_dir, fname)
            np.save(full_path, s['array'])


def create_mfcc_dataset(
    file_list,
    out_root,
    target_rates=[1000, 2000, 4000, 8000, 16000, 22000],
    n_mfcc=20,
    train_frac=0.8,
    val_frac=0.1,
    seed=42,
    high_freq_threshold=11000,
    significance_threshold=1e-5
):
    """
    Build a MFCC-based dataset from processed chunks, mirroring the STFT pipeline:
      - Uses process_audio_general to generate 'unfiltered' & 'filtered' chunks at each sr.
      - Computes MFCCs (n_mfcc coefficients) on each chunk (resampled at its sr).
      - Flattens each MFCC matrix to a 1D feature vector.
      - Shuffles and splits into train/validation/test (80/10/10 by default).
      - Normalizes features with z-score (mean/std) computed on TRAIN only.
      - Saves each flattened, normalized feature vector as .npy under:

        out_root/{sr}/{split}/{filtered|unfiltered}/<idx>-<class>.npy

      - Also saves normalization stats to out_root/mfcc_norm_stats.npz
    """
    os.makedirs(out_root, exist_ok=True)
    samples = []

    # 1) Gather all MFCC features
    for entry in tqdm(file_list, desc="Generating MFCC features"):
        raw_dict, filt_dict, _ = process_audio_general(
            entry['path'],
            target_rates=target_rates,
            high_freq_threshold=high_freq_threshold,
            significance_threshold=significance_threshold
        )
        for label, d in [('unfiltered', raw_dict), ('filtered', filt_dict)]:
            for sr, chunks in d.items():
                for chunk in chunks:
                    # compute MFCC on resampled chunk
                    mfcc = librosa.feature.mfcc(
                        y=chunk,
                        sr=sr,
                        n_mfcc=n_mfcc,
                        n_fft=sr//2,
                        hop_length=sr//4
                    )
                    feat = mfcc.flatten()
                    samples.append({
                        'feat': feat.astype(np.float32),
                        'label': label,
                        'class': entry['class'],
                        'sr': sr
                    })

    # 2) Shuffle & split
    random.seed(seed)
    random.shuffle(samples)
    N = len(samples)
    n_train = int(train_frac * N)
    n_val   = int(val_frac   * N)

    splits = {
        'train':      samples[:n_train],
        'validation': samples[n_train:n_train+n_val],
        'test':       samples[n_train+n_val:]
    }

    # 3) Compute normalization on TRAIN only
    feats_train = np.vstack([s['feat'] for s in splits['train']])
    mean = feats_train.mean(axis=0)
    std  = feats_train.std(axis=0) + 1e-8
    np.savez(os.path.join(out_root, 'mfcc_norm_stats.npz'), mean=mean, std=std)

    # 4) Save normalized features per split/sr/label
    counters = {}
    for split_name, split_samples in splits.items():
        for s in tqdm(split_samples, desc=f"Saving {split_name}"):
            sr    = s['sr']
            label = s['label']
            cls   = s['class']
            key = (sr, split_name, label)
            counters.setdefault(key, 0)
            counters[key] += 1
            idx = counters[key]

            # normalize
            norm_feat = (s['feat'] - mean) / std

            # path: out_root/sr/split/label
            save_dir = os.path.join(out_root, str(sr), split_name, label)
            os.makedirs(save_dir, exist_ok=True)
            fname = f"{idx}-{cls}.npy"
            np.save(os.path.join(save_dir, fname), norm_feat)

    print(f"MFCC dataset saved under {out_root}")
# ————————————
if __name__ == "__main__":
    file_list = collect_file_list()
    # lo1, hi1     = find_global_bounds(file_list[0])
    # lo2, hi2     = find_global_bounds(file_list[1])
    lo3, hi3     = find_global_bounds(file_list[2])
    # build_and_save_dataset(
    #     file_list[0],
    #     lo1, hi1,
    #     out_root=r"D:\Aliasing3\Processed_Files\DS_U8K"
    # )
    # build_and_save_dataset(
    #     file_list[1],
    #     lo2, hi2,
    #     out_root=r"D:\Aliasing3\Processed_Files\DS_ESC"
    # )
    build_and_save_dataset(
        file_list[2],
        lo3, hi3,
        out_root=r"D:\Aliasing3\Processed_Files\DS_ZEN"
    )
    # lists = collect_file_list()
    # out_roots = [
    #     # r"D:\Aliasing3\Processed_Files\DS_U8K_MFCC",
    #     # r"D:\Aliasing3\Processed_Files\DS_ESC_MFCC",
    #     r"D:\Aliasing3\Processed_Files\DS_ZEN_MFCC",
    # ]
    # for flist, root in zip(lists, out_roots):
    #     create_mfcc_dataset(flist, root)
    # print("Done! 🎉  Your train/validation/test splits are in Processed_Files.")

# **Key points:**
# - **Two‑pass normalization** ensures every `.npy` STFT uses the same min/max scale.
# - **80/10/10 split**, shuffled with a fixed seed for reproducibility.

u8k: 100%|█████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 81.48it/s]
esc: 100%|█████████████████████████████████████████████████████████████████████| 2000/2000 [00:00<00:00, 132368.80it/s]
zen: 100%|█████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<?, ?it/s]
Global Bounds: 100%|███████████████████████████████████████████████████████████████████| 24/24 [09:03<00:00, 22.66s/it]
Processing Samples: 100%|██████████████████████████████████████████████████████████████| 24/24 [14:07<00:00, 35.32s/it]
Saving Files: 100%|█████████████████████████████████████████████████████████████| 53385/53385 [02:16<00:00, 391.06it/s]
Saving Files: 100%|███████████████████████████████████████████████████████████████| 6673/6673 [00:14<00:00, 460.98it/s]
Saving Files: 100%|███████████████████████████████████████████████████████████████| 6674/6674 [00:18<00:00, 351.77it/s]


In [4]:
import os
import glob
import numpy as np
import random
import pickle
import csv
from datetime import datetime
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm

# Configuration
SAMPLE_RATES = [1000, 2000, 4000, 8000, 16000, 22000]

# Dataset roots for STFT and MFCC pipelines
DATASETS = {
    'STFT': [
        # r"D:\Aliasing3\Processed_Files\DS_ESC",
        r"D:\Aliasing3\Processed_Files\DS_U8K",
        r"D:\Aliasing3\Processed_Files\DS_ZEN"
    ],
    # 'MFCC': [
    #     r"D:\Aliasing3\Processed_Files\DS_ESC_MFCC",
    #     r"D:\Aliasing3\Processed_Files\DS_U8K_MFCC",
    #     r"D:\Aliasing3\Processed_Files\DS_ZEN_MFCC"
    # ]
}

# Collect results for CSV output
results = []

def load_data(root_dir, sr, split):
    """
    Load flattened feature .npy files for a given sample rate and split.
    Handles both STFT and MFCC datasets (features already flattened).
    Assumes structure: root_dir/{sr}/{split}/{filtered|unfiltered}/*.npy
    Returns X (n_samples, n_features), y (n_samples,)
    """
    X, y = [], []
    for label, cls in [('unfiltered', 0), ('filtered', 1)]:
        folder = os.path.join(root_dir, str(sr), split, label)
        if not os.path.isdir(folder):
            continue
        files = glob.glob(os.path.join(folder, '*.npy'))
        for fpath in tqdm(files, desc=f"Loading {split} {label} {sr}Hz"):
            arr = np.load(fpath)
            vec = arr.flatten()
            X.append(vec)
            y.append(cls)
    if not X:
        return np.empty((0, 0)), np.empty((0,))
    return np.vstack(X), np.array(y)


def balance_data(X, y, seed=42):
    """Down-sample majority class to match minority for balanced classes."""
    np.random.seed(seed)
    classes, counts = np.unique(y, return_counts=True)
    min_count = counts.min()
    idxs = []
    for cls in classes:
        cls_idxs = np.where(y == cls)[0]
        selected = np.random.choice(cls_idxs, min_count, replace=False)
        idxs.extend(selected)
    random.shuffle(idxs)
    return X[idxs], y[idxs]


def train_and_validate_for_rate(root_dir, sr, feature_type):
    print(f"\n=== {feature_type} | Root: {root_dir} | {sr}Hz ===")
    models_dir = os.path.join(root_dir, 'models')
    os.makedirs(models_dir, exist_ok=True)

    # Check for MFCC normalization stats
    norm_path = os.path.join(root_dir, 'mfcc_norm_stats.npz')
    have_norm = os.path.isfile(norm_path)
    if have_norm:
        stats = np.load(norm_path)
        mean_vec = stats['mean']
        std_vec  = stats['std']

    # Load and preprocess training data
    X_train, y_train = load_data(root_dir, sr, 'train')
    if X_train.size == 0:
        print(f"No training data at {sr}Hz in {root_dir}; skipping.")
        return
    if have_norm:
        X_train = (X_train - mean_vec) / std_vec
    X_train, y_train = balance_data(X_train, y_train)

    # Load and preprocess validation data
    X_val, y_val = load_data(root_dir, sr, 'validation')
    if X_val.size == 0:
        print(f"No validation data at {sr}Hz in {root_dir}; skipping.")
        return
    if have_norm:
        X_val = (X_val - mean_vec) / std_vec
    X_val, y_val = balance_data(X_val, y_val)

    # Base classifiers
    xgb_base = XGBClassifier(
        n_estimators=500,
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=42
    )
    rf_base = AdaBoostClassifier(
        estimator=RandomForestClassifier(n_estimators=10, random_state=42),
        n_estimators=5,
        random_state=42
    )
    dt_base = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(),
        n_estimators=5,
        random_state=42
    )

    # Grid search only for XGBoost
    print("Running grid search for XGBoost...")
    param_grid = {
        'max_depth': [5],
        'learning_rate': [0.01, 0.1]
    }
    grid = GridSearchCV(
        xgb_base,
        param_grid,
        cv=3,
        scoring='accuracy',
        n_jobs=-1,
        verbose=1
    )
    grid.fit(X_train, y_train)
    best_xgb = grid.best_estimator_
    print("Best XGBoost params:", grid.best_params_)

    models = {
        'xgboost': best_xgb,
        'random_forest_boost': rf_base,
        'adaboost_dt': dt_base
    }

    # Train, evaluate, and save models
    for name, clf in models.items():
        print(f"Training {name}...")
        clf.fit(X_train, y_train)
        preds = clf.predict(X_val)
        acc = accuracy_score(y_val, preds)
        f1 = f1_score(y_val, preds, average='macro')
        print(f"Validation Accuracy ({name}): {acc:.4f}, F1 (macro): {f1:.4f}")
        print(classification_report(y_val, preds, target_names=['unfiltered', 'filtered']))

        # Save model
        model_path = os.path.join(models_dir, f"{name}_{sr}Hz.pkl")
        with open(model_path, 'wb') as f:
            pickle.dump(clf, f)
        print(f"Saved model to {model_path}")

        # Record results
        results.append({
            'feature_type': feature_type,
            'dataset_root': root_dir,
            'model': name,
            'sample_rate': sr,
            'f1_score': f1
        })


def main():
    for feature_type, roots in DATASETS.items():
        for root_dir in roots:
            for sr in SAMPLE_RATES:
                train_and_validate_for_rate(root_dir, sr, feature_type)

    # Save results to a new CSV with timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_file = f"model_performance_{timestamp}.csv"
    with open(csv_file, 'w', newline='') as cf:
        writer = csv.DictWriter(cf, fieldnames=['feature_type', 'dataset_root', 'model', 'sample_rate', 'f1_score'])
        writer.writeheader()
        writer.writerows(results)
    print(f"Results saved to {csv_file}")
    print("All models trained and results recorded.")

if __name__ == '__main__':
    main()


=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 1000Hz ===


Loading train unfiltered 1000Hz: 100%|█████████████████████████████████████████████| 5056/5056 [00:56<00:00, 89.39it/s]
Loading train filtered 1000Hz: 100%|███████████████████████████████████████████████| 5040/5040 [01:00<00:00, 83.75it/s]
Loading validation unfiltered 1000Hz: 100%|██████████████████████████████████████████| 596/596 [00:06<00:00, 89.42it/s]
Loading validation filtered 1000Hz: 100%|███████████████████████████████████████████| 618/618 [00:05<00:00, 103.04it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.6737, F1 (macro): 0.6737
              precision    recall  f1-score   support

  unfiltered       0.67      0.67      0.67       596
    filtered       0.67      0.67      0.67       596

    accuracy                           0.67      1192
   macro avg       0.67      0.67      0.67      1192
weighted avg       0.67      0.67      0.67      1192

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_1000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5285, F1 (macro): 0.5285
              precision    recall  f1-score   support

  unfiltered       0.53      0.53      0.53       596
    filtered       0.53      0.53      0.53       596

    accuracy                           0.53      1192
   macro avg       0.53      0.53      0.53      1192
weighted avg       0.53      0.53      0.53      1192

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_1000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7005, F1 (macro): 0.7005
              precision    recall  f1-score   support

  unfiltered       0.70      0.70      0.70       596
    filtered       0.70      0.70      0.70       596

    accuracy                           0.70      1192
   macro avg       0.70      0.70      0.70      1192
weighted avg       0.70      0.70      0.70      1192

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_1000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 2000Hz ===


Loading train unfiltered 2000Hz: 100%|█████████████████████████████████████████████| 5024/5024 [01:05<00:00, 77.23it/s]
Loading train filtered 2000Hz: 100%|███████████████████████████████████████████████| 5025/5025 [01:10<00:00, 71.52it/s]
Loading validation unfiltered 2000Hz: 100%|██████████████████████████████████████████| 637/637 [00:07<00:00, 83.04it/s]
Loading validation filtered 2000Hz: 100%|████████████████████████████████████████████| 632/632 [00:08<00:00, 71.34it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7002, F1 (macro): 0.7001
              precision    recall  f1-score   support

  unfiltered       0.69      0.72      0.70       632
    filtered       0.71      0.69      0.70       632

    accuracy                           0.70      1264
   macro avg       0.70      0.70      0.70      1264
weighted avg       0.70      0.70      0.70      1264

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_2000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5672, F1 (macro): 0.5672
              precision    recall  f1-score   support

  unfiltered       0.57      0.58      0.57       632
    filtered       0.57      0.56      0.56       632

    accuracy                           0.57      1264
   macro avg       0.57      0.57      0.57      1264
weighted avg       0.57      0.57      0.57      1264

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_2000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7207, F1 (macro): 0.7207
              precision    recall  f1-score   support

  unfiltered       0.72      0.72      0.72       632
    filtered       0.72      0.72      0.72       632

    accuracy                           0.72      1264
   macro avg       0.72      0.72      0.72      1264
weighted avg       0.72      0.72      0.72      1264

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_2000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 4000Hz ===


Loading train unfiltered 4000Hz: 100%|█████████████████████████████████████████████| 5029/5029 [01:10<00:00, 70.98it/s]
Loading train filtered 4000Hz: 100%|███████████████████████████████████████████████| 5052/5052 [01:14<00:00, 67.76it/s]
Loading validation unfiltered 4000Hz: 100%|██████████████████████████████████████████| 632/632 [00:09<00:00, 67.35it/s]
Loading validation filtered 4000Hz: 100%|████████████████████████████████████████████| 626/626 [00:08<00:00, 71.49it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7244, F1 (macro): 0.7244
              precision    recall  f1-score   support

  unfiltered       0.73      0.71      0.72       626
    filtered       0.72      0.74      0.73       626

    accuracy                           0.72      1252
   macro avg       0.72      0.72      0.72      1252
weighted avg       0.72      0.72      0.72      1252

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_4000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5535, F1 (macro): 0.5535
              precision    recall  f1-score   support

  unfiltered       0.55      0.54      0.55       626
    filtered       0.55      0.56      0.56       626

    accuracy                           0.55      1252
   macro avg       0.55      0.55      0.55      1252
weighted avg       0.55      0.55      0.55      1252

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_4000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7085, F1 (macro): 0.7085
              precision    recall  f1-score   support

  unfiltered       0.71      0.72      0.71       626
    filtered       0.71      0.70      0.71       626

    accuracy                           0.71      1252
   macro avg       0.71      0.71      0.71      1252
weighted avg       0.71      0.71      0.71      1252

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_4000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 8000Hz ===


Loading train unfiltered 8000Hz: 100%|█████████████████████████████████████████████| 4975/4975 [01:37<00:00, 50.92it/s]
Loading train filtered 8000Hz: 100%|███████████████████████████████████████████████| 5026/5026 [01:39<00:00, 50.51it/s]
Loading validation unfiltered 8000Hz: 100%|██████████████████████████████████████████| 670/670 [00:13<00:00, 51.27it/s]
Loading validation filtered 8000Hz: 100%|████████████████████████████████████████████| 617/617 [00:11<00:00, 52.95it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.1, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7472, F1 (macro): 0.7469
              precision    recall  f1-score   support

  unfiltered       0.73      0.78      0.75       617
    filtered       0.76      0.72      0.74       617

    accuracy                           0.75      1234
   macro avg       0.75      0.75      0.75      1234
weighted avg       0.75      0.75      0.75      1234

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_8000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5446, F1 (macro): 0.5446
              precision    recall  f1-score   support

  unfiltered       0.54      0.55      0.55       617
    filtered       0.55      0.54      0.54       617

    accuracy                           0.54      1234
   macro avg       0.54      0.54      0.54      1234
weighted avg       0.54      0.54      0.54      1234

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_8000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7018, F1 (macro): 0.7017
              precision    recall  f1-score   support

  unfiltered       0.71      0.68      0.70       617
    filtered       0.69      0.72      0.71       617

    accuracy                           0.70      1234
   macro avg       0.70      0.70      0.70      1234
weighted avg       0.70      0.70      0.70      1234

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_8000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 16000Hz ===


Loading train unfiltered 16000Hz: 100%|████████████████████████████████████████████| 5030/5030 [01:34<00:00, 53.11it/s]
Loading train filtered 16000Hz: 100%|██████████████████████████████████████████████| 5077/5077 [01:33<00:00, 54.19it/s]
Loading validation unfiltered 16000Hz: 100%|█████████████████████████████████████████| 657/657 [00:11<00:00, 56.95it/s]
Loading validation filtered 16000Hz: 100%|███████████████████████████████████████████| 609/609 [00:11<00:00, 53.63it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7176, F1 (macro): 0.7175
              precision    recall  f1-score   support

  unfiltered       0.73      0.70      0.71       609
    filtered       0.71      0.74      0.72       609

    accuracy                           0.72      1218
   macro avg       0.72      0.72      0.72      1218
weighted avg       0.72      0.72      0.72      1218

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_16000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5493, F1 (macro): 0.5493
              precision    recall  f1-score   support

  unfiltered       0.55      0.55      0.55       609
    filtered       0.55      0.55      0.55       609

    accuracy                           0.55      1218
   macro avg       0.55      0.55      0.55      1218
weighted avg       0.55      0.55      0.55      1218

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_16000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7053, F1 (macro): 0.7053
              precision    recall  f1-score   support

  unfiltered       0.70      0.71      0.71       609
    filtered       0.71      0.70      0.70       609

    accuracy                           0.71      1218
   macro avg       0.71      0.71      0.71      1218
weighted avg       0.71      0.71      0.71      1218

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_16000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_U8K | 22000Hz ===


Loading train unfiltered 22000Hz: 100%|████████████████████████████████████████████| 5067/5067 [01:39<00:00, 50.99it/s]
Loading train filtered 22000Hz: 100%|██████████████████████████████████████████████| 4973/4973 [01:41<00:00, 49.15it/s]
Loading validation unfiltered 22000Hz: 100%|█████████████████████████████████████████| 577/577 [00:11<00:00, 49.38it/s]
Loading validation filtered 22000Hz: 100%|███████████████████████████████████████████| 675/675 [00:13<00:00, 49.87it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7184, F1 (macro): 0.7184
              precision    recall  f1-score   support

  unfiltered       0.72      0.72      0.72       577
    filtered       0.72      0.71      0.72       577

    accuracy                           0.72      1154
   macro avg       0.72      0.72      0.72      1154
weighted avg       0.72      0.72      0.72      1154

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\xgboost_22000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.5433, F1 (macro): 0.5431
              precision    recall  f1-score   support

  unfiltered       0.54      0.56      0.55       577
    filtered       0.55      0.52      0.53       577

    accuracy                           0.54      1154
   macro avg       0.54      0.54      0.54      1154
weighted avg       0.54      0.54      0.54      1154

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\random_forest_boost_22000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.7132, F1 (macro): 0.7132
              precision    recall  f1-score   support

  unfiltered       0.71      0.71      0.71       577
    filtered       0.71      0.71      0.71       577

    accuracy                           0.71      1154
   macro avg       0.71      0.71      0.71      1154
weighted avg       0.71      0.71      0.71      1154

Saved model to D:\Aliasing3\Processed_Files\DS_U8K\models\adaboost_dt_22000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 1000Hz ===


Loading train unfiltered 1000Hz: 100%|█████████████████████████████████████████████| 4485/4485 [00:59<00:00, 75.50it/s]
Loading train filtered 1000Hz: 100%|███████████████████████████████████████████████| 4457/4457 [00:57<00:00, 77.80it/s]
Loading validation unfiltered 1000Hz: 100%|██████████████████████████████████████████| 537/537 [00:07<00:00, 74.29it/s]
Loading validation filtered 1000Hz: 100%|████████████████████████████████████████████| 555/555 [00:06<00:00, 82.25it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7011, F1 (macro): 0.7009
              precision    recall  f1-score   support

  unfiltered       0.71      0.67      0.69       537
    filtered       0.69      0.73      0.71       537

    accuracy                           0.70      1074
   macro avg       0.70      0.70      0.70      1074
weighted avg       0.70      0.70      0.70      1074

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_1000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.3752, F1 (macro): 0.3751
              precision    recall  f1-score   support

  unfiltered       0.37      0.36      0.37       537
    filtered       0.38      0.39      0.38       537

    accuracy                           0.38      1074
   macro avg       0.38      0.38      0.38      1074
weighted avg       0.38      0.38      0.38      1074

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_1000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5466, F1 (macro): 0.5465
              precision    recall  f1-score   support

  unfiltered       0.55      0.54      0.54       537
    filtered       0.55      0.56      0.55       537

    accuracy                           0.55      1074
   macro avg       0.55      0.55      0.55      1074
weighted avg       0.55      0.55      0.55      1074

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_1000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 2000Hz ===


Loading train unfiltered 2000Hz: 100%|█████████████████████████████████████████████| 4416/4416 [00:58<00:00, 75.94it/s]
Loading train filtered 2000Hz: 100%|███████████████████████████████████████████████| 4437/4437 [00:59<00:00, 74.75it/s]
Loading validation unfiltered 2000Hz: 100%|██████████████████████████████████████████| 546/546 [00:07<00:00, 75.38it/s]
Loading validation filtered 2000Hz: 100%|████████████████████████████████████████████| 564/564 [00:07<00:00, 74.34it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.01, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7033, F1 (macro): 0.7033
              precision    recall  f1-score   support

  unfiltered       0.71      0.69      0.70       546
    filtered       0.70      0.71      0.71       546

    accuracy                           0.70      1092
   macro avg       0.70      0.70      0.70      1092
weighted avg       0.70      0.70      0.70      1092

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_2000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.3645, F1 (macro): 0.3638
              precision    recall  f1-score   support

  unfiltered       0.37      0.40      0.38       546
    filtered       0.35      0.33      0.34       546

    accuracy                           0.36      1092
   macro avg       0.36      0.36      0.36      1092
weighted avg       0.36      0.36      0.36      1092

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_2000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5385, F1 (macro): 0.5385
              precision    recall  f1-score   support

  unfiltered       0.54      0.54      0.54       546
    filtered       0.54      0.53      0.54       546

    accuracy                           0.54      1092
   macro avg       0.54      0.54      0.54      1092
weighted avg       0.54      0.54      0.54      1092

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_2000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 4000Hz ===


Loading train unfiltered 4000Hz: 100%|█████████████████████████████████████████████| 4444/4444 [01:08<00:00, 64.61it/s]
Loading train filtered 4000Hz: 100%|███████████████████████████████████████████████| 4471/4471 [01:11<00:00, 62.70it/s]
Loading validation unfiltered 4000Hz: 100%|██████████████████████████████████████████| 542/542 [00:07<00:00, 68.72it/s]
Loading validation filtered 4000Hz: 100%|████████████████████████████████████████████| 538/538 [00:07<00:00, 71.48it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.1, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.8020, F1 (macro): 0.8020
              precision    recall  f1-score   support

  unfiltered       0.79      0.81      0.80       538
    filtered       0.81      0.79      0.80       538

    accuracy                           0.80      1076
   macro avg       0.80      0.80      0.80      1076
weighted avg       0.80      0.80      0.80      1076

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_4000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.4796, F1 (macro): 0.4792
              precision    recall  f1-score   support

  unfiltered       0.48      0.51      0.49       538
    filtered       0.48      0.45      0.47       538

    accuracy                           0.48      1076
   macro avg       0.48      0.48      0.48      1076
weighted avg       0.48      0.48      0.48      1076

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_4000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5632, F1 (macro): 0.5632
              precision    recall  f1-score   support

  unfiltered       0.56      0.57      0.57       538
    filtered       0.56      0.55      0.56       538

    accuracy                           0.56      1076
   macro avg       0.56      0.56      0.56      1076
weighted avg       0.56      0.56      0.56      1076

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_4000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 8000Hz ===


Loading train unfiltered 8000Hz: 100%|█████████████████████████████████████████████| 4437/4437 [01:11<00:00, 61.66it/s]
Loading train filtered 8000Hz: 100%|███████████████████████████████████████████████| 4415/4415 [01:12<00:00, 61.02it/s]
Loading validation unfiltered 8000Hz: 100%|██████████████████████████████████████████| 563/563 [00:08<00:00, 69.11it/s]
Loading validation filtered 8000Hz: 100%|████████████████████████████████████████████| 580/580 [00:08<00:00, 65.28it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.1, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7718, F1 (macro): 0.7718
              precision    recall  f1-score   support

  unfiltered       0.77      0.78      0.77       563
    filtered       0.77      0.77      0.77       563

    accuracy                           0.77      1126
   macro avg       0.77      0.77      0.77      1126
weighted avg       0.77      0.77      0.77      1126

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_8000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.3774, F1 (macro): 0.3774
              precision    recall  f1-score   support

  unfiltered       0.38      0.38      0.38       563
    filtered       0.38      0.37      0.37       563

    accuracy                           0.38      1126
   macro avg       0.38      0.38      0.38      1126
weighted avg       0.38      0.38      0.38      1126

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_8000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5329, F1 (macro): 0.5327
              precision    recall  f1-score   support

  unfiltered       0.53      0.55      0.54       563
    filtered       0.53      0.51      0.52       563

    accuracy                           0.53      1126
   macro avg       0.53      0.53      0.53      1126
weighted avg       0.53      0.53      0.53      1126

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_8000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 16000Hz ===


Loading train unfiltered 16000Hz: 100%|████████████████████████████████████████████| 4426/4426 [01:03<00:00, 69.58it/s]
Loading train filtered 16000Hz: 100%|██████████████████████████████████████████████| 4439/4439 [01:04<00:00, 68.70it/s]
Loading validation unfiltered 16000Hz: 100%|█████████████████████████████████████████| 591/591 [00:07<00:00, 75.66it/s]
Loading validation filtered 16000Hz: 100%|███████████████████████████████████████████| 580/580 [00:09<00:00, 61.35it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.1, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.8155, F1 (macro): 0.8155
              precision    recall  f1-score   support

  unfiltered       0.81      0.83      0.82       580
    filtered       0.83      0.80      0.81       580

    accuracy                           0.82      1160
   macro avg       0.82      0.82      0.82      1160
weighted avg       0.82      0.82      0.82      1160

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_16000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.4034, F1 (macro): 0.4034
              precision    recall  f1-score   support

  unfiltered       0.40      0.41      0.41       580
    filtered       0.40      0.40      0.40       580

    accuracy                           0.40      1160
   macro avg       0.40      0.40      0.40      1160
weighted avg       0.40      0.40      0.40      1160

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_16000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5802, F1 (macro): 0.5801
              precision    recall  f1-score   support

  unfiltered       0.58      0.59      0.58       580
    filtered       0.58      0.57      0.58       580

    accuracy                           0.58      1160
   macro avg       0.58      0.58      0.58      1160
weighted avg       0.58      0.58      0.58      1160

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_16000Hz.pkl

=== STFT | Root: D:\Aliasing3\Processed_Files\DS_ZEN | 22000Hz ===


Loading train unfiltered 22000Hz: 100%|████████████████████████████████████████████| 4436/4436 [01:07<00:00, 66.05it/s]
Loading train filtered 22000Hz: 100%|██████████████████████████████████████████████| 4522/4522 [01:12<00:00, 62.75it/s]
Loading validation unfiltered 22000Hz: 100%|█████████████████████████████████████████| 555/555 [00:06<00:00, 80.00it/s]
Loading validation filtered 22000Hz: 100%|███████████████████████████████████████████| 522/522 [00:07<00:00, 73.79it/s]


Running grid search for XGBoost...
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best XGBoost params: {'learning_rate': 0.1, 'max_depth': 5}
Training xgboost...
Validation Accuracy (xgboost): 0.7749, F1 (macro): 0.7748
              precision    recall  f1-score   support

  unfiltered       0.76      0.80      0.78       522
    filtered       0.79      0.75      0.77       522

    accuracy                           0.77      1044
   macro avg       0.78      0.77      0.77      1044
weighted avg       0.78      0.77      0.77      1044

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\xgboost_22000Hz.pkl
Training random_forest_boost...




Validation Accuracy (random_forest_boost): 0.3190, F1 (macro): 0.3184
              precision    recall  f1-score   support

  unfiltered       0.33      0.35      0.34       522
    filtered       0.31      0.29      0.30       522

    accuracy                           0.32      1044
   macro avg       0.32      0.32      0.32      1044
weighted avg       0.32      0.32      0.32      1044

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\random_forest_boost_22000Hz.pkl
Training adaboost_dt...




Validation Accuracy (adaboost_dt): 0.5412, F1 (macro): 0.5412
              precision    recall  f1-score   support

  unfiltered       0.54      0.54      0.54       522
    filtered       0.54      0.54      0.54       522

    accuracy                           0.54      1044
   macro avg       0.54      0.54      0.54      1044
weighted avg       0.54      0.54      0.54      1044

Saved model to D:\Aliasing3\Processed_Files\DS_ZEN\models\adaboost_dt_22000Hz.pkl
Results saved to model_performance_20250726_125159.csv
All models trained and results recorded.
