# Drum Sample Auto-Classifier

This notebook demonstrates how to use the trained model to automatically classify and organize your unsorted drum samples.

## Prerequisites
1. **Train a model first** by running these notebooks in order:
   - `MFCC_Feature_Extractor.ipynb` (extracts features from sorted training data)
   - `Model1_Train.ipynb` or `Model2_Train.ipynb` (trains the classifier)

2. **Prepare your unsorted data**:
   - Create a folder called `complete_drum_archive` in your project directory
   - Put all your unsorted `.wav` files in the `complete_drum_archive` folder

## How it works
- Loads your trained model (`models/model.keras`)
- Processes each audio file in `complete_drum_archive`
- Predicts which instrument it is (Crash, Hihat, Kick, Ride, Snare, Tom)
- Moves files into organized folders with confidence scores
- Files are renamed to include the predicted class and confidence level

In [1]:
import os
import numpy as np

import glob
import librosa
import librosa.display

import keras

import shutil

In [2]:
# Configuration Paths
# ===================

# Project directory (where the trained model is located)
PROJECT_PATH = "/Users/Gilby/Projects/MLAudioClassifier"

# Archive directory (your complete drum archive with nested structure)
ARCHIVE_PATH = "complete_drum_archive"  # Update this to your actual path
OUTPUT_PATH = "ClassifiedArchive"  # Where organized files will be placed

# Processing options
COPY_FILES = True  # True = copy files (keeps originals), False = move files
MAX_FILES_PER_RUN = None  # Limit files per run (None = process all)
PRESERVE_STRUCTURE = True  # Preserve original folder structure in filenames

# Set working directory
os.chdir(PROJECT_PATH)
print(f"Project directory: {PROJECT_PATH}")
print(f"Archive source: {ARCHIVE_PATH}")
print(f"Output destination: {OUTPUT_PATH}")
print(f"Mode: {'COPY' if COPY_FILES else 'MOVE'} files")
print(f"Structure preservation: {'ON' if PRESERVE_STRUCTURE else 'OFF'}")

# Verify archive exists
if not os.path.exists(ARCHIVE_PATH):
    print(f"\n❌ ERROR: Archive not found at {ARCHIVE_PATH}")
    print("Please update ARCHIVE_PATH to point to your complete_drum_archive")
else:
    print(f"\n✅ Archive found at {ARCHIVE_PATH}")

Project directory: /Users/Gilby/Projects/MLAudioClassifier
Archive source: complete_drum_archive
Output destination: ClassifiedArchive
Mode: COPY files
Structure preservation: ON

✅ Archive found at complete_drum_archive


In [3]:
from keras.models import load_model
model=load_model('models/model.keras')

ValueError: File not found: filepath=models/model.keras. Please ensure the file is an accessible `.keras` zip file.

In [None]:
# Verify model and scan archive
print("Model Summary:")
model.summary()
print(f"\nModel expects input shape: {model.input_shape}")
print(f"Model outputs {model.output_shape[1]} classes")

# Scan archive for all .wav files (including nested directories)
print(f"\n🔍 Scanning {ARCHIVE_PATH} for audio files...")

def find_wav_files(root_path):
    """Recursively find all .wav files and preserve their relative paths"""
    wav_files = []
    for root, dirs, files in os.walk(root_path):
        for file in files:
            if file.lower().endswith('.wav'):
                full_path = os.path.join(root, file)
                # Calculate relative path from archive root
                rel_path = os.path.relpath(full_path, root_path)
                wav_files.append((full_path, rel_path))
    return wav_files

if os.path.exists(ARCHIVE_PATH):
    all_wav_files = find_wav_files(ARCHIVE_PATH)
    print(f"✅ Found {len(all_wav_files)} .wav files in archive")
    
    # Show directory structure summary 
    dirs = set()
    for _, rel_path in all_wav_files:
        dir_path = os.path.dirname(rel_path)
        if dir_path:
            dirs.add(dir_path)
    
    print(f"📁 Archive contains {len(dirs)} directories with samples")
    if len(dirs) <= 10:  # Show directories if not too many
        for directory in sorted(dirs):
            count = sum(1 for _, rel_path in all_wav_files if rel_path.startswith(directory))
            print(f"   {directory}: {count} files")
    else:
        print("   (Many nested directories - structure will be preserved)")
        
    if MAX_FILES_PER_RUN and len(all_wav_files) > MAX_FILES_PER_RUN:
        print(f"\n⚠️  Will process first {MAX_FILES_PER_RUN} files this run")
else:
    print(f"❌ Archive not found at {ARCHIVE_PATH}")
    all_wav_files = []

Model Summary:



Model expects input shape: (None, 40, 98)
Model outputs 3 classes

🔍 Scanning complete_drum_archive for audio files...
❌ Archive not found at complete_drum_archive


In [None]:
# Process archive with nested structure preservation
if not all_wav_files:
    print("❌ No audio files found to process!")
else:
    # Create output directory structure
    if not os.path.exists(OUTPUT_PATH):
        os.makedirs(OUTPUT_PATH)
        print(f"📁 Created output directory: {OUTPUT_PATH}")
    
    # Create instrument class folders
    instrument_names = ["Crash", "Hihat", "Kick", "Ride", "Snare", "Tom"]
    for instrument in instrument_names:
        instrument_dir = os.path.join(OUTPUT_PATH, instrument)
        if not os.path.exists(instrument_dir):
            os.makedirs(instrument_dir)
            print(f"📁 Created {instrument} folder")
    
    # Process files (limit if specified)
    files_to_process = all_wav_files[:MAX_FILES_PER_RUN] if MAX_FILES_PER_RUN else all_wav_files
    print(f"\n🎵 Processing {len(files_to_process)} audio files...")
    
    processed_count = 0
    error_count = 0
    classification_stats = {name: 0 for name in instrument_names}
    
    for full_path, rel_path in files_to_process:
        try:
            print(f"Processing: {rel_path}")
            
            # Load and process the audio file
            waveform, samplerate = librosa.load(full_path, sr=44100, mono=True)
            waveform = librosa.util.fix_length(waveform, size=50000)
            mfcc = librosa.feature.mfcc(y=waveform, sr=samplerate, n_mfcc=40, n_fft=2048, hop_length=512)
            features = librosa.util.normalize(mfcc)
            features = features[np.newaxis, ...]
            
            # Predict the instrument class
            probs = model.predict(features, verbose=0)
            label = np.argmax(probs)
            confidence = np.max(probs)
            predicted_instrument = instrument_names[label]
            
            # Create output filename preserving structure information
            original_filename = os.path.basename(rel_path)
            name_without_ext = os.path.splitext(original_filename)[0]
            
            if PRESERVE_STRUCTURE:
                # Include directory structure in filename
                dir_structure = os.path.dirname(rel_path).replace('/', '_').replace('\\', '_')
                if dir_structure:
                    new_filename = f"{predicted_instrument.lower()}_{confidence:.3f}_{dir_structure}_{name_without_ext}.wav"
                else:
                    new_filename = f"{predicted_instrument.lower()}_{confidence:.3f}_{name_without_ext}.wav"
            else:
                new_filename = f"{predicted_instrument.lower()}_{confidence:.3f}_{name_without_ext}.wav"
            
            # Determine destination
            destination_dir = os.path.join(OUTPUT_PATH, predicted_instrument)
            destination_path = os.path.join(destination_dir, new_filename)
            
            # Copy or move file
            if COPY_FILES:
                shutil.copy2(full_path, destination_path)  # copy2 preserves metadata
                action = "copied"
            else:
                shutil.move(full_path, destination_path)
                action = "moved"
            
            print(f"  → {action} to {predicted_instrument} (confidence: {confidence:.3f})")
            
            processed_count += 1
            classification_stats[predicted_instrument] += 1
            
        except Exception as e:
            print(f"  → Error processing {rel_path}: {str(e)}")
            error_count += 1
    
    # Print summary
    print(f"\n📊 Processing Complete!")
    print(f"✅ Successfully processed: {processed_count} files")
    if error_count > 0:
        print(f"❌ Errors: {error_count} files")
    
    print(f"\n🎯 Classification Results:")
    for instrument, count in classification_stats.items():
        if count > 0:
            percentage = (count / processed_count) * 100
            print(f"   {instrument}: {count} files ({percentage:.1f}%)")
    
    print(f"\n📁 Results saved to: {os.path.abspath(OUTPUT_PATH)}")
    
    if MAX_FILES_PER_RUN and len(all_wav_files) > MAX_FILES_PER_RUN:
        remaining = len(all_wav_files) - MAX_FILES_PER_RUN
        print(f"\n⏳ {remaining} files remaining for future runs")

❌ No audio files found to process!


In [None]:
# Save detailed results and metadata
import json
from datetime import datetime

if processed_count > 0:
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Create metadata directory
    metadata_dir = os.path.join(OUTPUT_PATH, "metadata")
    if not os.path.exists(metadata_dir):
        os.makedirs(metadata_dir)
    
    # Save processing summary
    summary = {
        "timestamp": timestamp,
        "archive_path": ARCHIVE_PATH,
        "total_files_found": len(all_wav_files),
        "files_processed": processed_count,
        "files_with_errors": error_count,
        "classification_results": classification_stats,
        "processing_mode": "copy" if COPY_FILES else "move",
        "structure_preserved": PRESERVE_STRUCTURE
    }
    
    summary_file = os.path.join(metadata_dir, f"processing_summary_{timestamp}.json")
    with open(summary_file, 'w') as f:
        json.dump(summary, f, indent=2)
    
    print(f"📄 Metadata saved to: {summary_file}")
    
    # Also save to a general "latest" file for easy access
    latest_file = os.path.join(metadata_dir, "latest_run.json")
    with open(latest_file, 'w') as f:
        json.dump(summary, f, indent=2)
    
    print("✅ All processing complete!")

NameError: name 'processed_count' is not defined