# MFCC Feature Extractor 

This is stage 1 of the Method 1 pipeline, so this should be the first notebook you run after unzipping the data archives. 

**Updated to support 6 instrument classes**: Crash, Hihat, Kick, Ride, Snare, Tom

In [30]:
# Load libraries
import numpy as np
from math import log, pi, exp, ceil
import statistics as st
import pandas as pd
import scipy.io as io
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn
import librosa
import librosa.display
import glob
import os
import json


In [31]:
# please set this variable to where you cloned the git repo "MLAudioClassifier"

filepath = "/Users/Gilby/Projects/MLAudioClassifier"
os.chdir(filepath)

# make sure you have extracted the Training and Testing Data before you proceed with the rest of the notebook
# with something like "unzip TrainingData.zip"
# 

In [32]:
# Reset to project root directory (run this if you get lost in subdirectories)
filepath = "/Users/Gilby/Projects/MLAudioClassifier"
os.chdir(filepath)
print(f"✅ Reset to project directory: {os.getcwd()}")

# Verify we can see the expected directories
expected_dirs = ["TrainingData", "TestData"]
for directory in expected_dirs:
    if os.path.exists(directory):
        print(f"✅ Found {directory}/")
    else:
        print(f"❌ Missing {directory}/")
        
print("Ready to proceed!")

✅ Reset to project directory: /Users/Gilby/Projects/MLAudioClassifier
✅ Found TrainingData/
✅ Found TestData/
Ready to proceed!


# Loading training data 

This block of code will load in each audio file of the training data set and calculate it's MFCCs for all 6 instrument classes (Crash, Hihat, Kick, Ride, Snare, Tom). Please note that there will be a lot of warnings in red after you run this cell, but they are just warnings and the code is still running fine without errors.

In [33]:
# Verify directory structure before processing
# First, ensure we're in the correct project directory
os.chdir(filepath)
print("Checking directory structure...")
print(f"Current directory: {os.getcwd()}")

# Check training data
train_path = "TrainingData/AudioSamples"
if os.path.exists(train_path):
    print(f"✅ Found {train_path}")
    train_folders = [f for f in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, f)) and not f.startswith('.')]
    print(f"   Training classes: {train_folders}")
    for folder in train_folders:
        wav_count = len(glob.glob(os.path.join(train_path, folder, "*.wav")))
        print(f"   {folder}: {wav_count} samples")
else:
    print(f"❌ {train_path} not found!")

# Check test data  
test_path = "TestData"
if os.path.exists(test_path):
    print(f"✅ Found {test_path}")
    test_folders = [f for f in os.listdir(test_path) if os.path.isdir(os.path.join(test_path, f)) and not f.startswith('.')]
    print(f"   Test classes: {test_folders}")
    for folder in test_folders:
        wav_count = len(glob.glob(os.path.join(test_path, folder, "*.wav")))
        print(f"   {folder}: {wav_count} samples")
else:
    print(f"❌ {test_path} not found!")

print("\nReady to process audio files!")
print("="*50)

Checking directory structure...
Current directory: /Users/Gilby/Projects/MLAudioClassifier
✅ Found TrainingData/AudioSamples
   Training classes: ['Kick', 'Rim', 'Splash', 'Tom', 'Shaker', 'Bongo', 'Conga', 'Maracas', 'Metal', 'Hihat', 'Snare', 'Perc', 'FX', 'Ride', 'Agogo', 'Triangle', 'Whistle', 'Crash', 'Reverse', 'China', 'Tambourine', 'Vox', 'Noise', 'Clap', 'Woodblock', 'Clave', 'Bell', 'Cowbell', 'Cabasa', 'Vibraslap', 'Guiro', 'Timpani', 'Cuica', 'Timbale']
   Kick: 1540 samples
   Rim: 224 samples
   Splash: 33 samples
   Tom: 1985 samples
   Shaker: 66 samples
   Bongo: 168 samples
   Conga: 351 samples
   Maracas: 55 samples
   Metal: 26 samples
   Hihat: 524 samples
   Snare: 2409 samples
   Perc: 483 samples
   FX: 1595 samples
   Ride: 370 samples
   Agogo: 53 samples
   Triangle: 59 samples
   Whistle: 43 samples
   Crash: 203 samples
   Reverse: 76 samples
   China: 42 samples
   Tambourine: 61 samples
   Vox: 101 samples
   Noise: 90 samples
   Clap: 392 samples
   Woo

In [34]:
# Ensure we're in the project root directory first
os.chdir(filepath)
print(f"Starting from: {os.getcwd()}")

os.chdir("TrainingData/AudioSamples")
folder_names = glob.glob("*")
# Filter out hidden files and directories
folder_names = [f for f in folder_names if not f.startswith('.') and os.path.isdir(f)]
all_samples = { 'label':[], 'mfcc':[] }
i=0
frames_max = 0
for instrument in folder_names:
    print(f"Processing {instrument} samples...")
    os.chdir(instrument)
    file_names = glob.glob("*.wav")
    print(f"  Found {len(file_names)} files")
    for wav in file_names:
        waveform, samplerate = librosa.load(wav, sr=44100, mono=True)
        waveform = librosa.util.fix_length(waveform, size=50000)
        # Fixed: Added y= parameter for newer librosa versions
        mfcc = librosa.feature.mfcc(y=waveform, sr=samplerate, n_mfcc=40, n_fft=2048, hop_length=512)
        normalized_mfcc = librosa.util.normalize(mfcc)
        all_samples['mfcc'].append(normalized_mfcc.tolist())
        all_samples['label'].append(i)
        # Update frames maximum
        #if (num_frames > frames_max):
            #frames_max = num_frames
    os.chdir("..")
    i=i+1
os.chdir(filepath)

print(f"Training data extraction complete!")
print(f"Total samples: {len(all_samples['label'])}")
print(f"Class distribution: {dict(zip(*np.unique(all_samples['label'], return_counts=True)))}")

Starting from: /Users/Gilby/Projects/MLAudioClassifier
Processing Kick samples...
  Found 1540 files
Processing Rim samples...
  Found 224 files
Processing Splash samples...
  Found 33 files
Processing Tom samples...
  Found 1985 files
Processing Shaker samples...
  Found 66 files
Processing Bongo samples...
  Found 168 files
Processing Conga samples...
  Found 351 files
Processing Maracas samples...
  Found 55 files
Processing Metal samples...
  Found 26 files
Processing Hihat samples...
  Found 524 files
Processing Snare samples...
  Found 2409 files
Processing Perc samples...
  Found 483 files
Processing FX samples...
  Found 1595 files
Processing Ride samples...
  Found 370 files
Processing Agogo samples...
  Found 53 files
Processing Triangle samples...
  Found 59 files
Processing Whistle samples...
  Found 43 files
Processing Crash samples...
  Found 203 files
Processing Reverse samples...
  Found 76 files
Processing China samples...
  Found 42 files
Processing Tambourine samples

<h1> Loading testing data </h1>
<p> This block of code will load in each audio file of the testing data set and calculate it's MFCCs. Please note that there will be a lot of warnings in red after you run this cell, but they are just warnings and the code is still running fine without errors. </p>

In [35]:
os.chdir(filepath)
os.chdir("TestData")  # Fixed: removed the extra /TestData
folder_names = glob.glob("*")
# Filter out hidden files and directories
folder_names = [f for f in folder_names if not f.startswith('.') and os.path.isdir(f)]
test_samples = { 'label':[], 'mfcc':[] }
i=0
frames_max = 0
for instrument in folder_names:
    print(f"Processing {instrument} test samples...")
    os.chdir(instrument)
    file_names = glob.glob("*.wav")
    print(f"  Found {len(file_names)} files")
    for wav in file_names:
        waveform, samplerate = librosa.load(wav, sr=44100, mono=True)
        waveform = librosa.util.fix_length(waveform, size=50000)
        # Fixed: Added y= parameter for newer librosa versions
        mfcc = librosa.feature.mfcc(y=waveform, sr=samplerate, n_mfcc=40, n_fft=2048, hop_length=512)
        normalized_mfcc = librosa.util.normalize(mfcc)
        test_samples['mfcc'].append(normalized_mfcc.tolist())
        test_samples['label'].append(i)
        # Update frames maximum
        #if (num_frames > frames_max):
            #frames_max = num_frames
    os.chdir("..")
    i=i+1
os.chdir("..")

print(f"Test data extraction complete!")
print(f"Total test samples: {len(test_samples['label'])}")
print(f"Test class distribution: {dict(zip(*np.unique(test_samples['label'], return_counts=True)))}")

Processing Kick test samples...
  Found 124 files
Processing Rim test samples...
  Found 34 files
Processing Tom test samples...
  Found 362 files
Processing Hihat test samples...
  Found 242 files
Processing Snare test samples...
  Found 124 files
Processing Ride test samples...
  Found 224 files
Processing Crash test samples...
  Found 313 files
Processing Clap test samples...
  Found 71 files
Processing Bell test samples...
  Found 14 files
Processing Cowbell test samples...
  Found 24 files
Test data extraction complete!
Total test samples: 1532
Test class distribution: {np.int64(0): np.int64(124), np.int64(1): np.int64(34), np.int64(2): np.int64(362), np.int64(3): np.int64(242), np.int64(4): np.int64(124), np.int64(5): np.int64(224), np.int64(6): np.int64(313), np.int64(7): np.int64(71), np.int64(8): np.int64(14), np.int64(9): np.int64(24)}


In [None]:
os.chdir(filepath)
json_path='data/mfcc_train_data.json'
with open(json_path, "w") as fp:
        json.dump(all_samples, fp, indent=4)
        
json_path2='dat/mfcc_test_data.json'
with open(json_path2, "w") as fp2:
        json.dump(test_samples, fp2, indent=4)