# MFCC Feature Extractor 

This is stage 1 of the Method 1 pipeline, so this should be the first notebook you run after unzipping the data archives. 

**Updated to support 6 instrument classes**: Crash, Hihat, Kick, Ride, Snare, Tom

In [15]:
# Load libraries
import numpy as np
from math import log, pi, exp, ceil
import statistics as st
import pandas as pd
import scipy.io as io
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn
import librosa
import librosa.display
import glob
import os
import json


In [16]:
# please set this variable to where you cloned the git repo "MLAudioClassifier"

filepath = "/Users/Gilby/Projects/MLAudioClassifier"
os.chdir(filepath)

# make sure you have extracted the Training and Testing Data before you proceed with the rest of the notebook
# with something like "unzip TrainingData.zip"
# 

In [17]:
# Reset to project root directory (run this if you get lost in subdirectories)
filepath = "/Users/Gilby/Projects/MLAudioClassifier"
os.chdir(filepath)
print(f"✅ Reset to project directory: {os.getcwd()}")

# Verify we can see the expected directories
expected_dirs = ["TrainingData", "TestData"]
for directory in expected_dirs:
    if os.path.exists(directory):
        print(f"✅ Found {directory}/")
    else:
        print(f"❌ Missing {directory}/")
        
print("Ready to proceed!")

✅ Reset to project directory: /Users/Gilby/Projects/MLAudioClassifier
✅ Found TrainingData/
✅ Found TestData/
Ready to proceed!


# Loading training data 

This block of code will load in each audio file of the training data set and calculate it's MFCCs for all 6 instrument classes (Crash, Hihat, Kick, Ride, Snare, Tom). Please note that there will be a lot of warnings in red after you run this cell, but they are just warnings and the code is still running fine without errors.

In [18]:
# Verify directory structure before processing
# First, ensure we're in the correct project directory
os.chdir(filepath)
print("Checking directory structure...")
print(f"Current directory: {os.getcwd()}")

# Check training data
train_path = "TrainingData/AudioSamples"
if os.path.exists(train_path):
    print(f"✅ Found {train_path}")
    train_folders = [f for f in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, f)) and not f.startswith('.')]
    print(f"   Training classes: {train_folders}")
    for folder in train_folders:
        wav_count = len(glob.glob(os.path.join(train_path, folder, "*.wav")))
        print(f"   {folder}: {wav_count} samples")
else:
    print(f"❌ {train_path} not found!")

# Check test data  
test_path = "TestData"
if os.path.exists(test_path):
    print(f"✅ Found {test_path}")
    test_folders = [f for f in os.listdir(test_path) if os.path.isdir(os.path.join(test_path, f)) and not f.startswith('.')]
    print(f"   Test classes: {test_folders}")
    for folder in test_folders:
        wav_count = len(glob.glob(os.path.join(test_path, folder, "*.wav")))
        print(f"   {folder}: {wav_count} samples")
else:
    print(f"❌ {test_path} not found!")

print("\nReady to process audio files!")
print("="*50)

Checking directory structure...
Current directory: /Users/Gilby/Projects/MLAudioClassifier
✅ Found TrainingData/AudioSamples
   Training classes: ['Kick', 'Sizzle', 'Rim', 'Splash', 'Tom', 'Shaker', 'Bongo', 'Conga', 'Maracas', 'Metal', 'Hihat', 'Snare', 'Perc', 'FX', 'Ride', 'Agogo', 'Triangle', 'Whistle', 'Crash', 'Bass', 'Reverse', 'China', 'Tambourine', 'Vox', 'Noise', 'Clap', 'Woodblock', 'Clave', 'Cymbal', 'Bell', 'Cowbell', 'Cabasa', 'Vibraslap', 'Guiro', 'Timpani', 'Cuica', 'Timbale']
   Kick: 1360 samples
   Sizzle: 3 samples
   Rim: 192 samples
   Splash: 23 samples
   Tom: 1828 samples
   Shaker: 54 samples
   Bongo: 151 samples
   Conga: 335 samples
   Maracas: 46 samples
   Metal: 23 samples
   Hihat: 969 samples
   Snare: 2264 samples
   Perc: 483 samples
   FX: 1595 samples
   Ride: 312 samples
   Agogo: 52 samples
   Triangle: 53 samples
   Whistle: 40 samples
   Crash: 187 samples
   Bass: 107 samples
   Reverse: 76 samples
   China: 33 samples
   Tambourine: 56 sample

In [19]:
# Ensure we're in the project root directory first
os.chdir(filepath)
print(f"Starting from: {os.getcwd()}")

os.chdir("TrainingData/AudioSamples")
folder_names = glob.glob("*")
# Filter out hidden files and directories
folder_names = [f for f in folder_names if not f.startswith('.') and os.path.isdir(f)]
all_samples = { 'label':[], 'mfcc':[] }
i=0
frames_max = 0
for instrument in folder_names:
    print(f"Processing {instrument} samples...")
    os.chdir(instrument)
    file_names = glob.glob("*.wav")
    print(f"  Found {len(file_names)} files")
    for wav in file_names:
        waveform, samplerate = librosa.load(wav, sr=44100, mono=True)
        waveform = librosa.util.fix_length(waveform, size=50000)
        # Fixed: Added y= parameter for newer librosa versions
        mfcc = librosa.feature.mfcc(y=waveform, sr=samplerate, n_mfcc=40, n_fft=2048, hop_length=512)
        normalized_mfcc = librosa.util.normalize(mfcc)
        all_samples['mfcc'].append(normalized_mfcc.tolist())
        all_samples['label'].append(i)
        # Update frames maximum
        #if (num_frames > frames_max):
            #frames_max = num_frames
    os.chdir("..")
    i=i+1
os.chdir(filepath)

print(f"Training data extraction complete!")
print(f"Total samples: {len(all_samples['label'])}")
print(f"Class distribution: {dict(zip(*np.unique(all_samples['label'], return_counts=True)))}")

Starting from: /Users/Gilby/Projects/MLAudioClassifier
Processing Kick samples...
  Found 1360 files
Processing Sizzle samples...
  Found 3 files
Processing Rim samples...
  Found 192 files
Processing Splash samples...
  Found 23 files
Processing Tom samples...
  Found 1828 files
Processing Shaker samples...
  Found 54 files
Processing Bongo samples...
  Found 151 files
Processing Conga samples...
  Found 335 files
Processing Maracas samples...
  Found 46 files
Processing Metal samples...
  Found 23 files
Processing Hihat samples...
  Found 969 files
Processing Snare samples...
  Found 2264 files
Processing Perc samples...
  Found 483 files
Processing FX samples...
  Found 1595 files
Processing Ride samples...
  Found 312 files
Processing Agogo samples...
  Found 52 files
Processing Triangle samples...
  Found 53 files
Processing Whistle samples...
  Found 40 files
Processing Crash samples...
  Found 187 files
Processing Bass samples...
  Found 107 files
Processing Reverse samples...
 

<h1> Loading testing data </h1>
<p> This block of code will load in each audio file of the testing data set and calculate it's MFCCs. Please note that there will be a lot of warnings in red after you run this cell, but they are just warnings and the code is still running fine without errors. </p>

In [20]:
os.chdir(filepath)
os.chdir("TestData")  # Fixed: removed the extra /TestData
folder_names = glob.glob("*")
# Filter out hidden files and directories
folder_names = [f for f in folder_names if not f.startswith('.') and os.path.isdir(f)]
test_samples = { 'label':[], 'mfcc':[] }
i=0
frames_max = 0
for instrument in folder_names:
    print(f"Processing {instrument} test samples...")
    os.chdir(instrument)
    file_names = glob.glob("*.wav")
    print(f"  Found {len(file_names)} files")
    for wav in file_names:
        waveform, samplerate = librosa.load(wav, sr=44100, mono=True)
        waveform = librosa.util.fix_length(waveform, size=50000)
        # Fixed: Added y= parameter for newer librosa versions
        mfcc = librosa.feature.mfcc(y=waveform, sr=samplerate, n_mfcc=40, n_fft=2048, hop_length=512)
        normalized_mfcc = librosa.util.normalize(mfcc)
        test_samples['mfcc'].append(normalized_mfcc.tolist())
        test_samples['label'].append(i)
        # Update frames maximum
        #if (num_frames > frames_max):
            #frames_max = num_frames
    os.chdir("..")
    i=i+1
os.chdir("..")

print(f"Test data extraction complete!")
print(f"Total test samples: {len(test_samples['label'])}")
print(f"Test class distribution: {dict(zip(*np.unique(test_samples['label'], return_counts=True)))}")

Processing Kick test samples...
  Found 100 files
Processing Rim test samples...
  Found 31 files
Processing Splash test samples...
  Found 4 files
Processing Tom test samples...
  Found 100 files
Processing Shaker test samples...
  Found 10 files
Processing Bongo test samples...
  Found 24 files
Processing Conga test samples...
  Found 52 files
Processing Maracas test samples...
  Found 9 files
Processing Metal test samples...
  Found 4 files
Processing Hihat test samples...
  Found 37 files
Processing Snare test samples...
  Found 100 files
Processing Perc test samples...
  Found 96 files
Processing Ride test samples...
  Found 30 files
Processing Agogo test samples...
  Found 9 files
Processing Triangle test samples...
  Found 9 files
Processing Whistle test samples...
  Found 7 files
Processing Crash test samples...
  Found 35 files
Processing Bass test samples...
  Found 13 files
Processing China test samples...
  Found 6 files
Processing Tambourine test samples...
  Found 11 file

In [22]:
os.chdir(filepath)
json_path='data/mfcc_train_data.json'
with open(json_path, "w") as fp:
        json.dump(all_samples, fp, indent=4)
        
json_path2='data/mfcc_test_data.json'
with open(json_path2, "w") as fp2:
        json.dump(test_samples, fp2, indent=4)